From 96e02d1586782eadf051fa3d6bc4132d2447ac2c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 4 Feb 2012 10:47:10 +0100 Subject: exec: fix use-after-free bug in setup_new_exec() Setting the task name is done within setup_new_exec() by accessing bprm->filename. However this happens after flush_old_exec(). This may result in a use after free bug, flush_old_exec() may "complete" vfork_done, which will wake up the parent which in turn may free the passed in filename. To fix this add a new tcomm field in struct linux_binprm which contains the now early generated task name until it is used. Fixes this bug on s390: Unable to handle kernel pointer dereference at virtual kernel address 0000000039768000 Process kworker/u:3 (pid: 245, task: 000000003a3dc840, ksp: 0000000039453818) Krnl PSW : 0704000180000000 0000000000282e94 (setup_new_exec+0xa0/0x374) Call Trace: ([<0000000000282e2c>] setup_new_exec+0x38/0x374) [<00000000002dd12e>] load_elf_binary+0x402/0x1bf4 [<0000000000280a42>] search_binary_handler+0x38e/0x5bc [<0000000000282b6c>] do_execve_common+0x410/0x514 [<0000000000282cb6>] do_execve+0x46/0x58 [<00000000005bce58>] kernel_execve+0x28/0x70 [<000000000014ba2e>] ____call_usermodehelper+0x102/0x140 [<00000000005bc8da>] kernel_thread_starter+0x6/0xc [<00000000005bc8d4>] kernel_thread_starter+0x0/0xc Last Breaking-Event-Address: [<00000000002830f0>] setup_new_exec+0x2fc/0x374 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- fs/exec.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5c..92ce83a11e90 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on -- cgit v1.2.3 From 1dce27c5aa6770e9d195f2bb7db1db3d4dde5591 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Feb 2012 17:49:42 +0000 Subject: Wrap accesses to the fd_sets in struct fdtable Wrap accesses to the fd_sets in struct fdtable (for recording open files and close-on-exec flags) so that we can move away from using fd_sets since we abuse the fd_set structs by not allocating the full-sized structure under normal circumstances and by non-core code looking at the internals of the fd_sets. The first abuse means that use of FD_ZERO() on these fd_sets is not permitted, since that cannot be told about their abnormal lengths. This introduces six wrapper functions for setting, clearing and testing close-on-exec flags and fd-is-open flags: void __set_close_on_exec(int fd, struct fdtable *fdt); void __clear_close_on_exec(int fd, struct fdtable *fdt); bool close_on_exec(int fd, const struct fdtable *fdt); void __set_open_fd(int fd, struct fdtable *fdt); void __clear_open_fd(int fd, struct fdtable *fdt); bool fd_is_open(int fd, const struct fdtable *fdt); Note that I've prepended '__' to the names of the set/clear functions because they require the caller to hold a lock to use them. Note also that I haven't added wrappers for looking behind the scenes at the the array. Possibly that should exist too. Signed-off-by: David Howells Link: http://lkml.kernel.org/r/20120216174942.23314.1364.stgit@warthog.procyon.org.uk Signed-off-by: H. Peter Anvin Cc: Al Viro --- Documentation/filesystems/files.txt | 4 ++-- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- drivers/staging/android/binder.c | 10 +++++----- fs/autofs4/dev-ioctl.c | 2 +- fs/exec.c | 4 ++-- fs/fcntl.c | 18 ++++++++--------- fs/file.c | 8 ++++---- fs/open.c | 4 ++-- fs/proc/base.c | 2 +- include/linux/fdtable.h | 30 ++++++++++++++++++++++++++++ 10 files changed, 57 insertions(+), 27 deletions(-) (limited to 'fs/exec.c') diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt index ac2facc50d2a..46dfc6b038c3 100644 --- a/Documentation/filesystems/files.txt +++ b/Documentation/filesystems/files.txt @@ -113,8 +113,8 @@ the fdtable structure - if (fd >= 0) { /* locate_fd() may have expanded fdtable, load the ptr */ fdt = files_fdtable(files); - FD_SET(fd, fdt->open_fds); - FD_CLR(fd, fdt->close_on_exec); + __set_open_fd(fd, fdt); + __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); ..... diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 03c5fce2a5b3..c2c5b078ba80 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -122,7 +122,7 @@ static struct spu_context *coredump_next_context(int *fd) struct spu_context *ctx = NULL; for (; *fd < fdt->max_fds; (*fd)++) { - if (!FD_ISSET(*fd, fdt->open_fds)) + if (!fd_is_open(*fd, fdt)) continue; file = fcheck(*fd); diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 7491801a661c..35dd9c370e55 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -408,11 +408,11 @@ repeat: goto repeat; } - FD_SET(fd, fdt->open_fds); + __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); + __set_close_on_exec(fd, fdt); else - FD_CLR(fd, fdt->close_on_exec); + __clear_close_on_exec(fd, fdt); files->next_fd = fd + 1; #if 1 /* Sanity check */ @@ -453,7 +453,7 @@ static void task_fd_install( static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); - __FD_CLR(fd, fdt->open_fds); + __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; } @@ -479,7 +479,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd) if (!filp) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); - FD_CLR(fd, fdt->close_on_exec); + __clear_close_on_exec(fd, fdt); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); retval = filp_close(filp, files); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 76741d8d7786..3dfd615afb6b 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -230,7 +230,7 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) fdt = files_fdtable(files); BUG_ON(fdt->fd[fd] != NULL); rcu_assign_pointer(fdt->fd[fd], file); - FD_SET(fd, fdt->close_on_exec); + __set_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); } diff --git a/fs/exec.c b/fs/exec.c index 92ce83a11e90..22cc38d9e79f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2078,8 +2078,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) fd_install(0, rp); spin_lock(&cf->file_lock); fdt = files_fdtable(cf); - FD_SET(0, fdt->open_fds); - FD_CLR(0, fdt->close_on_exec); + __set_open_fd(0, fdt); + __clear_close_on_exec(0, fdt); spin_unlock(&cf->file_lock); /* and disallow core files too */ diff --git a/fs/fcntl.c b/fs/fcntl.c index 22764c7c8382..75e7c1f3a080 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -32,20 +32,20 @@ void set_close_on_exec(unsigned int fd, int flag) spin_lock(&files->file_lock); fdt = files_fdtable(files); if (flag) - FD_SET(fd, fdt->close_on_exec); + __set_close_on_exec(fd, fdt); else - FD_CLR(fd, fdt->close_on_exec); + __clear_close_on_exec(fd, fdt); spin_unlock(&files->file_lock); } -static int get_close_on_exec(unsigned int fd) +static bool get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; struct fdtable *fdt; - int res; + bool res; rcu_read_lock(); fdt = files_fdtable(files); - res = FD_ISSET(fd, fdt->close_on_exec); + res = close_on_exec(fd, fdt); rcu_read_unlock(); return res; } @@ -90,15 +90,15 @@ SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) err = -EBUSY; fdt = files_fdtable(files); tofree = fdt->fd[newfd]; - if (!tofree && FD_ISSET(newfd, fdt->open_fds)) + if (!tofree && fd_is_open(newfd, fdt)) goto out_unlock; get_file(file); rcu_assign_pointer(fdt->fd[newfd], file); - FD_SET(newfd, fdt->open_fds); + __set_open_fd(newfd, fdt); if (flags & O_CLOEXEC) - FD_SET(newfd, fdt->close_on_exec); + __set_close_on_exec(newfd, fdt); else - FD_CLR(newfd, fdt->close_on_exec); + __clear_close_on_exec(newfd, fdt); spin_unlock(&files->file_lock); if (tofree) diff --git a/fs/file.c b/fs/file.c index 4c6992d8f3ba..114fea0a2cec 100644 --- a/fs/file.c +++ b/fs/file.c @@ -366,7 +366,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * is partway through open(). So make sure that this * fd is available to the new process. */ - FD_CLR(open_files - i, new_fdt->open_fds); + __clear_open_fd(open_files - i, new_fdt); } rcu_assign_pointer(*new_fds++, f); } @@ -460,11 +460,11 @@ repeat: if (start <= files->next_fd) files->next_fd = fd + 1; - FD_SET(fd, fdt->open_fds); + __set_open_fd(fd, fdt); if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); + __set_close_on_exec(fd, fdt); else - FD_CLR(fd, fdt->close_on_exec); + __clear_close_on_exec(fd, fdt); error = fd; #if 1 /* Sanity check */ diff --git a/fs/open.c b/fs/open.c index 77becc041149..5720854156db 100644 --- a/fs/open.c +++ b/fs/open.c @@ -836,7 +836,7 @@ EXPORT_SYMBOL(dentry_open); static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); - __FD_CLR(fd, fdt->open_fds); + __clear_open_fd(fd, fdt); if (fd < files->next_fd) files->next_fd = fd; } @@ -1080,7 +1080,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd) if (!filp) goto out_unlock; rcu_assign_pointer(fdt->fd[fd], NULL); - FD_CLR(fd, fdt->close_on_exec); + __clear_close_on_exec(fd, fdt); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); retval = filp_close(filp, files); diff --git a/fs/proc/base.c b/fs/proc/base.c index d4548dd49b02..db6ab4b36a0b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1754,7 +1754,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info) fdt = files_fdtable(files); f_flags = file->f_flags & ~O_CLOEXEC; - if (FD_ISSET(fd, fdt->close_on_exec)) + if (close_on_exec(fd, fdt)) f_flags |= O_CLOEXEC; if (path) { diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 82163c4b32c9..7675da2c18f7 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -38,6 +38,36 @@ struct fdtable { struct fdtable *next; }; +static inline void __set_close_on_exec(int fd, struct fdtable *fdt) +{ + FD_SET(fd, fdt->close_on_exec); +} + +static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) +{ + FD_CLR(fd, fdt->close_on_exec); +} + +static inline bool close_on_exec(int fd, const struct fdtable *fdt) +{ + return FD_ISSET(fd, fdt->close_on_exec); +} + +static inline void __set_open_fd(int fd, struct fdtable *fdt) +{ + FD_SET(fd, fdt->open_fds); +} + +static inline void __clear_open_fd(int fd, struct fdtable *fdt) +{ + FD_CLR(fd, fdt->open_fds); +} + +static inline bool fd_is_open(int fd, const struct fdtable *fdt) +{ + return FD_ISSET(fd, fdt->open_fds); +} + /* * Open file table structure */ -- cgit v1.2.3 From 1fd36adcd98c14d2fd97f545293c488775cb2823 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Feb 2012 17:49:54 +0000 Subject: Replace the fd_sets in struct fdtable with an array of unsigned longs Replace the fd_sets in struct fdtable with an array of unsigned longs and then use the standard non-atomic bit operations rather than the FD_* macros. This: (1) Removes the abuses of struct fd_set: (a) Since we don't want to allocate a full fd_set the vast majority of the time, we actually, in effect, just allocate a just-big-enough array of unsigned longs and cast it to an fd_set type - so why bother with the fd_set at all? (b) Some places outside of the core fdtable handling code (such as SELinux) want to look inside the array of unsigned longs hidden inside the fd_set struct for more efficient iteration over the entire set. (2) Eliminates the use of FD_*() macros in the kernel completely. (3) Permits the __FD_*() macros to be deleted entirely where not exposed to userspace. Signed-off-by: David Howells Link: http://lkml.kernel.org/r/20120216174954.23314.48147.stgit@warthog.procyon.org.uk Signed-off-by: H. Peter Anvin Cc: Al Viro --- fs/exec.c | 4 ++-- fs/file.c | 46 ++++++++++++++++++++++------------------------ fs/select.c | 2 +- include/linux/fdtable.h | 28 ++++++++++------------------ kernel/exit.c | 2 +- security/selinux/hooks.c | 2 +- 6 files changed, 37 insertions(+), 47 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 22cc38d9e79f..cfd5e3047bd8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1026,10 +1026,10 @@ static void flush_old_files(struct files_struct * files) fdt = files_fdtable(files); if (i >= fdt->max_fds) break; - set = fdt->close_on_exec->fds_bits[j]; + set = fdt->close_on_exec[j]; if (!set) continue; - fdt->close_on_exec->fds_bits[j] = 0; + fdt->close_on_exec[j] = 0; spin_unlock(&files->file_lock); for ( ; set ; i++,set >>= 1) { if (set & 1) { diff --git a/fs/file.c b/fs/file.c index 114fea0a2cec..2d479dd8484e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -40,7 +40,7 @@ int sysctl_nr_open_max = 1024 * 1024; /* raised later */ */ static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); -static void *alloc_fdmem(unsigned int size) +static void *alloc_fdmem(size_t size) { /* * Very large allocations can stress page reclaim, so fall back to @@ -142,7 +142,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) static struct fdtable * alloc_fdtable(unsigned int nr) { struct fdtable *fdt; - char *data; + void *data; /* * Figure out how many fds we actually want to support in this fdtable. @@ -172,14 +172,15 @@ static struct fdtable * alloc_fdtable(unsigned int nr) data = alloc_fdmem(nr * sizeof(struct file *)); if (!data) goto out_fdt; - fdt->fd = (struct file **)data; - data = alloc_fdmem(max_t(unsigned int, + fdt->fd = data; + + data = alloc_fdmem(max_t(size_t, 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); if (!data) goto out_arr; - fdt->open_fds = (fd_set *)data; - data += nr / BITS_PER_BYTE; - fdt->close_on_exec = (fd_set *)data; + fdt->open_fds = data; + data += nr / BITS_PER_LONG; + fdt->close_on_exec = data; fdt->next = NULL; return fdt; @@ -275,11 +276,11 @@ static int count_open_files(struct fdtable *fdt) int i; /* Find the last open fd */ - for (i = size/(8*sizeof(long)); i > 0; ) { - if (fdt->open_fds->fds_bits[--i]) + for (i = size / BITS_PER_LONG; i > 0; ) { + if (fdt->open_fds[--i]) break; } - i = (i+1) * 8 * sizeof(long); + i = (i + 1) * BITS_PER_LONG; return i; } @@ -306,8 +307,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) newf->next_fd = 0; new_fdt = &newf->fdtab; new_fdt->max_fds = NR_OPEN_DEFAULT; - new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->close_on_exec = newf->close_on_exec_init; + new_fdt->open_fds = newf->open_fds_init; new_fdt->fd = &newf->fd_array[0]; new_fdt->next = NULL; @@ -350,10 +351,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) old_fds = old_fdt->fd; new_fds = new_fdt->fd; - memcpy(new_fdt->open_fds->fds_bits, - old_fdt->open_fds->fds_bits, open_files/8); - memcpy(new_fdt->close_on_exec->fds_bits, - old_fdt->close_on_exec->fds_bits, open_files/8); + memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); + memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -379,11 +378,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(new_fds, 0, size); if (new_fdt->max_fds > open_files) { - int left = (new_fdt->max_fds-open_files)/8; - int start = open_files / (8 * sizeof(unsigned long)); + int left = (new_fdt->max_fds - open_files) / 8; + int start = open_files / BITS_PER_LONG; - memset(&new_fdt->open_fds->fds_bits[start], 0, left); - memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + memset(&new_fdt->open_fds[start], 0, left); + memset(&new_fdt->close_on_exec[start], 0, left); } rcu_assign_pointer(newf->fdt, new_fdt); @@ -419,8 +418,8 @@ struct files_struct init_files = { .fdtab = { .max_fds = NR_OPEN_DEFAULT, .fd = &init_files.fd_array[0], - .close_on_exec = (fd_set *)&init_files.close_on_exec_init, - .open_fds = (fd_set *)&init_files.open_fds_init, + .close_on_exec = init_files.close_on_exec_init, + .open_fds = init_files.open_fds_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; @@ -443,8 +442,7 @@ repeat: fd = files->next_fd; if (fd < fdt->max_fds) - fd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fds, fd); + fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); error = expand_files(files, fd); if (error < 0) diff --git a/fs/select.c b/fs/select.c index d33418fdc858..2e7fbe8a092c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -348,7 +348,7 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; fdt = files_fdtable(current->files); - open_fds = fdt->open_fds->fds_bits+n; + open_fds = fdt->open_fds + n; max = 0; if (set) { set &= BITS(fds, n); diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 7675da2c18f7..158a41eed314 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -21,51 +21,43 @@ */ #define NR_OPEN_DEFAULT BITS_PER_LONG -/* - * The embedded_fd_set is a small fd_set, - * suitable for most tasks (which open <= BITS_PER_LONG files) - */ -struct embedded_fd_set { - unsigned long fds_bits[1]; -}; - struct fdtable { unsigned int max_fds; struct file __rcu **fd; /* current fd array */ - fd_set *close_on_exec; - fd_set *open_fds; + unsigned long *close_on_exec; + unsigned long *open_fds; struct rcu_head rcu; struct fdtable *next; }; static inline void __set_close_on_exec(int fd, struct fdtable *fdt) { - FD_SET(fd, fdt->close_on_exec); + __set_bit(fd, fdt->close_on_exec); } static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) { - FD_CLR(fd, fdt->close_on_exec); + __clear_bit(fd, fdt->close_on_exec); } static inline bool close_on_exec(int fd, const struct fdtable *fdt) { - return FD_ISSET(fd, fdt->close_on_exec); + return test_bit(fd, fdt->close_on_exec); } static inline void __set_open_fd(int fd, struct fdtable *fdt) { - FD_SET(fd, fdt->open_fds); + __set_bit(fd, fdt->open_fds); } static inline void __clear_open_fd(int fd, struct fdtable *fdt) { - FD_CLR(fd, fdt->open_fds); + __clear_bit(fd, fdt->open_fds); } static inline bool fd_is_open(int fd, const struct fdtable *fdt) { - return FD_ISSET(fd, fdt->open_fds); + return test_bit(fd, fdt->open_fds); } /* @@ -83,8 +75,8 @@ struct files_struct { */ spinlock_t file_lock ____cacheline_aligned_in_smp; int next_fd; - struct embedded_fd_set close_on_exec_init; - struct embedded_fd_set open_fds_init; + unsigned long close_on_exec_init[1]; + unsigned long open_fds_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; diff --git a/kernel/exit.c b/kernel/exit.c index 4b4042f9bc6a..4db020015f14 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -473,7 +473,7 @@ static void close_files(struct files_struct * files) i = j * __NFDBITS; if (i >= fdt->max_fds) break; - set = fdt->open_fds->fds_bits[j++]; + set = fdt->open_fds[j++]; while (set) { if (set & 1) { struct file * file = xchg(&fdt->fd[i], NULL); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6a3683e28426..421c990a20b2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2145,7 +2145,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, fdt = files_fdtable(files); if (i >= fdt->max_fds) break; - set = fdt->open_fds->fds_bits[j]; + set = fdt->open_fds[j]; if (!set) continue; spin_unlock(&files->file_lock); -- cgit v1.2.3 From 4ff16c25e2cc48cbe6956e356c38a25ac063a64d Mon Sep 17 00:00:00 2001 From: David Smith Date: Tue, 7 Feb 2012 10:11:05 -0600 Subject: tracepoint, vfs, sched: Add exec() tracepoint Added a minimal exec tracepoint. Exec is an important major event in the life of a task, like fork(), clone() or exit(), all of which we already trace. [ We also do scheduling re-balancing during exec() - so it's useful from a scheduler instrumentation POV as well. ] If you want to watch a task start up, when it gets exec'ed is a good place to start. With the addition of this tracepoint, exec's can be monitored and better picture of general system activity can be obtained. This tracepoint will also enable better process life tracking, allowing you to answer questions like "what process keeps starting up binary X?". This tracepoint can also be useful in ftrace filtering and trigger conditions: i.e. starting or stopping filtering when exec is called. Signed-off-by: David Smith Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Al Viro Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/4F314D19.7030504@redhat.com Signed-off-by: Ingo Molnar --- fs/exec.c | 9 ++++++--- include/trace/events/sched.h | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5c..d0d208092773 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -63,6 +63,8 @@ #include #include "internal.h" +#include + int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; unsigned int core_pipe_limit; @@ -1401,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) */ bprm->recursion_depth = depth; if (retval >= 0) { - if (depth == 0) - ptrace_event(PTRACE_EVENT_EXEC, - old_pid); + if (depth == 0) { + trace_sched_process_exec(current, old_pid, bprm); + ptrace_event(PTRACE_EVENT_EXEC, old_pid); + } put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6ba596b07a72..e61ddfe8fe9e 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -6,6 +6,7 @@ #include #include +#include /* * Tracepoint for calling kthread_stop, performed to end a kthread: @@ -275,6 +276,32 @@ TRACE_EVENT(sched_process_fork, __entry->child_comm, __entry->child_pid) ); +/* + * Tracepoint for exec: + */ +TRACE_EVENT(sched_process_exec, + + TP_PROTO(struct task_struct *p, pid_t old_pid, + struct linux_binprm *bprm), + + TP_ARGS(p, old_pid, bprm), + + TP_STRUCT__entry( + __string( filename, bprm->filename ) + __field( pid_t, pid ) + __field( pid_t, old_pid ) + ), + + TP_fast_assign( + __assign_str(filename, bprm->filename); + __entry->pid = p->pid; + __entry->old_pid = p->pid; + ), + + TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), + __entry->pid, __entry->old_pid) +); + /* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. -- cgit v1.2.3 From c415c3b47ea2754659d915cca387a20999044163 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: vfork: introduce complete_vfork_done() No functional changes. Move the clear-and-complete-vfork_done code into the new trivial helper, complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 8 ++------ include/linux/sched.h | 1 + kernel/fork.c | 17 ++++++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 92ce83a11e90..dccdcec913e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1915,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state) { struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - struct completion *vfork_done; int core_waiters = -EBUSY; init_completion(&core_state->startup); @@ -1934,11 +1933,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state) * Make sure nobody is waiting for us to release the VM, * otherwise we can deadlock when we wait on each other */ - vfork_done = tsk->vfork_done; - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + if (tsk->vfork_done) + complete_vfork_done(tsk); if (core_waiters) wait_for_completion(&core_state->startup); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d379a6bfd88..1b25a37f2aee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,6 +2291,7 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); +extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); diff --git a/kernel/fork.c b/kernel/fork.c index e2cd3e2a5ae8..cf3d96379608 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -668,6 +668,14 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } +void complete_vfork_done(struct task_struct *tsk) +{ + struct completion *vfork_done = tsk->vfork_done; + + tsk->vfork_done = NULL; + complete(vfork_done); +} + /* Please note the differences between mmput and mm_release. * mmput is called whenever we stop holding onto a mm_struct, * error success whatever. @@ -683,8 +691,6 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) */ void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - struct completion *vfork_done = tsk->vfork_done; - /* Get rid of any futexes when releasing the mm */ #ifdef CONFIG_FUTEX if (unlikely(tsk->robust_list)) { @@ -704,11 +710,8 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) /* Get rid of any cached register state */ deactivate_mm(tsk, mm); - /* notify parent sleeping on vfork() */ - if (vfork_done) { - tsk->vfork_done = NULL; - complete(vfork_done); - } + if (tsk->vfork_done) + complete_vfork_done(tsk); /* * If we're exiting normally, clear a user-space tid field if -- cgit v1.2.3 From 57b59c4a1400fa6c34764eab2e35a8762dc05a09 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 Mar 2012 14:59:13 -0800 Subject: coredump_wait: don't call complete_vfork_done() Now that CLONE_VFORK is killable, coredump_wait() no longer needs complete_vfork_done(). zap_threads() should find and kill all tasks with the same ->mm, this includes our parent if ->vfork_done is set. mm_release() becomes the only caller, unexport complete_vfork_done(). Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 14 ++------------ include/linux/sched.h | 1 - kernel/fork.c | 2 +- 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index dccdcec913e9..153dee14fe55 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1926,19 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state) core_waiters = zap_threads(tsk, mm, core_state, exit_code); up_write(&mm->mmap_sem); - if (unlikely(core_waiters < 0)) - goto fail; - - /* - * Make sure nobody is waiting for us to release the VM, - * otherwise we can deadlock when we wait on each other - */ - if (tsk->vfork_done) - complete_vfork_done(tsk); - - if (core_waiters) + if (core_waiters > 0) wait_for_completion(&core_state->startup); -fail: + return core_waiters; } diff --git a/include/linux/sched.h b/include/linux/sched.h index b6467711f12e..11fcafaf4ae4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,7 +2291,6 @@ extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -extern void complete_vfork_done(struct task_struct *tsk); struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); diff --git a/kernel/fork.c b/kernel/fork.c index 892c534ce6e3..44b0e21af50e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -668,7 +668,7 @@ struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) return mm; } -void complete_vfork_done(struct task_struct *tsk) +static void complete_vfork_done(struct task_struct *tsk) { struct completion *vfork; -- cgit v1.2.3 From e8e3c3d66fd9d1ee2250f68d778cc48c1346d228 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 25 Nov 2011 23:14:27 +0800 Subject: fs: remove the second argument of k[un]map_atomic() Acked-by: Benjamin LaHaise Signed-off-by: Cong Wang --- fs/aio.c | 30 +++++++++++++++--------------- fs/bio-integrity.c | 10 +++++----- fs/exec.c | 4 ++-- fs/namei.c | 4 ++-- fs/pipe.c | 8 ++++---- fs/splice.c | 7 ++----- include/linux/bio.h | 8 ++++---- 7 files changed, 34 insertions(+), 37 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/aio.c b/fs/aio.c index b9d64d89a043..5b600cb8779e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -160,7 +160,7 @@ static int aio_setup_ring(struct kioctx *ctx) info->nr = nr_events; /* trusted copy */ - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(info->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ctx->user_id; ring->head = ring->tail = 0; @@ -168,32 +168,32 @@ static int aio_setup_ring(struct kioctx *ctx) ring->compat_features = AIO_RING_COMPAT_FEATURES; ring->incompat_features = AIO_RING_INCOMPAT_FEATURES; ring->header_length = sizeof(struct aio_ring); - kunmap_atomic(ring, KM_USER0); + kunmap_atomic(ring); return 0; } /* aio_ring_event: returns a pointer to the event at the given index from - * kmap_atomic(, km). Release the pointer with put_aio_ring_event(); + * kmap_atomic(). Release the pointer with put_aio_ring_event(); */ #define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event)) #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) -#define aio_ring_event(info, nr, km) ({ \ +#define aio_ring_event(info, nr) ({ \ unsigned pos = (nr) + AIO_EVENTS_OFFSET; \ struct io_event *__event; \ __event = kmap_atomic( \ - (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \ + (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \ __event += pos % AIO_EVENTS_PER_PAGE; \ __event; \ }) -#define put_aio_ring_event(event, km) do { \ +#define put_aio_ring_event(event) do { \ struct io_event *__event = (event); \ (void)__event; \ - kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ + kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \ } while(0) static void ctx_rcu_free(struct rcu_head *head) @@ -1019,10 +1019,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2) if (kiocbIsCancelled(iocb)) goto put_rq; - ring = kmap_atomic(info->ring_pages[0], KM_IRQ1); + ring = kmap_atomic(info->ring_pages[0]); tail = info->tail; - event = aio_ring_event(info, tail, KM_IRQ0); + event = aio_ring_event(info, tail); if (++tail >= info->nr) tail = 0; @@ -1043,8 +1043,8 @@ int aio_complete(struct kiocb *iocb, long res, long res2) info->tail = tail; ring->tail = tail; - put_aio_ring_event(event, KM_IRQ0); - kunmap_atomic(ring, KM_IRQ1); + put_aio_ring_event(event); + kunmap_atomic(ring); pr_debug("added to ring %p at [%lu]\n", iocb, tail); @@ -1089,7 +1089,7 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) unsigned long head; int ret = 0; - ring = kmap_atomic(info->ring_pages[0], KM_USER0); + ring = kmap_atomic(info->ring_pages[0]); dprintk("in aio_read_evt h%lu t%lu m%lu\n", (unsigned long)ring->head, (unsigned long)ring->tail, (unsigned long)ring->nr); @@ -1101,18 +1101,18 @@ static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent) head = ring->head % info->nr; if (head != ring->tail) { - struct io_event *evp = aio_ring_event(info, head, KM_USER1); + struct io_event *evp = aio_ring_event(info, head); *ent = *evp; head = (head + 1) % info->nr; smp_mb(); /* finish reading the event before updatng the head */ ring->head = head; ret = 1; - put_aio_ring_event(evp, KM_USER1); + put_aio_ring_event(evp); } spin_unlock(&info->ring_lock); out: - kunmap_atomic(ring, KM_USER0); + kunmap_atomic(ring); dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret, (unsigned long)ring->head, (unsigned long)ring->tail); return ret; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c2183f3917cd..e85c04b9f61c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -357,7 +357,7 @@ static void bio_integrity_generate(struct bio *bio) bix.sector_size = bi->sector_size; bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); + void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; bix.prot_buf = prot_buf; @@ -371,7 +371,7 @@ static void bio_integrity_generate(struct bio *bio) total += sectors * bi->tuple_size; BUG_ON(total > bio->bi_integrity->bip_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } } @@ -498,7 +498,7 @@ static int bio_integrity_verify(struct bio *bio) bix.sector_size = bi->sector_size; bio_for_each_segment(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page, KM_USER0); + void *kaddr = kmap_atomic(bv->bv_page); bix.data_buf = kaddr + bv->bv_offset; bix.data_size = bv->bv_len; bix.prot_buf = prot_buf; @@ -507,7 +507,7 @@ static int bio_integrity_verify(struct bio *bio) ret = bi->verify_fn(&bix); if (ret) { - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); return ret; } @@ -517,7 +517,7 @@ static int bio_integrity_verify(struct bio *bio) total += sectors * bi->tuple_size; BUG_ON(total > bio->bi_integrity->bip_size); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); } return ret; diff --git a/fs/exec.c b/fs/exec.c index 153dee14fe55..1a07d1c2d78e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1339,13 +1339,13 @@ int remove_arg_zero(struct linux_binprm *bprm) ret = -EFAULT; goto out; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); put_arg_page(page); if (offset == PAGE_SIZE) diff --git a/fs/namei.c b/fs/namei.c index 46ea9cc16647..d135da74ce04 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3371,9 +3371,9 @@ retry: if (err) goto fail; - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memcpy(kaddr, symname, len-1); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = pagecache_write_end(NULL, mapping, 0, len-1, len-1, page, fsdata); diff --git a/fs/pipe.c b/fs/pipe.c index a932ced92a16..fe0502f9beb2 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -230,7 +230,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe, { if (atomic) { buf->flags |= PIPE_BUF_FLAG_ATOMIC; - return kmap_atomic(buf->page, KM_USER0); + return kmap_atomic(buf->page); } return kmap(buf->page); @@ -251,7 +251,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, { if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; - kunmap_atomic(map_data, KM_USER0); + kunmap_atomic(map_data); } else kunmap(buf->page); } @@ -565,14 +565,14 @@ redo1: iov_fault_in_pages_read(iov, chars); redo2: if (atomic) - src = kmap_atomic(page, KM_USER0); + src = kmap_atomic(page); else src = kmap(page); error = pipe_iov_copy_from_user(src, iov, chars, atomic); if (atomic) - kunmap_atomic(src, KM_USER0); + kunmap_atomic(src); else kunmap(page); diff --git a/fs/splice.c b/fs/splice.c index 1ec0493266b3..f16402ed915c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -737,15 +737,12 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto out; if (buf->page != page) { - /* - * Careful, ->map() uses KM_USER0! - */ char *src = buf->ops->map(pipe, buf, 1); - char *dst = kmap_atomic(page, KM_USER1); + char *dst = kmap_atomic(page); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); - kunmap_atomic(dst, KM_USER1); + kunmap_atomic(dst); buf->ops->unmap(pipe, buf, src); } ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, diff --git a/include/linux/bio.h b/include/linux/bio.h index 129a9c097958..de5422a57511 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -101,10 +101,10 @@ static inline int bio_has_allocated_vec(struct bio *bio) * I/O completely on that queue (see ide-dma for example) */ #define __bio_kmap_atomic(bio, idx, kmtype) \ - (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) + \ + (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) + \ bio_iovec_idx((bio), (idx))->bv_offset) -#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype) +#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr) /* * merge helpers etc @@ -317,7 +317,7 @@ static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) * balancing is a lot nicer this way */ local_irq_save(*flags); - addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ); + addr = (unsigned long) kmap_atomic(bvec->bv_page); BUG_ON(addr & ~PAGE_MASK); @@ -328,7 +328,7 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) { unsigned long ptr = (unsigned long) buffer & PAGE_MASK; - kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ); + kunmap_atomic((void *) ptr); local_irq_restore(*flags); } -- cgit v1.2.3 From e636825346b36a07ccfc8e30946d52855e21f681 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 19 Mar 2012 17:03:22 +0100 Subject: exit_signal: simplify the "we have changed execution domain" logic exit_notify() checks "tsk->self_exec_id != tsk->parent_exec_id" to handle the "we have changed execution domain" case. We can change do_thread() to always set ->exit_signal = SIGCHLD and remove this check to simplify the code. We could change setup_new_exec() instead, this looks more logical because it increments ->self_exec_id. But note that de_thread() already resets ->exit_signal if it changes the leader, let's keep both changes close to each other. Note that we change ->exit_signal lockless, this changes the rules. Thereafter ->exit_signal is not stable under tasklist but this is fine, the only possible change is OLDSIG -> SIGCHLD. This can race with eligible_child() but the race is harmless. We can race with reparent_leader() which changes our ->exit_signal in parallel, but it does the same change to SIGCHLD. The noticeable user-visible change is that the execing task is not "visible" to do_wait()->eligible_child(__WCLONE) right after exec. To me this looks more logical, and this is consistent with mt case. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +++ kernel/exit.c | 7 +------ 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index b0695a9900ef..1e94d2263ae0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -977,6 +977,9 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: + /* we have changed execution domain */ + tsk->exit_signal = SIGCHLD; + if (current->mm) setmax_mm_hiwater_rss(&sig->maxrss, current->mm); diff --git a/kernel/exit.c b/kernel/exit.c index 752d2c0abd19..51ac4ced1313 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -827,14 +827,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead) * If the parent exec id doesn't match the exec id we saved * when we started then we know the parent has changed security * domain. - * - * If our self_exec id doesn't match our parent_exec_id then - * we have changed execution domain as these two values started - * the same after a fork. */ if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD && - (tsk->parent_exec_id != tsk->real_parent->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id)) + tsk->parent_exec_id != tsk->real_parent->self_exec_id) tsk->exit_signal = SIGCHLD; if (unlikely(tsk->ptrace)) { -- cgit v1.2.3 From 701085b219016d38f105b031381b9cee6200253a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 19 Mar 2012 17:04:01 +0100 Subject: exec: move de_thread()->setmax_mm_hiwater_rss() into exec_mmap() Minor cleanup. de_thread()->setmax_mm_hiwater_rss() looks a bit strange, move it into exec_mmap() which plays with old_mm. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/exec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 1e94d2263ae0..95551c6da090 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -850,6 +850,7 @@ static int exec_mmap(struct mm_struct *mm) if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); + setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm); mm_update_next_owner(old_mm); mmput(old_mm); return 0; @@ -980,9 +981,6 @@ no_thread_group: /* we have changed execution domain */ tsk->exit_signal = SIGCHLD; - if (current->mm) - setmax_mm_hiwater_rss(&sig->maxrss, current->mm); - exit_itimers(sig); flush_itimer_signals(); -- cgit v1.2.3 From 8fc3dc5a3a17aa2b353886422bd89420619af211 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 17 Mar 2012 03:05:16 -0400 Subject: __register_binfmt() made void Just don't pass NULL to it - nobody does, anyway. Signed-off-by: Al Viro --- arch/alpha/kernel/binfmt_loader.c | 3 ++- arch/x86/ia32/ia32_aout.c | 3 ++- fs/binfmt_aout.c | 3 ++- fs/binfmt_elf.c | 3 ++- fs/binfmt_elf_fdpic.c | 3 ++- fs/binfmt_em86.c | 3 ++- fs/binfmt_flat.c | 3 ++- fs/binfmt_misc.c | 7 ++----- fs/binfmt_script.c | 3 ++- fs/binfmt_som.c | 3 ++- fs/exec.c | 6 ++---- include/linux/binfmts.h | 10 +++++----- 12 files changed, 27 insertions(+), 23 deletions(-) (limited to 'fs/exec.c') diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index 3fcfad410130..d1f474d1d44d 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -46,6 +46,7 @@ static struct linux_binfmt loader_format = { static int __init init_loader_binfmt(void) { - return insert_binfmt(&loader_format); + insert_binfmt(&loader_format); + return 0; } arch_initcall(init_loader_binfmt); diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 39e49091f648..cdfc8dc43670 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -519,7 +519,8 @@ out: static int __init init_aout_binfmt(void) { - return register_binfmt(&aout_format); + register_binfmt(&aout_format); + return 0; } static void __exit exit_aout_binfmt(void) diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 1ff94054d35a..a543364ba29b 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -454,7 +454,8 @@ out: static int __init init_aout_binfmt(void) { - return register_binfmt(&aout_format); + register_binfmt(&aout_format); + return 0; } static void __exit exit_aout_binfmt(void) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 07d096c49920..f8ac4251877e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2077,7 +2077,8 @@ out: static int __init init_elf_binfmt(void) { - return register_binfmt(&elf_format); + register_binfmt(&elf_format); + return 0; } static void __exit exit_elf_binfmt(void) diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 30745f459faf..e7afcb67a2d3 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -91,7 +91,8 @@ static struct linux_binfmt elf_fdpic_format = { static int __init init_elf_fdpic_binfmt(void) { - return register_binfmt(&elf_fdpic_format); + register_binfmt(&elf_fdpic_format); + return 0; } static void __exit exit_elf_fdpic_binfmt(void) diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index b8e8b0acf9bd..2790c7e1912e 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -100,7 +100,8 @@ static struct linux_binfmt em86_format = { static int __init init_em86_binfmt(void) { - return register_binfmt(&em86_format); + register_binfmt(&em86_format); + return 0; } static void __exit exit_em86_binfmt(void) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 1bffbe0ed778..68affab88146 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -950,7 +950,8 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) static int __init init_flat_binfmt(void) { - return register_binfmt(&flat_format); + register_binfmt(&flat_format); + return 0; } /****************************************************************************/ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a9198dfd5f85..1ffb60355cae 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -726,11 +726,8 @@ static struct file_system_type bm_fs_type = { static int __init init_misc_binfmt(void) { int err = register_filesystem(&bm_fs_type); - if (!err) { - err = insert_binfmt(&misc_format); - if (err) - unregister_filesystem(&bm_fs_type); - } + if (!err) + insert_binfmt(&misc_format); return err; } diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 396a9884591f..d3b8c1f63155 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -105,7 +105,8 @@ static struct linux_binfmt script_format = { static int __init init_script_binfmt(void) { - return register_binfmt(&script_format); + register_binfmt(&script_format); + return 0; } static void __exit exit_script_binfmt(void) diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index cc8560f6c9b0..ec15972dd98a 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -289,7 +289,8 @@ static int load_som_library(struct file *f) static int __init init_som_binfmt(void) { - return register_binfmt(&som_format); + register_binfmt(&som_format); + return 0; } static void __exit exit_som_binfmt(void) diff --git a/fs/exec.c b/fs/exec.c index 153dee14fe55..2c5ae338773c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -79,15 +79,13 @@ static atomic_t call_count = ATOMIC_INIT(1); static LIST_HEAD(formats); static DEFINE_RWLOCK(binfmt_lock); -int __register_binfmt(struct linux_binfmt * fmt, int insert) +void __register_binfmt(struct linux_binfmt * fmt, int insert) { - if (!fmt) - return -EINVAL; + BUG_ON(!fmt); write_lock(&binfmt_lock); insert ? list_add(&fmt->lh, &formats) : list_add_tail(&fmt->lh, &formats); write_unlock(&binfmt_lock); - return 0; } EXPORT_SYMBOL(__register_binfmt); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 0092102db2de..366422bc1633 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -92,17 +92,17 @@ struct linux_binfmt { unsigned long min_coredump; /* minimal dump size */ }; -extern int __register_binfmt(struct linux_binfmt *fmt, int insert); +extern void __register_binfmt(struct linux_binfmt *fmt, int insert); /* Registration of default binfmt handlers */ -static inline int register_binfmt(struct linux_binfmt *fmt) +static inline void register_binfmt(struct linux_binfmt *fmt) { - return __register_binfmt(fmt, 0); + __register_binfmt(fmt, 0); } /* Same as above, but adds a new binfmt at the top of the list */ -static inline int insert_binfmt(struct linux_binfmt *fmt) +static inline void insert_binfmt(struct linux_binfmt *fmt) { - return __register_binfmt(fmt, 1); + __register_binfmt(fmt, 1); } extern void unregister_binfmt(struct linux_binfmt *); -- cgit v1.2.3 From 19e5109fef2c368ab3f8a5157270f87f4a7c0326 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 23 Feb 2012 22:29:17 -0500 Subject: take removal of PF_FORKNOEXEC to flush_old_exec() Signed-off-by: Al Viro --- arch/x86/ia32/ia32_aout.c | 1 - fs/binfmt_aout.c | 1 - fs/binfmt_elf.c | 2 -- fs/binfmt_elf_fdpic.c | 3 --- fs/binfmt_flat.c | 1 - fs/binfmt_som.c | 1 - fs/exec.c | 2 +- 7 files changed, 1 insertion(+), 10 deletions(-) (limited to 'fs/exec.c') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index cdfc8dc43670..4c2e59a420b9 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -323,7 +323,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) } install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index a543364ba29b..4d5e6d26578c 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -267,7 +267,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) } install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8ac4251877e..81878b78c9d4 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -712,7 +712,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_dentry; /* OK, This is the point of no return */ - current->flags &= ~PF_FORKNOEXEC; current->mm->def_flags = def_flags; /* Do this immediately, since STACK_TOP as used in setup_arg_pages @@ -934,7 +933,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); if (retval < 0) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index e7afcb67a2d3..c64bf5ee2df4 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -335,8 +335,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, current->mm->context.exec_fdpic_loadmap = 0; current->mm->context.interp_fdpic_loadmap = 0; - current->flags &= ~PF_FORKNOEXEC; - #ifdef CONFIG_MMU elf_fdpic_arch_lay_out_mm(&exec_params, &interp_params, @@ -414,7 +412,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, #endif install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) goto error_kill; diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 68affab88146..04f61f0bdfde 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -902,7 +902,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) libinfo.lib_list[j].start_data:UNLOADED_LIB; install_exec_creds(bprm); - current->flags &= ~PF_FORKNOEXEC; set_binfmt(&flat_format); diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index ec15972dd98a..e4fc746629a7 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -225,7 +225,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out_free; /* OK, This is the point of no return */ - current->flags &= ~PF_FORKNOEXEC; current->personality = PER_HPUX; setup_new_exec(bprm); diff --git a/fs/exec.c b/fs/exec.c index 2c5ae338773c..60478a0e7a37 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1110,7 +1110,7 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); + current->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD); flush_thread(); current->personality &= ~bprm->per_clear; -- cgit v1.2.3 From 05af2e104a0c282dcd9303431e1360750ba76de6 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 21 Mar 2012 16:34:13 -0700 Subject: mm, counters: remove task argument to sync_mm_rss() and __sync_task_rss_stat() sync_mm_rss() can only be used for current to avoid race conditions in iterating and clearing its per-task counters. Remove the task argument for it and its helper function, __sync_task_rss_stat(), to avoid thinking it can be used safely for anything other than current. Signed-off-by: David Rientjes Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm.h | 4 ++-- kernel/exit.c | 2 +- mm/memory.c | 18 +++++++++--------- mm/mmu_context.c | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 3908544f5d18..6ed164d20d7d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -824,7 +824,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; - sync_mm_rss(tsk, old_mm); + sync_mm_rss(old_mm); mm_release(tsk, old_mm); if (old_mm) { diff --git a/include/linux/mm.h b/include/linux/mm.h index df17ff23d50e..ce2b2a3b2876 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1131,9 +1131,9 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, } #if defined(SPLIT_RSS_COUNTING) -void sync_mm_rss(struct task_struct *task, struct mm_struct *mm); +void sync_mm_rss(struct mm_struct *mm); #else -static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +static inline void sync_mm_rss(struct mm_struct *mm) { } #endif diff --git a/kernel/exit.c b/kernel/exit.c index 0ed15fed579f..d26acd3c1e2e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -934,7 +934,7 @@ void do_exit(long code) acct_update_integrals(tsk); /* sync mm's RSS info before statistics gathering */ if (tsk->mm) - sync_mm_rss(tsk, tsk->mm); + sync_mm_rss(tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/mm/memory.c b/mm/memory.c index a5de734e14a7..2d27239ce4dd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -125,17 +125,17 @@ core_initcall(init_zero_pfn); #if defined(SPLIT_RSS_COUNTING) -static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm) +static void __sync_task_rss_stat(struct mm_struct *mm) { int i; for (i = 0; i < NR_MM_COUNTERS; i++) { - if (task->rss_stat.count[i]) { - add_mm_counter(mm, i, task->rss_stat.count[i]); - task->rss_stat.count[i] = 0; + if (current->rss_stat.count[i]) { + add_mm_counter(mm, i, current->rss_stat.count[i]); + current->rss_stat.count[i] = 0; } } - task->rss_stat.events = 0; + current->rss_stat.events = 0; } static void add_mm_counter_fast(struct mm_struct *mm, int member, int val) @@ -157,12 +157,12 @@ static void check_sync_rss_stat(struct task_struct *task) if (unlikely(task != current)) return; if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH)) - __sync_task_rss_stat(task, task->mm); + __sync_task_rss_stat(task->mm); } -void sync_mm_rss(struct task_struct *task, struct mm_struct *mm) +void sync_mm_rss(struct mm_struct *mm) { - __sync_task_rss_stat(task, mm); + __sync_task_rss_stat(mm); } #else /* SPLIT_RSS_COUNTING */ @@ -643,7 +643,7 @@ static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss) int i; if (current->mm == mm) - sync_mm_rss(current, mm); + sync_mm_rss(mm); for (i = 0; i < NR_MM_COUNTERS; i++) if (rss[i]) add_mm_counter(mm, i, rss[i]); diff --git a/mm/mmu_context.c b/mm/mmu_context.c index cf332bc0080a..3dcfaf4ed355 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -53,7 +53,7 @@ void unuse_mm(struct mm_struct *mm) struct task_struct *tsk = current; task_lock(tsk); - sync_mm_rss(tsk, mm); + sync_mm_rss(mm); tsk->mm = NULL; /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); -- cgit v1.2.3 From 96f951edb1f1bdbbc99b0cd458f9808bb83d58ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Mar 2012 18:30:03 +0100 Subject: Add #includes needed to permit the removal of asm/system.h asm/system.h is a cause of circular dependency problems because it contains commonly used primitive stuff like barrier definitions and uncommonly used stuff like switch_to() that might require MMU definitions. asm/system.h has been disintegrated by this point on all arches into the following common segments: (1) asm/barrier.h Moved memory barrier definitions here. (2) asm/cmpxchg.h Moved xchg() and cmpxchg() here. #included in asm/atomic.h. (3) asm/bug.h Moved die() and similar here. (4) asm/exec.h Moved arch_align_stack() here. (5) asm/elf.h Moved AT_VECTOR_SIZE_ARCH here. (6) asm/switch_to.h Moved switch_to() here. Signed-off-by: David Howells --- drivers/misc/sgi-gru/gru_instructions.h | 1 + drivers/staging/crystalhd/bc_dts_defs.h | 2 ++ fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/exec.c | 1 + include/asm-generic/bitops/atomic.h | 2 +- include/linux/llist.h | 3 +-- include/linux/mtd/map.h | 1 + include/linux/spinlock.h | 1 + kernel/sched/core.c | 1 + 10 files changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/exec.c') diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index d95587cc794c..04d5170ac149 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -40,6 +40,7 @@ extern void gru_wait_abort_proc(void *cb); *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ } while (0) #elif defined(CONFIG_X86_64) +#include #define __flush_cache(p) clflush(p) #define gru_ordered_store_ulong(p, v) \ do { \ diff --git a/drivers/staging/crystalhd/bc_dts_defs.h b/drivers/staging/crystalhd/bc_dts_defs.h index 8cd51a7aad8e..647e116e10de 100644 --- a/drivers/staging/crystalhd/bc_dts_defs.h +++ b/drivers/staging/crystalhd/bc_dts_defs.h @@ -26,6 +26,8 @@ #ifndef _BC_DTS_DEFS_H_ #define _BC_DTS_DEFS_H_ +#include + /* BIT Mask */ #define BC_BIT(_x) (1 << (_x)) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 81878b78c9d4..18276531f7c6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -35,6 +35,7 @@ #include #include #include +#include static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs); static int load_elf_library(struct file *); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c64bf5ee2df4..9bd5612a8224 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -39,6 +39,7 @@ #include #include #include +#include typedef char *elf_caddr_t; diff --git a/fs/exec.c b/fs/exec.c index 23559c227d9c..c8b63d14da85 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index ecc44a8e2b44..9ae6c34dc191 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -2,7 +2,7 @@ #define _ASM_GENERIC_BITOPS_ATOMIC_H_ #include -#include +#include #ifdef CONFIG_SMP #include diff --git a/include/linux/llist.h b/include/linux/llist.h index 801b44b07aac..a5199f6d0e82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -56,8 +56,7 @@ */ #include -#include -#include +#include struct llist_head { struct llist_node *first; diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 94e924e2ecd5..ade5c990f1f0 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1 #define map_bankwidth(map) 1 diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 7df6c17b0281..fa0f93e4d86d 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -55,6 +55,7 @@ #include #include #include +#include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 503d6426126d..157fb9b2b186 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -73,6 +73,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 6308191f6f55d3629c7dbe72dfb856ad9fa560fd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 30 Mar 2012 18:26:36 +0200 Subject: tracing, sched, vfs: Fix 'old_pid' usage in trace_sched_process_exec() 1. TRACE_EVENT(sched_process_exec) forgets to actually use the old pid argument, it sets ->old_pid = p->pid. 2. search_binary_handler() uses the wrong pid number. tracepoint needs the global pid_t from the root namespace, while old_pid is the virtual pid number as it seen by the tracer/parent. With this patch we have two pid_t's in search_binary_handler(), not really nice. Perhaps we should switch to "struct pid*", but in this case it would be better to cleanup the current code first and move the "depth == 0" code outside. Signed-off-by: Oleg Nesterov Cc: David Smith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Denys Vlasenko Link: http://lkml.kernel.org/r/20120330162636.GA4857@redhat.com Signed-off-by: Ingo Molnar --- fs/exec.c | 7 ++++--- include/trace/events/sched.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 23559c227d9c..644f6c4eb606 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1370,7 +1370,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; - pid_t old_pid; + pid_t old_pid, old_vpid; retval = security_bprm_check(bprm); if (retval) @@ -1381,8 +1381,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) return retval; /* Need to fetch pid before load_binary changes it */ + old_pid = current->pid; rcu_read_lock(); - old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); + old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); retval = -ENOENT; @@ -1405,7 +1406,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (retval >= 0) { if (depth == 0) { trace_sched_process_exec(current, old_pid, bprm); - ptrace_event(PTRACE_EVENT_EXEC, old_pid); + ptrace_event(PTRACE_EVENT_EXEC, old_vpid); } put_binfmt(fmt); allow_write_access(bprm->file); diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index fbc7b1ad929b..ea7a2035456d 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -295,7 +295,7 @@ TRACE_EVENT(sched_process_exec, TP_fast_assign( __assign_str(filename, bprm->filename); __entry->pid = p->pid; - __entry->old_pid = p->pid; + __entry->old_pid = old_pid; ), TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename), -- cgit v1.2.3