From 336dd1f70ff62d7dd8655228caed4c5bfc818c56 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 23 Jul 2008 21:29:29 -0700 Subject: flag parameters: dup2 This patch adds the new dup3 syscall. It extends the old dup2 syscall by one parameter which is meant to hold a flag value. Support for the O_CLOEXEC flag is added in this patch. The following test must be adjusted for architectures other than x86 and x86-64 and in case the syscall numbers changed. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #include #include #include #include #include #ifndef __NR_dup3 # ifdef __x86_64__ # define __NR_dup3 292 # elif defined __i386__ # define __NR_dup3 330 # else # error "need __NR_dup3" # endif #endif int main (void) { int fd = syscall (__NR_dup3, 1, 4, 0); if (fd == -1) { puts ("dup3(0) failed"); return 1; } int coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if (coe & FD_CLOEXEC) { puts ("dup3(0) set close-on-exec flag"); return 1; } close (fd); fd = syscall (__NR_dup3, 1, 4, O_CLOEXEC); if (fd == -1) { puts ("dup3(O_CLOEXEC) failed"); return 1; } coe = fcntl (fd, F_GETFD); if (coe == -1) { puts ("fcntl failed"); return 1; } if ((coe & FD_CLOEXEC) == 0) { puts ("dup3(O_CLOEXEC) set close-on-exec flag"); return 1; } close (fd); puts ("OK"); return 0; } ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Ulrich Drepper Acked-by: Davide Libenzi Cc: Michael Kerrisk Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fcntl.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index 330a7d782591..9679fcbdeaa0 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -125,13 +125,16 @@ static int dupfd(struct file *file, unsigned int start, int cloexec) return fd; } -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; struct fdtable *fdt; + if ((flags & ~O_CLOEXEC) != 0) + return -EINVAL; + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; @@ -163,7 +166,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); - FD_CLR(newfd, fdt->close_on_exec); + if (flags & O_CLOEXEC) + FD_SET(newfd, fdt->close_on_exec); + else + FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); if (tofree) @@ -181,6 +187,11 @@ out_fput: goto out; } +asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +{ + return sys_dup3(oldfd, newfd, 0); +} + asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; -- cgit v1.2.3 From 3c333937ee3be114b181c4861188cfe8f6a59697 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 25 Jul 2008 22:32:13 -0400 Subject: [PATCH] dup3 fix Al Viro notice one cornercase that the new dup3() code. The dup2() function, as a special case, handles dup-ing to the same file descriptor. In this case the current dup3() code does nothing at all. I.e., it ingnores the flags parameter. This shouldn't happen, the close-on-exec flag should be set if requested. In case the O_CLOEXEC bit in the flags parameter is not set the dup3() function should behave in this respect identical to dup2(). This means dup3(fd, fd, 0) should not actively reset the c-o-e flag. The patch below implements this minor change. [AV: credits to Artur Grabowski for bringing that up as potential subtle point in dup2() behaviour] Signed-off-by: Ulrich Drepper Signed-off-by: Al Viro --- fs/fcntl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index 9679fcbdeaa0..ce12a6885115 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -139,8 +139,13 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) if (!(file = fcheck(oldfd))) goto out_unlock; err = newfd; - if (newfd == oldfd) + if (unlikely(newfd == oldfd)) { + if (flags & O_CLOEXEC) { + fdt = files_fdtable(files); + FD_SET(newfd, fdt->close_on_exec); + } goto out_unlock; + } err = -EBADF; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out_unlock; -- cgit v1.2.3 From 6c5d0512a091480c9f981162227fdb1c9d70e555 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 13:38:19 -0400 Subject: [PATCH] get rid of corner case in dup3() entirely Since Ulrich is OK with getting rid of dup3(fd, fd, flags) completely, to hell the damn thing goes. Corner case for dup2() is handled in sys_dup2() (complete with -EBADF if dup2(fd, fd) is called with fd that is not open), the rest is done in dup3(). Signed-off-by: Al Viro --- fs/fcntl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index ce12a6885115..3deec9887089 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -135,18 +135,12 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) if ((flags & ~O_CLOEXEC) != 0) return -EINVAL; + if (unlikely(oldfd == newfd)) + return -EINVAL; + spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; - err = newfd; - if (unlikely(newfd == oldfd)) { - if (flags & O_CLOEXEC) { - fdt = files_fdtable(files); - FD_SET(newfd, fdt->close_on_exec); - } - goto out_unlock; - } - err = -EBADF; if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out_unlock; get_file(file); /* We are now finished with oldfd */ @@ -194,6 +188,14 @@ out_fput: asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) { + if (unlikely(newfd == oldfd)) { /* corner case */ + struct files_struct *files = current->files; + rcu_read_lock(); + if (!fcheck_files(files, oldfd)) + oldfd = -EBADF; + rcu_read_unlock(); + return oldfd; + } return sys_dup3(oldfd, newfd, 0); } -- cgit v1.2.3 From 4e1e018ecc6f7bfd10fc75b3ff9715cc8164e0a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 26 Jul 2008 16:01:20 -0400 Subject: [PATCH] fix RLIM_NOFILE handling * dup2() should return -EBADF on exceeded sysctl_nr_open * dup() should *not* return -EINVAL even if you have rlimit set to 0; it should get -EMFILE instead. Check for orig_start exceeding rlimit taken to sys_fcntl(). Failing expand_files() in dup{2,3}() now gets -EMFILE remapped to -EBADF. Consequently, remaining checks for rlimit are taken to expand_files(). Signed-off-by: Al Viro --- fs/fcntl.c | 18 ++++++------------ fs/file.c | 9 +++++++++ fs/open.c | 9 --------- 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index 3deec9887089..61d625136813 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -64,11 +64,6 @@ static int locate_fd(unsigned int orig_start, int cloexec) struct fdtable *fdt; spin_lock(&files->file_lock); - - error = -EINVAL; - if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - repeat: fdt = files_fdtable(files); /* @@ -83,10 +78,6 @@ repeat: if (start < fdt->max_fds) newfd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, start); - - error = -EMFILE; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; error = expand_files(files, newfd); if (error < 0) @@ -141,13 +132,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out_unlock; get_file(file); /* We are now finished with oldfd */ err = expand_files(files, newfd); - if (err < 0) + if (unlikely(err < 0)) { + if (err == -EMFILE) + err = -EBADF; goto out_fput; + } /* To avoid races with open() and dup(), we will mark the fd as * in-use in the open-file bitmap throughout the entire dup2() @@ -328,6 +320,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: + if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + break; get_file(filp); err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); break; diff --git a/fs/file.c b/fs/file.c index 7b3887e054d0..d8773b19fe47 100644 --- a/fs/file.c +++ b/fs/file.c @@ -250,9 +250,18 @@ int expand_files(struct files_struct *files, int nr) struct fdtable *fdt; fdt = files_fdtable(files); + + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + return -EMFILE; + /* Do we need to expand? */ if (nr < fdt->max_fds) return 0; + /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; diff --git a/fs/open.c b/fs/open.c index 3fe1a6857c75..52647be277a2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -972,7 +972,6 @@ int get_unused_fd_flags(int flags) int fd, error; struct fdtable *fdt; - error = -EMFILE; spin_lock(&files->file_lock); repeat: @@ -980,13 +979,6 @@ repeat: fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, files->next_fd); - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - /* Do we need to expand the fd array or fd set? */ error = expand_files(files, fd); if (error < 0) @@ -997,7 +989,6 @@ repeat: * If we needed to expand the fs array we * might have blocked - try again. */ - error = -EMFILE; goto repeat; } -- cgit v1.2.3 From 1027abe8827b47f7e9c4ed6514fde3d44f79963c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 Jul 2008 04:13:04 -0400 Subject: [PATCH] merge locate_fd() and get_unused_fd() New primitive: alloc_fd(start, flags). get_unused_fd() and get_unused_fd_flags() become wrappers on top of it. Signed-off-by: Al Viro --- fs/fcntl.c | 87 +++++++++------------------------------------------- fs/file.c | 61 ++++++++++++++++++++++++++++++++++++ fs/open.c | 56 --------------------------------- include/linux/file.h | 3 +- 4 files changed, 77 insertions(+), 130 deletions(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index 61d625136813..2e40799daad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -49,73 +49,6 @@ static int get_close_on_exec(unsigned int fd) return res; } -/* - * locate_fd finds a free file descriptor in the open_fds fdset, - * expanding the fd arrays if necessary. Must be called with the - * file_lock held for write. - */ - -static int locate_fd(unsigned int orig_start, int cloexec) -{ - struct files_struct *files = current->files; - unsigned int newfd; - unsigned int start; - int error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); -repeat: - fdt = files_fdtable(files); - /* - * Someone might have closed fd's in the range - * orig_start..fdt->next_fd - */ - start = orig_start; - if (start < files->next_fd) - start = files->next_fd; - - newfd = start; - if (start < fdt->max_fds) - newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fds, start); - - error = expand_files(files, newfd); - if (error < 0) - goto out; - - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - - if (start <= files->next_fd) - files->next_fd = newfd + 1; - - FD_SET(newfd, fdt->open_fds); - if (cloexec) - FD_SET(newfd, fdt->close_on_exec); - else - FD_CLR(newfd, fdt->close_on_exec); - error = newfd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -static int dupfd(struct file *file, unsigned int start, int cloexec) -{ - int fd = locate_fd(start, cloexec); - if (fd >= 0) - fd_install(fd, file); - else - fput(file); - - return fd; -} - asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; @@ -194,10 +127,15 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; - struct file * file = fget(fildes); - - if (file) - ret = dupfd(file, 0, 0); + struct file *file = fget(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } return ret; } @@ -322,8 +260,11 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_DUPFD_CLOEXEC: if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) break; - get_file(filp); - err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); + if (err >= 0) { + get_file(filp); + fd_install(err, filp); + } break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/file.c b/fs/file.c index d8773b19fe47..f313314f996f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include #include #include #include @@ -432,3 +433,63 @@ struct files_struct init_files = { }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; + +/* + * allocate a file descriptor, mark it busy. + */ +int alloc_fd(unsigned start, unsigned flags) +{ + struct files_struct *files = current->files; + unsigned int fd; + int error; + struct fdtable *fdt; + + spin_lock(&files->file_lock); +repeat: + fdt = files_fdtable(files); + fd = start; + if (fd < files->next_fd) + fd = files->next_fd; + + if (fd < fdt->max_fds) + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fds, fd); + + error = expand_files(files, fd); + if (error < 0) + goto out; + + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + + if (start <= files->next_fd) + files->next_fd = fd + 1; + + FD_SET(fd, fdt->open_fds); + if (flags & O_CLOEXEC) + FD_SET(fd, fdt->close_on_exec); + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; +#if 1 + /* Sanity check */ + if (rcu_dereference(fdt->fd[fd]) != NULL) { + printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); + rcu_assign_pointer(fdt->fd[fd], NULL); + } +#endif + +out: + spin_unlock(&files->file_lock); + return error; +} + +int get_unused_fd(void) +{ + return alloc_fd(0, 0); +} +EXPORT_SYMBOL(get_unused_fd); diff --git a/fs/open.c b/fs/open.c index 52647be277a2..07da9359481c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -963,62 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) } EXPORT_SYMBOL(dentry_open); -/* - * Find an empty file descriptor entry, and mark it busy. - */ -int get_unused_fd_flags(int flags) -{ - struct files_struct * files = current->files; - int fd, error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - goto repeat; - } - - FD_SET(fd, fdt->open_fds); - if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); - else - FD_CLR(fd, fdt->close_on_exec); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -int get_unused_fd(void) -{ - return get_unused_fd_flags(0); -} - -EXPORT_SYMBOL(get_unused_fd); - static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); diff --git a/include/linux/file.h b/include/linux/file.h index 27c64bdc68c9..a20259e248a5 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -34,8 +34,9 @@ extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); +extern int alloc_fd(unsigned start, unsigned flags); extern int get_unused_fd(void); -extern int get_unused_fd_flags(int flags); +#define get_unused_fd_flags(flags) alloc_fd(0, (flags)) extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); -- cgit v1.2.3 From 1b7e190b4764ea3ca1080404dd593eae5230d2b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 Jul 2008 06:18:03 -0400 Subject: [PATCH] clean dup2() up a bit Signed-off-by: Al Viro --- fs/fcntl.c | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) (limited to 'fs/fcntl.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index 2e40799daad6..ac4f7db9f134 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -63,31 +63,35 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) return -EINVAL; spin_lock(&files->file_lock); - if (!(file = fcheck(oldfd))) - goto out_unlock; - get_file(file); /* We are now finished with oldfd */ - err = expand_files(files, newfd); + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; if (unlikely(err < 0)) { if (err == -EMFILE) - err = -EBADF; - goto out_fput; + goto Ebadf; + goto out_unlock; } - - /* To avoid races with open() and dup(), we will mark the fd as - * in-use in the open-file bitmap throughout the entire dup2() - * process. This is quite safe: do_close() uses the fd array - * entry, not the bitmap, to decide what work needs to be - * done. --sct */ - /* Doesn't work. open() might be there first. --AV */ - - /* Yes. It's a race. In user space. Nothing sane to do */ + /* + * We need to detect attempts to do dup2() over allocated but still + * not finished descriptor. NB: OpenBSD avoids that at the price of + * extra work in their equivalent of fget() - they insert struct + * file immediately after grabbing descriptor, mark it larval if + * more work (e.g. actual opening) is needed and make sure that + * fget() treats larval files as absent. Potentially interesting, + * but while extra work in fget() is trivial, locking implications + * and amount of surgery on open()-related paths in VFS are not. + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" + * deadlocks in rather amusing ways, AFAICS. All of that is out of + * scope of POSIX or SUS, since neither considers shared descriptor + * tables and this condition does not arise without those. + */ err = -EBUSY; fdt = files_fdtable(files); tofree = fdt->fd[newfd]; if (!tofree && FD_ISSET(newfd, fdt->open_fds)) - goto out_fput; - + goto out_unlock; + get_file(file); rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); if (flags & O_CLOEXEC) @@ -98,17 +102,14 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) if (tofree) filp_close(tofree, files); - err = newfd; -out: - return err; -out_unlock: - spin_unlock(&files->file_lock); - goto out; -out_fput: + return newfd; + +Ebadf: + err = -EBADF; +out_unlock: spin_unlock(&files->file_lock); - fput(file); - goto out; + return err; } asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) -- cgit v1.2.3