From 890275b5eb79e9933d12290473eab9ac38da0051 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Tue, 2 Nov 2010 10:13:07 -0400 Subject: IMA: maintain i_readcount in the VFS layer ima_counts_get() updated the readcount and invalidated the PCR, as necessary. Only update the i_readcount in the VFS layer. Move the PCR invalidation checks to ima_file_check(), where they belong. Maintaining the i_readcount in the VFS layer will allow other subsystems to use i_readcount. Signed-off-by: Mimi Zohar Acked-by: Eric Paris --- fs/open.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 4197b9ed023d..0d485c50bb95 100644 --- a/fs/open.c +++ b/fs/open.c @@ -688,7 +688,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (error) goto cleanup_all; } - ima_counts_get(f); + if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + i_readcount_inc(inode); f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -- cgit v1.2.3 From 1ca551c6caae7b52178555cdedea6ca26444be46 Mon Sep 17 00:00:00 2001 From: Marco Stornelli Date: Sat, 5 Mar 2011 11:10:19 +0100 Subject: Check for immutable/append flag in fallocate path In the fallocate path the kernel doesn't check for the immutable/append flag. It's possible to have a race condition in this scenario: an application opens a file for read/write and does something; meanwhile root sets the immutable flag on the file, yet the application can still call fallocate successfully at that point. In addition, on an append-only file we allow only the reserve operation, not any unreserve operation.
Signed-off-by: Marco Stornelli Signed-off-by: Al Viro --- fs/open.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 5a2c6ebc22b5..b47aab39c057 100644 --- a/fs/open.c +++ b/fs/open.c @@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (!(file->f_mode & FMODE_WRITE)) return -EBADF; + + /* It's not possible punch hole on append only file */ + if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) + return -EPERM; + + if (IS_IMMUTABLE(inode)) + return -EPERM; + /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. -- cgit v1.2.3 From 47c805dc2d2dff686962f5f0baa6bac2d703ba19 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Feb 2011 17:44:09 -0500 Subject: switch do_filp_open() to struct open_flags take calculation of open_flags by open(2) arguments into new helper in fs/open.c, move filp_open() over there, have it and do_sys_open() use that helper, switch exec.c callers of do_filp_open() to explicit (and constant) struct open_flags. 
Signed-off-by: Al Viro --- fs/exec.c | 18 +++++++---- fs/internal.h | 8 +++++ fs/namei.c | 88 ++++++------------------------------------------------ fs/open.c | 73 +++++++++++++++++++++++++++++++++++++++++++- include/linux/fs.h | 2 -- 5 files changed, 101 insertions(+), 88 deletions(-) (limited to 'fs/open.c') diff --git a/fs/exec.c b/fs/exec.c index 52a447d9b6ab..ba99e1abb1aa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) struct file *file; char *tmp = getname(library); int error = PTR_ERR(tmp); + static const struct open_flags uselib_flags = { + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, + .intent = LOOKUP_OPEN + }; if (IS_ERR(tmp)) goto out; - file = do_filp_open(AT_FDCWD, tmp, - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, - MAY_READ | MAY_EXEC | MAY_OPEN); + file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); putname(tmp); error = PTR_ERR(file); if (IS_ERR(file)) @@ -721,10 +724,13 @@ struct file *open_exec(const char *name) { struct file *file; int err; + static const struct open_flags open_exec_flags = { + .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, + .acc_mode = MAY_EXEC | MAY_OPEN, + .intent = LOOKUP_OPEN + }; - file = do_filp_open(AT_FDCWD, name, - O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, - MAY_EXEC | MAY_OPEN); + file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); if (IS_ERR(file)) goto out; diff --git a/fs/internal.h b/fs/internal.h index 9b976b57d7fe..6fdbdf2c6047 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -106,6 +106,14 @@ extern void put_super(struct super_block *sb); struct nameidata; extern struct file *nameidata_to_filp(struct nameidata *); extern void release_open_intent(struct nameidata *); +struct open_flags { + int open_flag; + int mode; + int acc_mode; + int intent; +}; +extern struct file *do_filp_open(int dfd, const char *pathname, + const struct open_flags *op, int 
lookup_flags); /* * inode.c diff --git a/fs/namei.c b/fs/namei.c index 5e4206f45371..9c7fa946abe1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2169,13 +2169,6 @@ exit: return ERR_PTR(error); } -struct open_flags { - int open_flag; - int mode; - int acc_mode; - int intent; -}; - /* * Handle O_CREAT case for do_filp_open */ @@ -2305,74 +2298,28 @@ exit: * open_to_namei_flags() for more details. */ struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode, int acc_mode) + const struct open_flags *op, int flags) { struct file *filp; struct nameidata nd; int error; struct path path; int count = 0; - int flag = open_to_namei_flags(open_flag); - int flags = 0; - struct open_flags op; - - if (!(open_flag & O_CREAT)) - mode = 0; - - /* Must never be set by userspace */ - open_flag &= ~FMODE_NONOTIFY; - - /* - * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only - * check for O_DSYNC if the need any syncing at all we enforce it's - * always set instead of having to deal with possibly weird behaviour - * for malicious applications setting only __O_SYNC. - */ - if (open_flag & __O_SYNC) - open_flag |= O_DSYNC; - - op.open_flag = open_flag; - - if (!acc_mode) - acc_mode = MAY_OPEN | ACC_MODE(open_flag); - - /* O_TRUNC implies we need access checks for write permissions */ - if (open_flag & O_TRUNC) - acc_mode |= MAY_WRITE; - - /* Allow the LSM permission hook to distinguish append - access from general write access. 
*/ - if (open_flag & O_APPEND) - acc_mode |= MAY_APPEND; - - op.acc_mode = acc_mode; - - op.intent = LOOKUP_OPEN; - if (open_flag & O_CREAT) { - op.intent |= LOOKUP_CREATE; - if (open_flag & O_EXCL) - op.intent |= LOOKUP_EXCL; - } - - if (open_flag & O_DIRECTORY) - flags |= LOOKUP_DIRECTORY; - if (!(open_flag & O_NOFOLLOW)) - flags |= LOOKUP_FOLLOW; filp = get_empty_filp(); if (!filp) return ERR_PTR(-ENFILE); - filp->f_flags = open_flag; + filp->f_flags = op->open_flag; nd.intent.open.file = filp; - nd.intent.open.flags = flag; - nd.intent.open.create_mode = mode; + nd.intent.open.flags = open_to_namei_flags(op->open_flag); + nd.intent.open.create_mode = op->mode; - if (open_flag & O_CREAT) + if (op->open_flag & O_CREAT) goto creat; /* !O_CREAT, simple open */ - error = do_path_lookup(dfd, pathname, flags | op.intent, &nd); + error = do_path_lookup(dfd, pathname, flags | op->intent, &nd); if (unlikely(error)) goto out_filp2; error = -ELOOP; @@ -2386,7 +2333,7 @@ struct file *do_filp_open(int dfd, const char *pathname, goto out_path2; } audit_inode(pathname, nd.path.dentry); - filp = finish_open(&nd, open_flag, acc_mode); + filp = finish_open(&nd, op->open_flag, op->acc_mode); out2: release_open_intent(&nd); return filp; @@ -2416,7 +2363,7 @@ reval: /* * We have the parent and last component. 
*/ - filp = do_last(&nd, &path, &op, pathname); + filp = do_last(&nd, &path, op, pathname); while (unlikely(!filp)) { /* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; @@ -2443,7 +2390,7 @@ reval: if (unlikely(error)) filp = ERR_PTR(error); else - filp = do_last(&nd, &path, &op, pathname); + filp = do_last(&nd, &path, op, pathname); if (!IS_ERR(cookie) && linki->i_op->put_link) linki->i_op->put_link(link.dentry, &nd, cookie); path_put(&link); @@ -2465,23 +2412,6 @@ out_filp: goto out; } -/** - * filp_open - open file and return file pointer - * - * @filename: path to open - * @flags: open flags as per the open(2) second argument - * @mode: mode for the new file if O_CREAT is set, else ignored - * - * This is the helper to open a file from kernelspace if you really - * have to. But in generally you should not do this, so please move - * along, nothing to see here.. - */ -struct file *filp_open(const char *filename, int flags, int mode) -{ - return do_filp_open(AT_FDCWD, filename, flags, mode, 0); -} -EXPORT_SYMBOL(filp_open); - /** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info diff --git a/fs/open.c b/fs/open.c index b47aab39c057..d05e18c60bae 100644 --- a/fs/open.c +++ b/fs/open.c @@ -890,15 +890,86 @@ void fd_install(unsigned int fd, struct file *file) EXPORT_SYMBOL(fd_install); +static inline int build_open_flags(int flags, int mode, struct open_flags *op) +{ + int lookup_flags = 0; + int acc_mode; + + if (!(flags & O_CREAT)) + mode = 0; + op->mode = mode; + + /* Must never be set by userspace */ + flags &= ~FMODE_NONOTIFY; + + /* + * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only + * check for O_DSYNC if the need any syncing at all we enforce it's + * always set instead of having to deal with possibly weird behaviour + * for malicious applications setting only __O_SYNC. 
+ */ + if (flags & __O_SYNC) + flags |= O_DSYNC; + + op->open_flag = flags; + + acc_mode = MAY_OPEN | ACC_MODE(flags); + + /* O_TRUNC implies we need access checks for write permissions */ + if (flags & O_TRUNC) + acc_mode |= MAY_WRITE; + + /* Allow the LSM permission hook to distinguish append + access from general write access. */ + if (flags & O_APPEND) + acc_mode |= MAY_APPEND; + + op->acc_mode = acc_mode; + + op->intent = LOOKUP_OPEN; + if (flags & O_CREAT) { + op->intent |= LOOKUP_CREATE; + if (flags & O_EXCL) + op->intent |= LOOKUP_EXCL; + } + + if (flags & O_DIRECTORY) + lookup_flags |= LOOKUP_DIRECTORY; + if (!(flags & O_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + return lookup_flags; +} + +/** + * filp_open - open file and return file pointer + * + * @filename: path to open + * @flags: open flags as per the open(2) second argument + * @mode: mode for the new file if O_CREAT is set, else ignored + * + * This is the helper to open a file from kernelspace if you really + * have to. But in generally you should not do this, so please move + * along, nothing to see here.. 
+ */ +struct file *filp_open(const char *filename, int flags, int mode) +{ + struct open_flags op; + int lookup = build_open_flags(flags, mode, &op); + return do_filp_open(AT_FDCWD, filename, &op, lookup); +} +EXPORT_SYMBOL(filp_open); + long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { + struct open_flags op; + int lookup = build_open_flags(flags, mode, &op); char *tmp = getname(filename); int fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd_flags(flags); if (fd >= 0) { - struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); + struct file *f = do_filp_open(dfd, tmp, &op, lookup); if (IS_ERR(f)) { put_unused_fd(fd); fd = PTR_ERR(f); diff --git a/include/linux/fs.h b/include/linux/fs.h index e38b50a4b9d2..9c75714f92c1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2205,8 +2205,6 @@ extern struct file *create_read_pipe(struct file *f, int flags); extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); -extern struct file *do_filp_open(int dfd, const char *pathname, - int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); extern int kernel_read(struct file *, loff_t, char *, unsigned long); -- cgit v1.2.3 From 73d049a40fc6269189c4e2ba6792cb5dd054883c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 11 Mar 2011 12:08:24 -0500 Subject: open-style analog of vfs_path_lookup() new function: file_open_root(dentry, mnt, name, flags) opens the file vfs_path_lookup would arrive at. Note that name can be empty; in that case the usual requirement that dentry should be a directory is lifted. open-coded equivalents switched to it, may_open() got down to exactly one caller and became static.
Signed-off-by: Al Viro --- arch/um/drivers/mconsole_kern.c | 21 ++--------- fs/internal.h | 2 ++ fs/namei.c | 80 ++++++++++++++++++++++++++--------------- fs/nfsctl.c | 21 +++-------- fs/open.c | 14 ++++++++ include/linux/fs.h | 4 +-- kernel/sysctl_binary.c | 19 +--------- 7 files changed, 77 insertions(+), 84 deletions(-) (limited to 'fs/open.c') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 975613b23dcf..c70e047eed72 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -124,35 +124,18 @@ void mconsole_log(struct mc_request *req) #if 0 void mconsole_proc(struct mc_request *req) { - struct nameidata nd; struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt; struct file *file; - int n, err; + int n; char *ptr = req->request.data, *buf; mm_segment_t old_fs = get_fs(); ptr += strlen("proc"); ptr = skip_spaces(ptr); - err = vfs_path_lookup(mnt->mnt_root, mnt, ptr, LOOKUP_FOLLOW, &nd); - if (err) { - mconsole_reply(req, "Failed to look up file", 1, 0); - goto out; - } - - err = may_open(&nd.path, MAY_READ, O_RDONLY); - if (result) { - mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); - goto out; - } - - file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, - current_cred()); - err = PTR_ERR(file); + file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); - path_put(&nd.path); goto out; } diff --git a/fs/internal.h b/fs/internal.h index 6fdbdf2c6047..52abc5287f50 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -114,6 +114,8 @@ struct open_flags { }; extern struct file *do_filp_open(int dfd, const char *pathname, const struct open_flags *op, int lookup_flags); +extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, + const char *, const struct open_flags *, int lookup_flags); /* * inode.c diff --git a/fs/namei.c b/fs/namei.c index 8ee7785d5642..abc8d2df121c 100644 --- a/fs/namei.c 
+++ b/fs/namei.c @@ -1487,11 +1487,13 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->depth = 0; if (flags & LOOKUP_ROOT) { struct inode *inode = nd->root.dentry->d_inode; - if (!inode->i_op->lookup) - return -ENOTDIR; - retval = inode_permission(inode, MAY_EXEC); - if (retval) - return retval; + if (*name) { + if (!inode->i_op->lookup) + return -ENOTDIR; + retval = inode_permission(inode, MAY_EXEC); + if (retval) + return retval; + } nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { @@ -1937,7 +1939,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; } -int may_open(struct path *path, int acc_mode, int flag) +static int may_open(struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; @@ -2250,11 +2252,10 @@ exit: } static struct file *path_openat(int dfd, const char *pathname, - const struct open_flags *op, int flags) + struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; struct file *filp; - struct nameidata nd; struct path path; int count = 0; int error; @@ -2264,27 +2265,27 @@ static struct file *path_openat(int dfd, const char *pathname, return ERR_PTR(-ENFILE); filp->f_flags = op->open_flag; - nd.intent.open.file = filp; - nd.intent.open.flags = open_to_namei_flags(op->open_flag); - nd.intent.open.create_mode = op->mode; + nd->intent.open.file = filp; + nd->intent.open.flags = open_to_namei_flags(op->open_flag); + nd->intent.open.create_mode = op->mode; - error = path_init(dfd, pathname, flags | LOOKUP_PARENT, &nd, &base); + error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) goto out_filp; current->total_link_count = 0; - error = link_path_walk(pathname, &nd); + error = link_path_walk(pathname, nd); if (unlikely(error)) goto out_filp; - filp = do_last(&nd, &path, op, pathname); + filp = do_last(nd, &path, op, pathname); while (unlikely(!filp)) { 
/* trailing symlink */ struct path link = path; struct inode *linki = link.dentry->d_inode; void *cookie; - if (!(nd.flags & LOOKUP_FOLLOW) || count++ == 32) { - path_put_conditional(&path, &nd); - path_put(&nd.path); + if (!(nd->flags & LOOKUP_FOLLOW) || count++ == 32) { + path_put_conditional(&path, nd); + path_put(&nd->path); filp = ERR_PTR(-ELOOP); break; } @@ -2299,23 +2300,23 @@ static struct file *path_openat(int dfd, const char *pathname, * have to putname() it when we are done. Procfs-like symlinks * just set LAST_BIND. */ - nd.flags |= LOOKUP_PARENT; - nd.flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); - error = __do_follow_link(&link, &nd, &cookie); + nd->flags |= LOOKUP_PARENT; + nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); + error = __do_follow_link(&link, nd, &cookie); if (unlikely(error)) filp = ERR_PTR(error); else - filp = do_last(&nd, &path, op, pathname); + filp = do_last(nd, &path, op, pathname); if (!IS_ERR(cookie) && linki->i_op->put_link) - linki->i_op->put_link(link.dentry, &nd, cookie); + linki->i_op->put_link(link.dentry, nd, cookie); path_put(&link); } out: - if (nd.root.mnt && !(nd.flags & LOOKUP_ROOT)) - path_put(&nd.root); + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) + path_put(&nd->root); if (base) fput(base); - release_open_intent(&nd); + release_open_intent(nd); return filp; out_filp: @@ -2326,16 +2327,39 @@ out_filp: struct file *do_filp_open(int dfd, const char *pathname, const struct open_flags *op, int flags) { + struct nameidata nd; struct file *filp; - filp = path_openat(dfd, pathname, op, flags | LOOKUP_RCU); + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) - filp = path_openat(dfd, pathname, op, flags); + filp = path_openat(dfd, pathname, &nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) - filp = path_openat(dfd, pathname, op, flags | LOOKUP_REVAL); + filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); return filp; } +struct file 
*do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, + const char *name, const struct open_flags *op, int flags) +{ + struct nameidata nd; + struct file *file; + + nd.root.mnt = mnt; + nd.root.dentry = dentry; + + flags |= LOOKUP_ROOT; + + if (dentry->d_inode->i_op->follow_link) + return ERR_PTR(-ELOOP); + + file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); + if (unlikely(file == ERR_PTR(-ECHILD))) + file = path_openat(-1, name, &nd, op, flags); + if (unlikely(file == ERR_PTR(-ESTALE))) + file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); + return file; +} + /** * lookup_create - lookup a dentry, creating it if it doesn't exist * @nd: nameidata info diff --git a/fs/nfsctl.c b/fs/nfsctl.c index bf9cbd242ddd..124e8fcb0dd6 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -22,30 +22,17 @@ static struct file *do_open(char *name, int flags) { - struct nameidata nd; struct vfsmount *mnt; - int error; + struct file *file; mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); if (IS_ERR(mnt)) return (struct file *)mnt; - error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); - mntput(mnt); /* drop do_kern_mount reference */ - if (error) - return ERR_PTR(error); - - if (flags == O_RDWR) - error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags); - else - error = may_open(&nd.path, MAY_WRITE, flags); + file = file_open_root(mnt->mnt_root, mnt, name, flags); - if (!error) - return dentry_open(nd.path.dentry, nd.path.mnt, flags, - current_cred()); - - path_put(&nd.path); - return ERR_PTR(error); + mntput(mnt); /* drop do_kern_mount reference */ + return file; } static struct { diff --git a/fs/open.c b/fs/open.c index d05e18c60bae..48afc5c139d2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -959,6 +959,20 @@ struct file *filp_open(const char *filename, int flags, int mode) } EXPORT_SYMBOL(filp_open); +struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, + const char *filename, int flags) +{ + struct open_flags op; + int lookup = 
build_open_flags(flags, 0, &op); + if (flags & O_CREAT) + return ERR_PTR(-EINVAL); + if (!filename && (flags & O_DIRECTORY)) + if (!dentry->d_inode->i_op->lookup) + return ERR_PTR(-ENOTDIR); + return do_file_open_root(dentry, mnt, filename, &op, lookup); +} +EXPORT_SYMBOL(file_open_root); + long do_sys_open(int dfd, const char __user *filename, int flags, int mode) { struct open_flags op; diff --git a/include/linux/fs.h b/include/linux/fs.h index 9c75714f92c1..bf5c3c896072 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1990,6 +1990,8 @@ extern int do_fallocate(struct file *file, int mode, loff_t offset, extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); +extern struct file *file_open_root(struct dentry *, struct vfsmount *, + const char *, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); @@ -2205,8 +2207,6 @@ extern struct file *create_read_pipe(struct file *f, int flags); extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); -extern int may_open(struct path *, int, int); - extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index b875bedf7c9a..3b8e028b9601 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen, void __user *oldval, size_t oldlen, void __user *newval, size_t newlen) { const struct bin_table *table = NULL; - struct nameidata nd; struct vfsmount *mnt; struct file *file; ssize_t result; char *pathname; int flags; - int acc_mode; pathname = sysctl_getname(name, nlen, &table); result = PTR_ERR(pathname); @@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen, /* How should the 
sysctl be accessed? */ if (oldval && oldlen && newval && newlen) { flags = O_RDWR; - acc_mode = MAY_READ | MAY_WRITE; } else if (newval && newlen) { flags = O_WRONLY; - acc_mode = MAY_WRITE; } else if (oldval && oldlen) { flags = O_RDONLY; - acc_mode = MAY_READ; } else { result = 0; goto out_putname; } mnt = current->nsproxy->pid_ns->proc_mnt; - result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd); - if (result) - goto out_putname; - - result = may_open(&nd.path, acc_mode, flags); - if (result) - goto out_putpath; - - file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred()); + file = file_open_root(mnt->mnt_root, mnt, pathname, flags); result = PTR_ERR(file); if (IS_ERR(file)) goto out_putname; @@ -1370,10 +1357,6 @@ out_putname: putname(pathname); out: return result; - -out_putpath: - path_put(&nd.path); - goto out_putname; } -- cgit v1.2.3 From 1abf0c718f15a56a0a435588d1b104c7a37dc9bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Mar 2011 03:51:11 -0400 Subject: New kind of open files - "location only". New flag for open(2) - O_PATH. Semantics: * pathname is resolved, but the file itself is _NOT_ opened as far as filesystem is concerned. * almost all operations on the resulting descriptors shall fail with -EBADF. Exceptions are: 1) operations on descriptors themselves (i.e. close(), dup(), dup2(), dup3(), fcntl(fd, F_DUPFD), fcntl(fd, F_DUPFD_CLOEXEC, ...), fcntl(fd, F_GETFD), fcntl(fd, F_SETFD, ...)) 2) fcntl(fd, F_GETFL), for a common non-destructive way to check if descriptor is open 3) "dfd" arguments of ...at(2) syscalls, i.e. the starting points of pathname resolution * closing such descriptor does *NOT* affect dnotify or posix locks. * permissions are checked as usual along the way to file; no permission checks are applied to the file itself. 
Of course, giving such thing to syscall will result in permission checks (at the moment it means checking that starting point of ....at() is a directory and caller has exec permissions on it). fget() and fget_light() return NULL on such descriptors; use of fget_raw() and fget_raw_light() is needed to get them. That protects existing code from dealing with those things. There are two things still missing (they come in the next commits): one is handling of symlinks (right now we refuse to open them that way; see the next commit for semantics related to those) and another is descriptor passing via SCM_RIGHTS datagrams. Signed-off-by: Al Viro --- fs/fcntl.c | 37 ++++++++++++++++++++++++++----- fs/file_table.c | 53 ++++++++++++++++++++++++++++++++++++++++----- fs/namei.c | 2 +- fs/open.c | 35 +++++++++++++++++++++++++----- include/asm-generic/fcntl.h | 4 ++++ include/linux/file.h | 2 ++ include/linux/fs.h | 3 +++ 7 files changed, 119 insertions(+), 17 deletions(-) (limited to 'fs/open.c') diff --git a/fs/fcntl.c b/fs/fcntl.c index cb1026181bdc..6c82e5bac039 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; - struct file *file = fget(fildes); + struct file *file = fget_raw(fildes); if (file) { ret = get_unused_fd(); @@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, return err; } +static int check_fcntl_cmd(unsigned cmd) +{ + switch (cmd) { + case F_DUPFD: + case F_DUPFD_CLOEXEC: + case F_GETFD: + case F_SETFD: + case F_GETFL: + return 1; + } + return 0; +} + SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; long err = -EBADF; - filp = fget(fd); + filp = fget_raw(fd); if (!filp) goto out; + if (unlikely(filp->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) { + fput(filp); + goto out; + } + } + err = security_file_fcntl(filp, cmd, arg); if (err) 
{ fput(filp); @@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, long err; err = -EBADF; - filp = fget(fd); + filp = fget_raw(fd); if (!filp) goto out; + if (unlikely(filp->f_mode & FMODE_PATH)) { + if (!check_fcntl_cmd(cmd)) { + fput(filp); + goto out; + } + } + err = security_file_fcntl(filp, cmd, arg); if (err) { fput(filp); @@ -808,14 +835,14 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC + __FMODE_EXEC | O_PATH )); fasync_cache = kmem_cache_create("fasync_cache", diff --git a/fs/file_table.c b/fs/file_table.c index eb36b6b17e26..3c16e1ca163e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -276,11 +276,10 @@ struct file *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!atomic_long_inc_not_zero(&file->f_count)) { - /* File object ref couldn't be taken */ - rcu_read_unlock(); - return NULL; - } + /* File object ref couldn't be taken */ + if (file->f_mode & FMODE_PATH || + !atomic_long_inc_not_zero(&file->f_count)) + file = NULL; } rcu_read_unlock(); @@ -289,6 +288,23 @@ struct file *fget(unsigned int fd) EXPORT_SYMBOL(fget); +struct file *fget_raw(unsigned int fd) +{ + struct file *file; + struct files_struct *files = current->files; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + /* File object ref couldn't be taken */ + if (!atomic_long_inc_not_zero(&file->f_count)) + file = NULL; + } + rcu_read_unlock(); + + return file; +} + /* * Lightweight file lookup - no refcnt increment if fd table isn't 
shared. * @@ -310,6 +326,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed) struct file *file; struct files_struct *files = current->files; + *fput_needed = 0; + if (atomic_read(&files->count) == 1) { + file = fcheck_files(files, fd); + if (file && (file->f_mode & FMODE_PATH)) + file = NULL; + } else { + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (!(file->f_mode & FMODE_PATH) && + atomic_long_inc_not_zero(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; + } + rcu_read_unlock(); + } + + return file; +} + +struct file *fget_raw_light(unsigned int fd, int *fput_needed) +{ + struct file *file; + struct files_struct *files = current->files; + *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = fcheck_files(files, fd); diff --git a/fs/namei.c b/fs/namei.c index 33be51a2ddb7..e1d9f90d9776 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1544,7 +1544,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, } else { struct dentry *dentry; - file = fget_light(dfd, &fput_needed); + file = fget_raw_light(dfd, &fput_needed); retval = -EBADF; if (!file) goto out_fail; diff --git a/fs/open.c b/fs/open.c index 48afc5c139d2..14a51de01f54 100644 --- a/fs/open.c +++ b/fs/open.c @@ -669,11 +669,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int (*open)(struct inode *, struct file *), const struct cred *cred) { + static const struct file_operations empty_fops = {}; struct inode *inode; int error; f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; + + if (unlikely(f->f_flags & O_PATH)) + f->f_mode = FMODE_PATH; + inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { error = __get_file_write_access(inode, mnt); @@ -687,9 +692,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.dentry = dentry; f->f_path.mnt = mnt; f->f_pos = 0; - f->f_op = 
fops_get(inode->i_fop); file_sb_list_add(f, inode->i_sb); + if (unlikely(f->f_mode & FMODE_PATH)) { + f->f_op = &empty_fops; + return f; + } + + f->f_op = fops_get(inode->i_fop); + error = security_dentry_open(f, cred); if (error) goto cleanup_all; @@ -911,9 +922,18 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op) if (flags & __O_SYNC) flags |= O_DSYNC; - op->open_flag = flags; + /* + * If we have O_PATH in the open flag. Then we + * cannot have anything other than the below set of flags + */ + if (flags & O_PATH) { + flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; + acc_mode = 0; + } else { + acc_mode = MAY_OPEN | ACC_MODE(flags); + } - acc_mode = MAY_OPEN | ACC_MODE(flags); + op->open_flag = flags; /* O_TRUNC implies we need access checks for write permissions */ if (flags & O_TRUNC) @@ -926,7 +946,8 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op) op->acc_mode = acc_mode; - op->intent = LOOKUP_OPEN; + op->intent = flags & O_PATH ? 
0 : LOOKUP_OPEN; + if (flags & O_CREAT) { op->intent |= LOOKUP_CREATE; if (flags & O_EXCL) @@ -1053,8 +1074,10 @@ int filp_close(struct file *filp, fl_owner_t id) if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp, id); - dnotify_flush(filp, id); - locks_remove_posix(filp, id); + if (likely(!(filp->f_mode & FMODE_PATH))) { + dnotify_flush(filp, id); + locks_remove_posix(filp, id); + } fput(filp); return retval; } diff --git a/include/asm-generic/fcntl.h b/include/asm-generic/fcntl.h index 0fc16e3f0bfc..84793c7025e2 100644 --- a/include/asm-generic/fcntl.h +++ b/include/asm-generic/fcntl.h @@ -80,6 +80,10 @@ #define O_SYNC (__O_SYNC|O_DSYNC) #endif +#ifndef O_PATH +#define O_PATH 010000000 +#endif + #ifndef O_NDELAY #define O_NDELAY O_NONBLOCK #endif diff --git a/include/linux/file.h b/include/linux/file.h index e85baebf6279..21a79958541c 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -29,6 +29,8 @@ static inline void fput_light(struct file *file, int fput_needed) extern struct file *fget(unsigned int fd); extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern struct file *fget_raw(unsigned int fd); +extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); extern int alloc_fd(unsigned start, unsigned flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index f2143e0942c2..13df14e2c42e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -102,6 +102,9 @@ struct inodes_stat_t { /* File is huge (eg. 
/dev/kmem): treat loff_t as unsigned */ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) +/* File is opened with O_PATH; almost nothing can be done with it */ +#define FMODE_PATH ((__force fmode_t)0x4000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3 From 65cfc6722361570bfe255698d9cd4dccaf47570d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 13 Mar 2011 15:56:26 -0400 Subject: readlinkat(), fchownat() and fstatat() with empty relative pathnames For readlinkat() we simply allow an empty pathname; it will fail unless dfd refers to an O_PATH-opened symlink, so we are outside of POSIX scope here. For fchownat() and fstatat() we allow AT_EMPTY_PATH; let the caller explicitly ask for such behaviour. Signed-off-by: Al Viro --- fs/open.c | 10 ++++++---- fs/stat.c | 7 +++++-- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index 14a51de01f54..3cac0bda46df 100644 --- a/fs/open.c +++ b/fs/open.c @@ -573,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, { struct path path; int error = -EINVAL; - int follow; + int lookup_flags; - if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) goto out; - follow = (flag & AT_SYMLINK_NOFOLLOW) ?
0 : LOOKUP_FOLLOW; + if (flag & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; error = mnt_want_write(path.mnt); diff --git a/fs/stat.c b/fs/stat.c index d5c61cf2b703..961039121cb8 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int error = -EINVAL; int lookup_flags = 0; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | + AT_EMPTY_PATH)) != 0) goto out; if (!(flag & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (flag & AT_NO_AUTOMOUNT) lookup_flags |= LOOKUP_NO_AUTOMOUNT; + if (flag & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; error = user_path_at(dfd, filename, lookup_flags, &path); if (error) @@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, if (bufsiz <= 0) return -EINVAL; - error = user_path_at(dfd, pathname, 0, &path); + error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); if (!error) { struct inode *inode = path.dentry->d_inode; -- cgit v1.2.3 From c212f9aaf9101a037fb7f59e75e639437e11d758 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 19 Jan 2011 21:08:41 +0900 Subject: fs: Use BUG_ON(!mnt) at dentry_open(). dentry_open() requires callers to pass a valid vfsmount. Signed-off-by: Tetsuo Handa Signed-off-by: Al Viro --- fs/open.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/open.c') diff --git a/fs/open.c b/fs/open.c index f83ca80cc59a..b52cf013ffa1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -835,17 +835,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, validate_creds(cred); - /* - * We must always pass in a valid mount pointer. 
Historically - * callers got away with not passing it, but we must enforce this at - * the earliest possible point now to avoid strange problems deep in the - * filesystem stack. - */ - if (!mnt) { - printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__); - dump_stack(); - return ERR_PTR(-EINVAL); - } + /* We must always pass in a valid mount pointer. */ + BUG_ON(!mnt); error = -ENFILE; f = get_empty_filp(); -- cgit v1.2.3