diff options
Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 211 |
1 files changed, 106 insertions, 105 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 5a4438445bf7..d18deb4c410b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -236,6 +236,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); + INIT_LIST_HEAD(&mnt->mnt_umounting); init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; @@ -274,7 +275,7 @@ int __mnt_is_readonly(struct vfsmount *mnt) { if (mnt->mnt_flags & MNT_READONLY) return 1; - if (mnt->mnt_sb->s_flags & MS_RDONLY) + if (sb_rdonly(mnt->mnt_sb)) return 1; return 0; } @@ -430,13 +431,18 @@ int __mnt_want_write_file(struct file *file) } /** - * mnt_want_write_file - get write access to a file's mount + * mnt_want_write_file_path - get write access to a file's mount * @file: the file who's mount on which to take a write * * This is like mnt_want_write, but it takes a file and can * do some optimisations if the file is open for write already + * + * Called by the vfs for cases when we have an open file at hand, but will do an + * inode operation on it (important distinction for files opened on overlayfs, + * since the file operations will come from the real underlying file, while + * inode operations come from the overlay). */ -int mnt_want_write_file(struct file *file) +int mnt_want_write_file_path(struct file *file) { int ret; @@ -446,6 +452,55 @@ int mnt_want_write_file(struct file *file) sb_end_write(file->f_path.mnt->mnt_sb); return ret; } + +static inline int may_write_real(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct dentry *upperdentry; + + /* Writable file? */ + if (file->f_mode & FMODE_WRITER) + return 0; + + /* Not overlayfs? */ + if (likely(!(dentry->d_flags & DCACHE_OP_REAL))) + return 0; + + /* File refers to upper, writable layer? */ + upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER); + if (upperdentry && + (file_inode(file) == d_inode(upperdentry) || + file_inode(file) == d_inode(dentry))) + return 0; + + /* Lower layer: can't write to real file, sorry... */ + return -EPERM; +} + +/** + * mnt_want_write_file - get write access to a file's mount + * @file: the file who's mount on which to take a write + * + * This is like mnt_want_write, but it takes a file and can + * do some optimisations if the file is open for write already + * + * Mostly called by filesystems from their ioctl operation before performing + * modification. On overlayfs this needs to check if the file is on a read-only + * lower layer and deny access in that case. + */ +int mnt_want_write_file(struct file *file) +{ + int ret; + + ret = may_write_real(file); + if (!ret) { + sb_start_write(file_inode(file)->i_sb); + ret = __mnt_want_write_file(file); + if (ret) + sb_end_write(file_inode(file)->i_sb); + } + return ret; +} EXPORT_SYMBOL_GPL(mnt_want_write_file); /** @@ -483,10 +538,16 @@ void __mnt_drop_write_file(struct file *file) __mnt_drop_write(file->f_path.mnt); } -void mnt_drop_write_file(struct file *file) +void mnt_drop_write_file_path(struct file *file) { mnt_drop_write(file->f_path.mnt); } + +void mnt_drop_write_file(struct file *file) +{ + __mnt_drop_write(file->f_path.mnt); + sb_end_write(file_inode(file)->i_sb); +} EXPORT_SYMBOL(mnt_drop_write_file); static int mnt_make_readonly(struct mount *mnt) @@ -970,7 +1031,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) return ERR_PTR(-ENOMEM); - if (flags & MS_KERNMOUNT) + if (flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; root = mount_fs(type, flags, name, data); @@ -1002,7 +1063,7 @@ vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, if (mountpoint->d_sb->s_user_ns != &init_user_ns) return ERR_PTR(-EPERM); - return vfs_kern_mount(type, MS_SUBMOUNT, name, data); + return vfs_kern_mount(type, SB_SUBMOUNT, name, data); } EXPORT_SYMBOL_GPL(vfs_submount); @@ -1123,12 +1184,10 @@ static LLIST_HEAD(delayed_mntput_list); static void delayed_mntput(struct work_struct *unused) { struct llist_node *node = llist_del_all(&delayed_mntput_list); - struct llist_node *next; + struct mount *m, *t; - for (; node; node = next) { - next = llist_next(node); - cleanup_mnt(llist_entry(node, struct mount, mnt_llist)); - } + llist_for_each_entry_safe(m, t, node, mnt_llist) + cleanup_mnt(m); } static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); @@ -1237,65 +1296,6 @@ struct vfsmount *mnt_clone_internal(const struct path *path) return &p->mnt; } -static inline void mangle(struct seq_file *m, const char *s) -{ - seq_escape(m, s, " \t\n\\"); -} - -/* - * Simple .show_options callback for filesystems which don't want to - * implement more complex mount option showing. - * - * See also save_mount_options(). - */ -int generic_show_options(struct seq_file *m, struct dentry *root) -{ - const char *options; - - rcu_read_lock(); - options = rcu_dereference(root->d_sb->s_options); - - if (options != NULL && options[0]) { - seq_putc(m, ','); - mangle(m, options); - } - rcu_read_unlock(); - - return 0; -} -EXPORT_SYMBOL(generic_show_options); - -/* - * If filesystem uses generic_show_options(), this function should be - * called from the fill_super() callback. - * - * The .remount_fs callback usually needs to be handled in a special - * way, to make sure, that previous options are not overwritten if the - * remount fails. - * - * Also note, that if the filesystem's .remount_fs function doesn't - * reset all options to their default value, but changes only newly - * given options, then the displayed options will not reflect reality - * any more. - */ -void save_mount_options(struct super_block *sb, char *options) -{ - BUG_ON(sb->s_options); - rcu_assign_pointer(sb->s_options, kstrdup(options, GFP_KERNEL)); -} -EXPORT_SYMBOL(save_mount_options); - -void replace_mount_options(struct super_block *sb, char *options) -{ - char *old = sb->s_options; - rcu_assign_pointer(sb->s_options, options); - if (old) { - synchronize_rcu(); - kfree(old); - } -} -EXPORT_SYMBOL(replace_mount_options); - #ifdef CONFIG_PROC_FS /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) @@ -1592,8 +1592,8 @@ static int do_umount(struct mount *mnt, int flags) if (!capable(CAP_SYS_ADMIN)) return -EPERM; down_write(&sb->s_umount); - if (!(sb->s_flags & MS_RDONLY)) - retval = do_remount_sb(sb, MS_RDONLY, NULL, 0); + if (!sb_rdonly(sb)) + retval = do_remount_sb(sb, SB_RDONLY, NULL, 0); up_write(&sb->s_umount); return retval; } @@ -1656,7 +1656,7 @@ out_unlock: namespace_unlock(); } -/* +/* * Is the caller allowed to modify his namespace? */ static inline bool may_mount(void) @@ -2117,7 +2117,7 @@ static void unlock_mount(struct mountpoint *where) static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) { - if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) + if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) return -EINVAL; if (d_is_dir(mp->m_dentry) != @@ -2131,9 +2131,9 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) * Sanity check the flags to change_mnt_propagation. */ -static int flags_to_propagation_type(int flags) +static int flags_to_propagation_type(int ms_flags) { - int type = flags & ~(MS_REC | MS_SILENT); + int type = ms_flags & ~(MS_REC | MS_SILENT); /* Fail if any non-propagation flags are set */ if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) @@ -2147,18 +2147,18 @@ static int flags_to_propagation_type(int flags) /* * recursively change the type of the mountpoint. */ -static int do_change_type(struct path *path, int flag) +static int do_change_type(struct path *path, int ms_flags) { struct mount *m; struct mount *mnt = real_mount(path->mnt); - int recurse = flag & MS_REC; + int recurse = ms_flags & MS_REC; int type; int err = 0; if (path->dentry != path->mnt->mnt_root) return -EINVAL; - type = flags_to_propagation_type(flag); + type = flags_to_propagation_type(ms_flags); if (!type) return -EINVAL; @@ -2210,7 +2210,7 @@ static int do_loopback(struct path *path, const char *old_name, err = -EINVAL; if (mnt_ns_loop(old_path.dentry)) - goto out; + goto out; mp = lock_mount(path); err = PTR_ERR(mp); @@ -2280,8 +2280,8 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. */ -static int do_remount(struct path *path, int flags, int mnt_flags, - void *data) +static int do_remount(struct path *path, int ms_flags, int sb_flags, + int mnt_flags, void *data) { int err; struct super_block *sb = path->mnt->mnt_sb; @@ -2325,12 +2325,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags, return err; down_write(&sb->s_umount); - if (flags & MS_BIND) - err = change_mount_flags(path->mnt, flags); + if (ms_flags & MS_BIND) + err = change_mount_flags(path->mnt, ms_flags); else if (!capable(CAP_SYS_ADMIN)) err = -EPERM; else - err = do_remount_sb(sb, flags, data, 0); + err = do_remount_sb(sb, sb_flags, data, 0); if (!err) { lock_mount_hash(); mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; @@ -2495,7 +2495,7 @@ static bool mount_too_revealing(struct vfsmount *mnt, int *new_mnt_flags); * create a new mount for userspace and request it to be added into the * namespace's tree */ -static int do_new_mount(struct path *path, const char *fstype, int flags, +static int do_new_mount(struct path *path, const char *fstype, int sb_flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; @@ -2509,7 +2509,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, if (!type) return -ENODEV; - mnt = vfs_kern_mount(type, flags, name, data); + mnt = vfs_kern_mount(type, sb_flags, name, data); if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); @@ -2764,8 +2764,8 @@ long do_mount(const char *dev_name, const char __user *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; + unsigned int mnt_flags = 0, sb_flags; int retval = 0; - int mnt_flags = 0; /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) @@ -2775,6 +2775,9 @@ long do_mount(const char *dev_name, const char __user *dir_name, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + if (flags & MS_NOUSER) + return -EINVAL; + /* ... and get the mountpoint */ retval = user_path(dir_name, &path); if (retval) @@ -2784,7 +2787,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, type_page, flags, data_page); if (!retval && !may_mount()) retval = -EPERM; - if (!retval && (flags & MS_MANDLOCK) && !may_mandlock()) + if (!retval && (flags & SB_MANDLOCK) && !may_mandlock()) retval = -EPERM; if (retval) goto dput_out; @@ -2806,7 +2809,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); - if (flags & MS_RDONLY) + if (flags & SB_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ @@ -2817,12 +2820,16 @@ long do_mount(const char *dev_name, const char __user *dir_name, mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; } - flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | - MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | - MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); + sb_flags = flags & (SB_RDONLY | + SB_SYNCHRONOUS | + SB_MANDLOCK | + SB_DIRSYNC | + SB_SILENT | + SB_POSIXACL | + SB_I_VERSION); if (flags & MS_REMOUNT) - retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, + retval = do_remount(&path, flags, sb_flags, mnt_flags, data_page); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); @@ -2831,7 +2838,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else - retval = do_new_mount(&path, type_page, flags, mnt_flags, + retval = do_new_mount(&path, type_page, sb_flags, mnt_flags, dev_name, data_page); dput_out: path_put(&path); @@ -3238,7 +3245,6 @@ static void __init init_mount_tree(void) void __init mnt_init(void) { - unsigned u; int err; mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), @@ -3247,22 +3253,17 @@ void __init mnt_init(void) mount_hashtable = alloc_large_system_hash("Mount-cache", sizeof(struct hlist_head), mhash_entries, 19, - 0, + HASH_ZERO, &m_hash_shift, &m_hash_mask, 0, 0); mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", sizeof(struct hlist_head), mphash_entries, 19, - 0, + HASH_ZERO, &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - for (u = 0; u <= m_hash_mask; u++) - INIT_HLIST_HEAD(&mount_hashtable[u]); - for (u = 0; u <= mp_hash_mask; u++) - INIT_HLIST_HEAD(&mountpoint_hashtable[u]); - kernfs_init(); err = sysfs_init(); @@ -3287,7 +3288,7 @@ void put_mnt_ns(struct mnt_namespace *ns) struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) { struct vfsmount *mnt; - mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); + mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data); if (!IS_ERR(mnt)) { /* * it is a longterm mount, don't release mnt until @@ -3364,7 +3365,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, struct vfsmount *new, mnt_flags = mnt->mnt.mnt_flags; /* Don't miss readonly hidden in the superblock flags */ - if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY) + if (sb_rdonly(mnt->mnt.mnt_sb)) mnt_flags |= MNT_LOCK_READONLY; /* Verify the mount flags are equal to or more permissive |