From 4b908091b7e58d09a67c1fdf9d2eedd40722c03c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 19 Nov 2008 03:57:02 +0300 Subject: proc 1/6: implement support for automounts in task directories This is a general mechanism that is capable of removing any unused mounts on /proc in any directory. As we flush the mounts when a process dies this mechanism is tailored for flushing mounts in the per task and per task group directories. Signed-off-by: Eric W. Biederman Signed-off-by: Alexey Dobriyan --- include/linux/proc_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b8bdb96eff78..768e6278cbdc 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -101,6 +101,7 @@ extern spinlock_t proc_subdir_lock; extern void proc_root_init(void); +void proc_shrink_automounts(void); void proc_flush_task(struct task_struct *task); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); @@ -207,6 +208,10 @@ static inline void proc_flush_task(struct task_struct *task) { } +static inline void proc_shrink_automounts(void) +{ +} + static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *proc_create(const char *name, -- cgit v1.2.3 From c0f23e402155f510dd27d05b4f8d3c0a967dbf69 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 31 Dec 2008 00:04:30 +0300 Subject: proc 3/6: make /proc/net it's own filesystem Make the VFS happy with /proc/net by making it its own filesystem avoiding issues with hard links to directories and other silliness that confuse the vfs today. We preserve backwards compatibility by automatically mounting /proc/self/net and marking it as a shrinkable mount so userspace doesn't need to care about it. 
Signed-off-by: Eric W. Biederman [This patch depends on 459c19f524a9d89c65717a7d061d5f11ecf6bcb8 "SELinux: correctly detect proc filesystems of the form "proc/foo"" 12204e24b1330428c3062faee10a0d80b8a5cb61 "security: pass mount flags to security_sb_kern_mount()" 74192246910ff4fb95309ba1a683215644beeb62 "SELinux: don't check permissions for kernel mounts" --adobriyan ] Signed-off-by: Alexey Dobriyan --- fs/proc/base.c | 4 +- fs/proc/internal.h | 1 - fs/proc/proc_net.c | 197 +++++++++++++++++++++++++++++++------------- include/linux/magic.h | 1 + include/net/net_namespace.h | 1 + security/selinux/hooks.c | 29 +++++-- 6 files changed, 167 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index 0c9de19a1633..bc4b44ddf688 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -126,6 +126,8 @@ struct pid_entry { NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) +#define MNT(NAME, MODE, iops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, NULL, {}) /* * Count the number of hardlinks for the pid_entry table, excluding the . 
@@ -2488,7 +2490,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), #ifdef CONFIG_NET - DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), + MNT("net", S_IRUGO|S_IXUGO, proc_net_inode_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 67708ddc9fb9..34492c55f63e 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -58,7 +58,6 @@ extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; -extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 04d1270f1c38..1054d929b86f 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -20,11 +20,13 @@ #include #include #include +#include #include #include #include "internal.h" +static struct file_system_type proc_net_fs_type; static struct net *get_proc_net(const struct inode *inode) { @@ -117,63 +119,53 @@ static struct net *get_proc_task_net(struct inode *dir) return net; } -static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) +void *proc_net_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct dentry *de; + /* Follow to a mount point of the proper network namespace. 
*/ + struct vfsmount *mnt; struct net *net; - - de = ERR_PTR(-ENOENT); - net = get_proc_task_net(dir); - if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); - put_net(net); - } - return de; -} - -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct net *net; - - net = get_proc_task_net(inode); - - generic_fillattr(inode, stat); - - if (net != NULL) { - stat->nlink = net->proc_net->nlink; - put_net(net); + int err = -ENOENT; + + net = get_proc_task_net(dentry->d_inode); + if (!net) + goto out_err; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto out_err; + + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); + + err = do_add_mount(mntget(mnt), &nd->path, MNT_SHRINKABLE, + &proc_automounts); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; } - - return 0; + err = 0; + path_put(&nd->path); + nd->path.mnt = mnt; + nd->path.dentry = dget(mnt->mnt_root); + put_net(net); +out: + return ERR_PTR(err); +out_err: + path_put(&nd->path); + goto out; +out_follow: + /* We raced with ourselves so just walk the mounts */ + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) + ; + err = 0; + goto out; } const struct inode_operations proc_net_inode_operations = { - .lookup = proc_tgid_net_lookup, - .getattr = proc_tgid_net_getattr, -}; - -static int proc_tgid_net_readdir(struct file *filp, void *dirent, - filldir_t filldir) -{ - int ret; - struct net *net; - - ret = -EINVAL; - net = get_proc_task_net(filp->f_path.dentry->d_inode); - if (net != NULL) { - ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); - put_net(net); - } - return ret; -} - -const struct file_operations proc_net_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = proc_tgid_net_readdir, + .follow_link = proc_net_follow_link, }; @@ -190,21 +182,94 
@@ void proc_net_remove(struct net *net, const char *name) } EXPORT_SYMBOL_GPL(proc_net_remove); +static int proc_net_fill_super(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + struct proc_dir_entry *netd = net->proc_net; + struct inode *root_inode = NULL; + + sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = PROC_NET_SUPER_MAGIC; + sb->s_op = &proc_sops; + sb->s_time_gran = 1; + + de_get(netd); + root_inode = proc_get_inode(sb, netd->low_ino, netd); + if (!root_inode) + goto out_no_root; + root_inode->i_uid = 0; + root_inode->i_gid = 0; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_no_root; + return 0; + +out_no_root: + printk("%s: get root inode failed\n", __func__); + iput(root_inode); + de_put(netd); + return -ENOMEM; +} + +static int proc_net_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_net_set_super(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); +} + +static int proc_net_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct super_block *sb; + + if (!(flags & MS_KERNMOUNT)) + data = current->nsproxy->net_ns; + + sb = sget(fs_type, proc_net_test_super, proc_net_set_super, data); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + int err; + sb->s_flags = flags; + err = proc_net_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + sb->s_flags |= MS_ACTIVE; + } + + return simple_set_mnt(mnt, sb); +} + +static struct file_system_type proc_net_fs_type = { + .name = "proc/net", + .get_sb = proc_net_get_sb, + .kill_sb = kill_litter_super, +}; + static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + struct vfsmount *mnt; int err; err = -ENOMEM; - netd = 
kzalloc(sizeof(*netd), GFP_KERNEL); + netd = proc_create_root(); if (!netd) goto out; netd->data = net; - netd->nlink = 2; - netd->name = "net"; - netd->namelen = 3; - netd->parent = &proc_root; err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); @@ -213,8 +278,17 @@ static __net_init int proc_net_ns_init(struct net *net) net->proc_net = netd; net->proc_net_stat = net_statd; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto free_stat; + + net->proc_mnt = mnt; + return 0; +free_stat: + remove_proc_entry("stat", netd); free_net: kfree(netd); out: @@ -224,7 +298,14 @@ out: static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - kfree(net->proc_net); + release_proc_entry(net->proc_net); + /* + * We won't be looking up this super block any more so set s_fs_info to + * NULL to ensure it doesn't conflict with network namespaces allocated + * in the future at the same address. + */ + net->proc_mnt->mnt_sb->s_fs_info = NULL; + mntput(net->proc_mnt); } static struct pernet_operations __net_initdata proc_net_ns_ops = { @@ -235,6 +316,6 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { proc_symlink("net", NULL, "self/net"); - + register_filesystem(&proc_net_fs_type); return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/include/linux/magic.h b/include/linux/magic.h index 439f6f3cb0c4..3839b32e07f6 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -31,6 +31,7 @@ #define NFS_SUPER_MAGIC 0x6969 #define OPENPROM_SUPER_MAGIC 0x9fa1 #define PROC_SUPER_MAGIC 0x9fa0 +#define PROC_NET_SUPER_MAGIC 0x706e6574 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6fc13d905c5f..055a82cd5f8d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,7 @@ struct net { 
struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; + struct vfsmount *proc_mnt; #ifdef CONFIG_SYSCTL struct ctl_table_set sysctls; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 00815973d412..38c88e1f66a8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -698,7 +698,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, goto out; } - if (strcmp(sb->s_type->name, "proc") == 0) + /* "proc", "proc/net" */ + if (strncmp(sb->s_type->name, "proc", 4) == 0) sbsec->proc = 1; /* Determine the labeling behavior to use for this filesystem type. */ @@ -1149,16 +1150,18 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc } #ifdef CONFIG_PROC_FS -static int selinux_proc_get_sid(struct proc_dir_entry *de, +static int selinux_proc_get_sid(struct super_block *sb, + struct proc_dir_entry *de, u16 tclass, u32 *sid) { int buflen, rc; char *buffer, *path, *end; + rc = -ENOMEM; buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) - return -ENOMEM; + goto out; buflen = PAGE_SIZE; end = buffer+buflen; @@ -1169,19 +1172,32 @@ static int selinux_proc_get_sid(struct proc_dir_entry *de, while (de && de != de->parent) { buflen -= de->namelen + 1; if (buflen < 0) - break; + goto out_free; end -= de->namelen; memcpy(end, de->name, de->namelen); *--end = '/'; path = end; de = de->parent; } + if (strcmp(sb->s_type->name, "proc") != 0) { + const char *name = sb->s_type->name + 4; + int namelen = strlen(name); + buflen -= namelen; + if (buflen < 0) + goto out_free; + end -= namelen; + memcpy(end, name, namelen); + path = end; + } rc = security_genfs_sid("proc", path, tclass, sid); +out_free: free_page((unsigned long)buffer); +out: return rc; } #else -static int selinux_proc_get_sid(struct proc_dir_entry *de, +static int selinux_proc_get_sid(struct super_block *sb, + struct proc_dir_entry *de, u16 tclass, u32 *sid) { @@ -1330,7 +1346,8 @@ static int inode_doinit_with_dentry(struct inode 
*inode, struct dentry *opt_dent struct proc_inode *proci = PROC_I(inode); if (proci->pde) { isec->sclass = inode_mode_to_security_class(inode->i_mode); - rc = selinux_proc_get_sid(proci->pde, + rc = selinux_proc_get_sid(inode->i_sb, + proci->pde, isec->sclass, &sid); if (rc) -- cgit v1.2.3 From 0e6a2bfcbae4ee3cf770a6a5da203b4a336ff8ff Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 19 Nov 2008 05:10:17 +0300 Subject: proc 5/6: simplify network namespace lookup Since the network namespace is recorded in the superblock we don't need to remember it on each directory under /proc/net. Signed-off-by: Eric W. Biederman Signed-off-by: Alexey Dobriyan --- fs/proc/generic.c | 17 ----------------- fs/proc/proc_net.c | 13 ++++++++++--- include/linux/proc_fs.h | 5 ----- 3 files changed, 10 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index faf40a94e006..5ee636abbc2f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -669,23 +669,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, - struct proc_dir_entry *parent) -{ - struct proc_dir_entry *ent; - - ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); - if (ent) { - ent->data = net; - if (proc_register(parent, ent) < 0) { - kfree(ent); - ent = NULL; - } - } - return ent; -} -EXPORT_SYMBOL_GPL(proc_net_mkdir); - struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index b026c87dcb4a..baaddad8436c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -30,7 +30,7 @@ static struct file_system_type proc_net_fs_type; static struct net *get_proc_net(const struct inode *inode) { - return maybe_get_net(PDE_NET(PDE(inode))); + return maybe_get_net(inode->i_sb->s_fs_info); } int seq_open_net(struct inode *ino, struct file *f, @@ -211,6 +211,15 @@ 
struct proc_dir_entry *proc_net_fops_create(struct net *net, } EXPORT_SYMBOL_GPL(proc_net_fops_create); +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + if (!parent) + parent = net->proc_net; + return proc_mkdir(name, parent); +} +EXPORT_SYMBOL_GPL(proc_net_mkdir); + void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); @@ -304,8 +313,6 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; - netd->data = net; - err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 768e6278cbdc..4f13811bdcbd 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -304,11 +304,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode) return PROC_I(inode)->pde; } -static inline struct net *PDE_NET(struct proc_dir_entry *pde) -{ - return pde->parent->data; -} - struct proc_maps_private { struct pid *pid; struct task_struct *task; -- cgit v1.2.3