diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-01-23 16:41:28 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-01-23 17:08:39 +1100 |
commit | 87be218c9c601605d52b70de667205f30c6749d4 (patch) | |
tree | 44a3cd88183d5dbea2d1f481a5563aa11ac1bf42 /fs | |
parent | dc92d17bc5eb33f91b4b16d2c896f1960cca2fbd (diff) | |
parent | 7f97784510006c41f045d97f1c48f7c639715873 (diff) |
Merge commit 'proc/proc'
Conflicts:
security/selinux/hooks.c
Diffstat (limited to 'fs')
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/automount.c | 28 | ||||
-rw-r--r-- | fs/proc/base.c | 26 | ||||
-rw-r--r-- | fs/proc/generic.c | 62 | ||||
-rw-r--r-- | fs/proc/inode-alloc.txt | 14 | ||||
-rw-r--r-- | fs/proc/inode.c | 2 | ||||
-rw-r--r-- | fs/proc/internal.h | 15 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 235 |
8 files changed, 278 insertions, 105 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 63d965193b22..757f7c11461c 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -9,6 +9,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ proc_tty.o +proc-y += automount.o proc-y += cmdline.o proc-y += cpuinfo.o proc-y += devices.o diff --git a/fs/proc/automount.c b/fs/proc/automount.c new file mode 100644 index 000000000000..5d22b5aa442c --- /dev/null +++ b/fs/proc/automount.c @@ -0,0 +1,28 @@ +#include <linux/list.h> +#include <linux/mount.h> +#include <linux/workqueue.h> +#include "internal.h" + +LIST_HEAD(proc_automounts); + +static void proc_expire_automounts(struct work_struct *work); + +static DECLARE_DELAYED_WORK(proc_automount_task, proc_expire_automounts); +static int proc_automount_timeout = 500 * HZ; + +void proc_shrink_automounts(void) +{ + struct list_head *list = &proc_automounts; + + mark_mounts_for_expiry(list); + mark_mounts_for_expiry(list); + if (list_empty(list)) + return; + + schedule_delayed_work(&proc_automount_task, proc_automount_timeout); +} + +static void proc_expire_automounts(struct work_struct *work) +{ + proc_shrink_automounts(); +} diff --git a/fs/proc/base.c b/fs/proc/base.c index 0c9de19a1633..baf850c09609 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -126,6 +126,8 @@ struct pid_entry { NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) +#define MNT(NAME, MODE, iops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, NULL, {}) /* * Count the number of hardlinks for the pid_entry table, excluding the . @@ -1514,6 +1516,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); const struct cred *cred; + int ret = 0; if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || @@ -1528,12 +1531,14 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = 0; } inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); + ret = proc_net_revalidate(task, dentry, nd); + if (ret == 1) + security_task_to_inode(task, inode); put_task_struct(task); - return 1; } - d_drop(dentry); - return 0; + if (ret == 0) + d_drop(dentry); + return ret; } static int pid_delete_dentry(struct dentry * dentry) @@ -2488,7 +2493,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), #ifdef CONFIG_NET - DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), + MNT("net", S_IRUGO|S_IXUGO, proc_net_inode_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), @@ -2591,15 +2596,11 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - if (!(current->flags & PF_EXITING)) - shrink_dcache_parent(dentry); + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (tgid == 0) - goto out; - name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2660,13 +2661,12 @@ void proc_flush_task(struct task_struct *task) struct upid *upid; pid = task_pid(task); - if (thread_group_leader(task)) - tgid = task_tgid(task); + tgid = task_tgid(task); for (i = 0; i <= pid->level; i++) { upid = &pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - tgid ? tgid->numbers[i].nr : 0); + tgid->numbers[i].nr); } upid = &pid->numbers[pid->level]; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index db7fa5cab988..5ee636abbc2f 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -307,6 +307,21 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ /* * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. + * + * Current inode allocations in the proc-fs (hex-numbers): + * + * 00000000 reserved + * 00000001-00000fff static entries (goners) + * 001 root-ino + * + * 00001000-00001fff unused + * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff + * 80000000-efffffff unused + * f0000000-ffffffff dynamic entries + * + * Goal: + * Once we split the thing into several virtual filesystems, + * we will get rid of magical ranges (and this comment, BTW). */ static unsigned int get_inode_number(void) { @@ -528,7 +543,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->proc_fops = &proc_dir_operations; dp->proc_iops = &proc_dir_inode_operations; } - dir->nlink++; } else if (S_ISLNK(dp->mode)) { if (dp->proc_iops == NULL) dp->proc_iops = &proc_link_inode_operations; @@ -551,6 +565,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; + if (S_ISDIR(dp->mode)) + dir->nlink++; spin_unlock(&proc_subdir_lock); return 0; @@ -595,6 +611,24 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, return ent; } +struct proc_dir_entry *proc_create_root(void) +{ + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, "..", S_IFDIR | S_IRUGO | S_IXUGO, 2); + if (ent) { + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + ent->low_ino = get_inode_number(); + ent->parent = ent; + if (!ent->low_ino) { + kfree(ent); + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent, const char *dest) { @@ -635,23 +669,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, - struct proc_dir_entry *parent) -{ - struct proc_dir_entry *ent; - - ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); - if (ent) { - ent->data = net; - if (proc_register(parent, ent) < 0) { - kfree(ent); - ent = NULL; - } - } - return ent; -} -EXPORT_SYMBOL_GPL(proc_net_mkdir); - struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { @@ -754,6 +771,8 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = *p; *p = de->next; de->next = NULL; + if (S_ISDIR(de->mode)) + parent->nlink--; break; } } @@ -761,6 +780,11 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (!de) return; + release_proc_entry(de); +} + +void release_proc_entry(struct proc_dir_entry *de) +{ spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're @@ -796,8 +820,6 @@ continue_removing: } spin_unlock(&de->pde_unload_lock); - if (S_ISDIR(de->mode)) - parent->nlink--; de->nlink = 0; WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt deleted file mode 100644 index 77212f938c2c..000000000000 --- a/fs/proc/inode-alloc.txt +++ /dev/null @@ -1,14 +0,0 @@ -Current inode allocations in the proc-fs (hex-numbers): - - 00000000 reserved - 00000001-00000fff static entries (goners) - 001 root-ino - - 00001000-00001fff unused - 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff - 80000000-efffffff unused - f0000000-ffffffff dynamic entries - -Goal: - a) once we'll split the thing into several virtual filesystems we - will get rid of magical ranges (and this file, BTW). diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3e76bb9b3ad6..0bbb937c5f0d 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -112,7 +112,7 @@ void __init proc_init_inodecache(void) init_once); } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index cd53ff838498..ab62ede71531 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -58,9 +58,17 @@ extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; -extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +#ifdef CONFIG_NET +int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, struct nameidata *nd); +#else +static inline int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, struct nameidata *nd) +{ + return 1; +} +#endif + void free_proc_entry(struct proc_dir_entry *de); void proc_init_inodecache(void); @@ -84,6 +92,9 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); +struct proc_dir_entry *proc_create_root(void); +void release_proc_entry(struct proc_dir_entry *de); +extern const struct super_operations proc_sops; struct pde_opener { struct inode *inode; @@ -91,3 +102,5 @@ struct pde_opener { int (*release)(struct inode *, struct file *); struct list_head lh; }; + +extern struct list_head proc_automounts; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 04d1270f1c38..baaddad8436c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -20,15 +20,17 @@ #include <linux/bitops.h> #include <linux/mount.h> #include <linux/nsproxy.h> +#include <linux/namei.h> #include <net/net_namespace.h> #include <linux/seq_file.h> #include "internal.h" +static struct file_system_type proc_net_fs_type; static struct net *get_proc_net(const struct inode *inode) { - return maybe_get_net(PDE_NET(PDE(inode))); + return maybe_get_net(inode->i_sb->s_fs_info); } int seq_open_net(struct inode *ino, struct file *f, @@ -117,66 +119,91 @@ static struct net *get_proc_task_net(struct inode *dir) return net; } -static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) +void *proc_net_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct dentry *de; + /* Follow to a mount point of the proper network namespace. */ + struct vfsmount *mnt; struct net *net; - - de = ERR_PTR(-ENOENT); - net = get_proc_task_net(dir); - if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); - put_net(net); - } - return de; -} - -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct net *net; - - net = get_proc_task_net(inode); - - generic_fillattr(inode, stat); - - if (net != NULL) { - stat->nlink = net->proc_net->nlink; - put_net(net); + int err = -ENOENT; + + net = get_proc_task_net(dentry->d_inode); + if (!net) + goto out_err; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto out_err; + + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); + + err = do_add_mount(mntget(mnt), &nd->path, MNT_SHRINKABLE, + &proc_automounts); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; } - - return 0; + err = 0; + path_put(&nd->path); + nd->path.mnt = mnt; + nd->path.dentry = dget(mnt->mnt_root); + put_net(net); +out: + return ERR_PTR(err); +out_err: + path_put(&nd->path); + goto out; +out_follow: + /* We raced with ourselves so just walk the mounts */ + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) + ; + err = 0; + goto out; } const struct inode_operations proc_net_inode_operations = { - .lookup = proc_tgid_net_lookup, - .getattr = proc_tgid_net_getattr, + .follow_link = proc_net_follow_link, }; -static int proc_tgid_net_readdir(struct file *filp, void *dirent, - filldir_t filldir) + +int proc_net_revalidate(struct task_struct *task, struct dentry *dentry, + struct nameidata *nd) { - int ret; - struct net *net; + struct inode *inode = dentry->d_inode; + struct dentry *tdentry; + struct vfsmount *tmnt; + int ret = 1; - ret = -EINVAL; - net = get_proc_task_net(filp->f_path.dentry->d_inode); - if (net != NULL) { - ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); - put_net(net); + /* Are we talking about a proc/net mount point? */ + if (!nd || inode->i_op != &proc_net_inode_operations) + goto out; + + /* + * If the wrong filesystem is mounted on /proc/<pid>/net report the + * dentry is invalid. + */ + tmnt = mntget(nd->path.mnt); + tdentry = dget(dentry); + if (follow_down(&tmnt, &tdentry)) { + struct nsproxy *ns; + + rcu_read_lock(); + ns = task_nsproxy(task); + if ((ns == NULL) || + (tmnt->mnt_sb->s_magic != PROC_NET_SUPER_MAGIC) || + (tmnt->mnt_sb->s_fs_info != ns->net_ns)) + ret = 0; + rcu_read_unlock(); } + dput(tdentry); + mntput(tmnt); +out: return ret; } -const struct file_operations proc_net_operations = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = proc_tgid_net_readdir, -}; - - struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { @@ -184,28 +211,108 @@ struct proc_dir_entry *proc_net_fops_create(struct net *net, } EXPORT_SYMBOL_GPL(proc_net_fops_create); +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + if (!parent) + parent = net->proc_net; + return proc_mkdir(name, parent); +} +EXPORT_SYMBOL_GPL(proc_net_mkdir); + void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); } EXPORT_SYMBOL_GPL(proc_net_remove); +static int proc_net_fill_super(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + struct proc_dir_entry *netd = net->proc_net; + struct inode *root_inode = NULL; + + sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = PROC_NET_SUPER_MAGIC; + sb->s_op = &proc_sops; + sb->s_time_gran = 1; + + de_get(netd); + root_inode = proc_get_inode(sb, netd->low_ino, netd); + if (!root_inode) + goto out_no_root; + root_inode->i_uid = 0; + root_inode->i_gid = 0; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_no_root; + return 0; + +out_no_root: + printk("%s: get root inode failed\n", __func__); + iput(root_inode); + de_put(netd); + return -ENOMEM; +} + +static int proc_net_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_net_set_super(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); +} + +static int proc_net_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct super_block *sb; + + if (!(flags & MS_KERNMOUNT)) + data = current->nsproxy->net_ns; + + sb = sget(fs_type, proc_net_test_super, proc_net_set_super, data); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + int err; + sb->s_flags = flags; + err = proc_net_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + sb->s_flags |= MS_ACTIVE; + } + + return simple_set_mnt(mnt, sb); +} + +static struct file_system_type proc_net_fs_type = { + .name = "proc/net", + .get_sb = proc_net_get_sb, + .kill_sb = kill_litter_super, +}; + static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + struct vfsmount *mnt; int err; err = -ENOMEM; - netd = kzalloc(sizeof(*netd), GFP_KERNEL); + netd = proc_create_root(); if (!netd) goto out; - netd->data = net; - netd->nlink = 2; - netd->name = "net"; - netd->namelen = 3; - netd->parent = &proc_root; - err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) @@ -213,8 +320,17 @@ static __net_init int proc_net_ns_init(struct net *net) net->proc_net = netd; net->proc_net_stat = net_statd; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto free_stat; + + net->proc_mnt = mnt; + return 0; +free_stat: + remove_proc_entry("stat", netd); free_net: kfree(netd); out: @@ -224,7 +340,14 @@ out: static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - kfree(net->proc_net); + release_proc_entry(net->proc_net); + /* + * We won't be looking up this super block any more so set s_fs_info to + * NULL to ensure it doesn't conflict with network namespaces allocated + * in the future at the same address. + */ + net->proc_mnt->mnt_sb->s_fs_info = NULL; + mntput(net->proc_mnt); } static struct pernet_operations __net_initdata proc_net_ns_ops = { @@ -235,6 +358,6 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { proc_symlink("net", NULL, "self/net"); - + register_filesystem(&proc_net_fs_type); return register_pernet_subsys(&proc_net_ns_ops); } |