summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2009-01-23 16:41:28 +1100
committer Stephen Rothwell <sfr@canb.auug.org.au> 2009-01-23 17:08:39 +1100
commit 87be218c9c601605d52b70de667205f30c6749d4 (patch)
tree 44a3cd88183d5dbea2d1f481a5563aa11ac1bf42 /fs
parent dc92d17bc5eb33f91b4b16d2c896f1960cca2fbd (diff)
parent 7f97784510006c41f045d97f1c48f7c639715873 (diff)
Merge commit 'proc/proc'
Conflicts: security/selinux/hooks.c
Diffstat (limited to 'fs')
-rw-r--r-- fs/proc/Makefile 1
-rw-r--r-- fs/proc/automount.c 28
-rw-r--r-- fs/proc/base.c 26
-rw-r--r-- fs/proc/generic.c 62
-rw-r--r-- fs/proc/inode-alloc.txt 14
-rw-r--r-- fs/proc/inode.c 2
-rw-r--r-- fs/proc/internal.h 15
-rw-r--r-- fs/proc/proc_net.c 235
8 files changed, 278 insertions, 105 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 63d965193b22..757f7c11461c 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -9,6 +9,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
proc_tty.o
+proc-y += automount.o
proc-y += cmdline.o
proc-y += cpuinfo.o
proc-y += devices.o
diff --git a/fs/proc/automount.c b/fs/proc/automount.c
new file mode 100644
index 000000000000..5d22b5aa442c
--- /dev/null
+++ b/fs/proc/automount.c
@@ -0,0 +1,28 @@
+#include <linux/list.h>
+#include <linux/mount.h>
+#include <linux/workqueue.h>
+#include "internal.h"
+
+/* List of automounted proc mounts; passed to do_add_mount() and to mark_mounts_for_expiry(). */
+LIST_HEAD(proc_automounts);
+
+static void proc_expire_automounts(struct work_struct *work);
+
+/* Delayed work item whose handler runs another pass of proc_shrink_automounts(). */
+static DECLARE_DELAYED_WORK(proc_automount_task, proc_expire_automounts);
+/* Delay handed to schedule_delayed_work() between passes: 500 * HZ ticks. */
+static int proc_automount_timeout = 500 * HZ;
+
+/*
+ * Run the expiry pass over the proc automount list and, as long as
+ * anything is still on that list afterwards, re-arm the delayed work so
+ * that another pass happens after proc_automount_timeout.
+ */
+void proc_shrink_automounts(void)
+{
+	/*
+	 * mark_mounts_for_expiry() is invoked twice back to back.
+	 * NOTE(review): presumably the first call marks idle mounts and the
+	 * second one actually expires them - confirm against fs/namespace.c.
+	 */
+	mark_mounts_for_expiry(&proc_automounts);
+	mark_mounts_for_expiry(&proc_automounts);
+
+	if (!list_empty(&proc_automounts))
+		schedule_delayed_work(&proc_automount_task,
+				      proc_automount_timeout);
+}
+
+/* Handler of proc_automount_task: runs one more shrink pass (the work argument is unused). */
+static void proc_expire_automounts(struct work_struct *work)
+{
+ proc_shrink_automounts();
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 0c9de19a1633..baf850c09609 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -126,6 +126,8 @@ struct pid_entry {
NOD(NAME, (S_IFREG|(MODE)), \
NULL, &proc_single_file_operations, \
{ .proc_show = show } )
+/* pid_entry for a directory that has inode operations only: no file operations, empty op union. */
+#define MNT(NAME, MODE, iops) \
+ NOD(NAME, (S_IFDIR|(MODE)), &iops, NULL, {})
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
@@ -1514,6 +1516,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *inode = dentry->d_inode;
struct task_struct *task = get_proc_task(inode);
const struct cred *cred;
+ int ret = 0;
if (task) {
if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
@@ -1528,12 +1531,14 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
inode->i_gid = 0;
}
inode->i_mode &= ~(S_ISUID | S_ISGID);
- security_task_to_inode(task, inode);
+ ret = proc_net_revalidate(task, dentry, nd);
+ if (ret == 1)
+ security_task_to_inode(task, inode);
put_task_struct(task);
- return 1;
}
- d_drop(dentry);
- return 0;
+ if (ret == 0)
+ d_drop(dentry);
+ return ret;
}
static int pid_delete_dentry(struct dentry * dentry)
@@ -2488,7 +2493,7 @@ static const struct pid_entry tgid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
#ifdef CONFIG_NET
- DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
+ MNT("net", S_IRUGO|S_IXUGO, proc_net_inode_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
INF("auxv", S_IRUSR, proc_pid_auxv),
@@ -2591,15 +2596,11 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
name.len = snprintf(buf, sizeof(buf), "%d", pid);
dentry = d_hash_and_lookup(mnt->mnt_root, &name);
if (dentry) {
- if (!(current->flags & PF_EXITING))
- shrink_dcache_parent(dentry);
+ shrink_dcache_parent(dentry);
d_drop(dentry);
dput(dentry);
}
- if (tgid == 0)
- goto out;
-
name.name = buf;
name.len = snprintf(buf, sizeof(buf), "%d", tgid);
leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2660,13 +2661,12 @@ void proc_flush_task(struct task_struct *task)
struct upid *upid;
pid = task_pid(task);
- if (thread_group_leader(task))
- tgid = task_tgid(task);
+ tgid = task_tgid(task);
for (i = 0; i <= pid->level; i++) {
upid = &pid->numbers[i];
proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
- tgid ? tgid->numbers[i].nr : 0);
+ tgid->numbers[i].nr);
}
upid = &pid->numbers[pid->level];
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index db7fa5cab988..5ee636abbc2f 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -307,6 +307,21 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
/*
* Return an inode number between PROC_DYNAMIC_FIRST and
* 0xffffffff, or zero on failure.
+ *
+ * Current inode allocations in the proc-fs (hex-numbers):
+ *
+ * 00000000 reserved
+ * 00000001-00000fff static entries (goners)
+ * 001 root-ino
+ *
+ * 00001000-00001fff unused
+ * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
+ * 80000000-efffffff unused
+ * f0000000-ffffffff dynamic entries
+ *
+ * Goal:
+ * Once we split the thing into several virtual filesystems,
+ * we will get rid of magical ranges (and this comment, BTW).
*/
static unsigned int get_inode_number(void)
{
@@ -528,7 +543,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
dp->proc_fops = &proc_dir_operations;
dp->proc_iops = &proc_dir_inode_operations;
}
- dir->nlink++;
} else if (S_ISLNK(dp->mode)) {
if (dp->proc_iops == NULL)
dp->proc_iops = &proc_link_inode_operations;
@@ -551,6 +565,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
dp->next = dir->subdir;
dp->parent = dir;
dir->subdir = dp;
+ if (S_ISDIR(dp->mode))
+ dir->nlink++;
spin_unlock(&proc_subdir_lock);
return 0;
@@ -595,6 +611,24 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
return ent;
}
+/*
+ * Allocate a stand-alone directory entry that acts as its own parent.
+ *
+ * The entry is created through __proc_create() under the name "..", gets
+ * the generic proc directory file/inode operations and a freshly allocated
+ * inode number.  It is not passed to proc_register().
+ *
+ * Returns the new entry, or NULL if either the allocation or the inode
+ * number allocation failed.
+ */
+struct proc_dir_entry *proc_create_root(void)
+{
+	struct proc_dir_entry *no_parent = NULL;
+	struct proc_dir_entry *root;
+
+	root = __proc_create(&no_parent, "..", S_IFDIR | S_IRUGO | S_IXUGO, 2);
+	if (!root)
+		return NULL;
+
+	root->proc_fops = &proc_dir_operations;
+	root->proc_iops = &proc_dir_inode_operations;
+	root->parent = root;
+
+	root->low_ino = get_inode_number();
+	if (!root->low_ino) {
+		/* get_inode_number() returns zero on failure. */
+		kfree(root);
+		return NULL;
+	}
+
+	return root;
+}
+
struct proc_dir_entry *proc_symlink(const char *name,
struct proc_dir_entry *parent, const char *dest)
{
@@ -635,23 +669,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
return ent;
}
-struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
- struct proc_dir_entry *parent)
-{
- struct proc_dir_entry *ent;
-
- ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
- if (ent) {
- ent->data = net;
- if (proc_register(parent, ent) < 0) {
- kfree(ent);
- ent = NULL;
- }
- }
- return ent;
-}
-EXPORT_SYMBOL_GPL(proc_net_mkdir);
-
struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent)
{
@@ -754,6 +771,8 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = *p;
*p = de->next;
de->next = NULL;
+ if (S_ISDIR(de->mode))
+ parent->nlink--;
break;
}
}
@@ -761,6 +780,11 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
if (!de)
return;
+ release_proc_entry(de);
+}
+
+void release_proc_entry(struct proc_dir_entry *de)
+{
spin_lock(&de->pde_unload_lock);
/*
* Stop accepting new callers into module. If you're
@@ -796,8 +820,6 @@ continue_removing:
}
spin_unlock(&de->pde_unload_lock);
- if (S_ISDIR(de->mode))
- parent->nlink--;
de->nlink = 0;
WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
"'%s/%s', leaking at least '%s'\n", __func__,
diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt
deleted file mode 100644
index 77212f938c2c..000000000000
--- a/fs/proc/inode-alloc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-Current inode allocations in the proc-fs (hex-numbers):
-
- 00000000 reserved
- 00000001-00000fff static entries (goners)
- 001 root-ino
-
- 00001000-00001fff unused
- 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
- 80000000-efffffff unused
- f0000000-ffffffff dynamic entries
-
-Goal:
- a) once we'll split the thing into several virtual filesystems we
- will get rid of magical ranges (and this file, BTW).
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3e76bb9b3ad6..0bbb937c5f0d 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -112,7 +112,7 @@ void __init proc_init_inodecache(void)
init_once);
}
-static const struct super_operations proc_sops = {
+const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode,
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd53ff838498..ab62ede71531 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -58,9 +58,17 @@ extern const struct file_operations proc_numa_maps_operations;
extern const struct file_operations proc_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
-extern const struct file_operations proc_net_operations;
extern const struct inode_operations proc_net_inode_operations;
+#ifdef CONFIG_NET
+/* Extra check used by pid_revalidate(): returns 1 if the dentry is still valid, 0 if it must be dropped. */
+int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, struct nameidata *nd);
+#else
+/* Without CONFIG_NET there is nothing to check, so every dentry is reported as valid. */
+static inline int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, struct nameidata *nd)
+{
+ return 1;
+}
+#endif
+
void free_proc_entry(struct proc_dir_entry *de);
void proc_init_inodecache(void);
@@ -84,6 +92,9 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
struct dentry *dentry);
int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
filldir_t filldir);
+struct proc_dir_entry *proc_create_root(void);
+void release_proc_entry(struct proc_dir_entry *de);
+extern const struct super_operations proc_sops;
struct pde_opener {
struct inode *inode;
@@ -91,3 +102,5 @@ struct pde_opener {
int (*release)(struct inode *, struct file *);
struct list_head lh;
};
+
+extern struct list_head proc_automounts;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 04d1270f1c38..baaddad8436c 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -20,15 +20,17 @@
#include <linux/bitops.h>
#include <linux/mount.h>
#include <linux/nsproxy.h>
+#include <linux/namei.h>
#include <net/net_namespace.h>
#include <linux/seq_file.h>
#include "internal.h"
+static struct file_system_type proc_net_fs_type;
+/* Look up the network namespace via the inode's superblock (s_fs_info) and pass it through maybe_get_net(). */
static struct net *get_proc_net(const struct inode *inode)
{
- return maybe_get_net(PDE_NET(PDE(inode)));
+ return maybe_get_net(inode->i_sb->s_fs_info);
}
int seq_open_net(struct inode *ino, struct file *f,
@@ -117,66 +119,91 @@ static struct net *get_proc_task_net(struct inode *dir)
return net;
}
-static struct dentry *proc_tgid_net_lookup(struct inode *dir,
- struct dentry *dentry, struct nameidata *nd)
+/*
+ * Follow /proc/<pid>/net to a mount of the "proc/net" filesystem that
+ * belongs to the task's network namespace.
+ *
+ * A mount for that namespace is obtained with kern_mount_data() and added
+ * on top of @dentry with do_add_mount(); on success nd->path is switched to
+ * the root of the new mount.  If do_add_mount() reports -EBUSY the existing
+ * mounts on the dentry are walked instead.
+ *
+ * Returns ERR_PTR(0) (i.e. NULL) on success or ERR_PTR(-errno) on failure;
+ * on failure nd->path has been released.
+ *
+ * The namespace reference taken by get_proc_task_net() is dropped on every
+ * exit path, and a kern_mount_data() failure is reported with its own error
+ * code rather than -ENOENT.
+ */
+void *proc_net_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct dentry *de;
+	struct vfsmount *mnt;
struct net *net;
-
- de = ERR_PTR(-ENOENT);
- net = get_proc_task_net(dir);
- if (net != NULL) {
- de = proc_lookup_de(net->proc_net, dir, dentry);
- put_net(net);
- }
- return de;
-}
-
-static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- struct net *net;
-
- net = get_proc_task_net(inode);
-
- generic_fillattr(inode, stat);
-
- if (net != NULL) {
- stat->nlink = net->proc_net->nlink;
- put_net(net);
+	int err = -ENOENT;
+
+	net = get_proc_task_net(dentry->d_inode);
+	if (!net)
+		goto out_err;
+
+	mnt = kern_mount_data(&proc_net_fs_type, net);
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
+		goto out_put_net;
+	}
+
+	dput(nd->path.dentry);
+	nd->path.dentry = dget(dentry);
+
+	err = do_add_mount(mntget(mnt), &nd->path, MNT_SHRINKABLE,
+			   &proc_automounts);
+	if (err < 0) {
+		mntput(mnt);
+		if (err == -EBUSY)
+			goto out_follow;
+		goto out_put_net;
}
-
- return 0;
+	err = 0;
+	path_put(&nd->path);
+	nd->path.mnt = mnt;
+	nd->path.dentry = dget(mnt->mnt_root);
+	put_net(net);
+out:
+	return ERR_PTR(err);
+out_follow:
+	/* We raced with ourselves so just walk the mounts */
+	while (d_mountpoint(nd->path.dentry) &&
+	       follow_down(&nd->path.mnt, &nd->path.dentry))
+		;
+	err = 0;
+	put_net(net);
+	goto out;
+out_put_net:
+	/* Error after the namespace reference was taken: give it back. */
+	put_net(net);
+out_err:
+	path_put(&nd->path);
+	goto out;
}
const struct inode_operations proc_net_inode_operations = {
- .lookup = proc_tgid_net_lookup,
- .getattr = proc_tgid_net_getattr,
+ .follow_link = proc_net_follow_link,
};
-static int proc_tgid_net_readdir(struct file *filp, void *dirent,
- filldir_t filldir)
+
+/*
+ * Revalidation helper called from pid_revalidate().
+ *
+ * Returns 1 when the dentry may stay and 0 when it must be dropped.  Only
+ * dentries whose inode uses proc_net_inode_operations are examined, and
+ * only when a nameidata is supplied; everything else is reported as valid.
+ * When follow_down() finds a mount on top of the dentry, that mount must be
+ * a PROC_NET_SUPER_MAGIC filesystem whose s_fs_info equals the task's
+ * current network namespace, otherwise the dentry is reported as invalid.
+ */
+int proc_net_revalidate(struct task_struct *task, struct dentry *dentry,
+ struct nameidata *nd)
{
- int ret;
- struct net *net;
+ struct inode *inode = dentry->d_inode;
+ struct dentry *tdentry;
+ struct vfsmount *tmnt;
+ int ret = 1;
- ret = -EINVAL;
- net = get_proc_task_net(filp->f_path.dentry->d_inode);
- if (net != NULL) {
- ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
- put_net(net);
+ /* Are we talking about a proc/net mount point? */
+ if (!nd || inode->i_op != &proc_net_inode_operations)
+ goto out;
+
+ /*
+ * If the wrong filesystem is mounted on /proc/<pid>/net report the
+ * dentry is invalid.
+ */
+ tmnt = mntget(nd->path.mnt);
+ tdentry = dget(dentry);
+ if (follow_down(&tmnt, &tdentry)) {
+ struct nsproxy *ns;
+
+ rcu_read_lock();
+ ns = task_nsproxy(task);
+ if ((ns == NULL) ||
+ (tmnt->mnt_sb->s_magic != PROC_NET_SUPER_MAGIC) ||
+ (tmnt->mnt_sb->s_fs_info != ns->net_ns))
+ ret = 0;
+ rcu_read_unlock();
}
+ dput(tdentry);
+ mntput(tmnt);
+out:
return ret;
}
-const struct file_operations proc_net_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = proc_tgid_net_readdir,
-};
-
-
struct proc_dir_entry *proc_net_fops_create(struct net *net,
const char *name, mode_t mode, const struct file_operations *fops)
{
@@ -184,28 +211,108 @@ struct proc_dir_entry *proc_net_fops_create(struct net *net,
}
EXPORT_SYMBOL_GPL(proc_net_fops_create);
+/*
+ * Create directory @name below @parent, or below the namespace's proc root
+ * (net->proc_net) when no parent is given.  Returns whatever proc_mkdir()
+ * returns.
+ */
+struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
+	struct proc_dir_entry *parent)
+{
+	return proc_mkdir(name, parent ? parent : net->proc_net);
+}
+EXPORT_SYMBOL_GPL(proc_net_mkdir);
+
void proc_net_remove(struct net *net, const char *name)
{
remove_proc_entry(name, net->proc_net);
}
EXPORT_SYMBOL_GPL(proc_net_remove);
+/*
+ * Populate a freshly created "proc/net" superblock.
+ *
+ * The network namespace is read from sb->s_fs_info; its proc root entry
+ * (net->proc_net) becomes the root inode/dentry of the superblock.  A
+ * reference on that entry is taken with de_get() before the inode is
+ * looked up and dropped again on the failure path.
+ *
+ * Returns 0 on success and -ENOMEM when either the root inode or the root
+ * dentry could not be obtained.
+ */
+static int proc_net_fill_super(struct super_block *sb)
+{
+	struct net *net = sb->s_fs_info;
+	struct proc_dir_entry *netd = net->proc_net;
+	struct inode *root_inode = NULL;
+
+	sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
+	sb->s_blocksize = PAGE_SIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_magic = PROC_NET_SUPER_MAGIC;
+	sb->s_op = &proc_sops;
+	sb->s_time_gran = 1;
+
+	de_get(netd);
+	root_inode = proc_get_inode(sb, netd->low_ino, netd);
+	if (!root_inode)
+		goto out_no_root;
+	root_inode->i_uid = 0;
+	root_inode->i_gid = 0;
+	sb->s_root = d_alloc_root(root_inode);
+	if (!sb->s_root)
+		goto out_no_root;
+	return 0;
+
+out_no_root:
+	/* Give the message an explicit log level instead of the default one. */
+	printk(KERN_ERR "%s: get root inode failed\n", __func__);
+	iput(root_inode);
+	de_put(netd);
+	return -ENOMEM;
+}
+
+/* sget() comparison callback: a superblock matches when its s_fs_info equals @data. */
+static int proc_net_test_super(struct super_block *sb, void *data)
+{
+ return sb->s_fs_info == data;
+}
+
+/* sget() setup callback: store @data in s_fs_info, then return the result of set_anon_super(). */
+static int proc_net_set_super(struct super_block *sb, void *data)
+{
+ sb->s_fs_info = data;
+ return set_anon_super(sb, NULL);
+}
+
+/*
+ * get_sb handler for the "proc/net" filesystem.
+ *
+ * Kernel-internal mounts (MS_KERNMOUNT) pass the network namespace in
+ * @data; for every other mount the caller's own namespace is used.  sget()
+ * then finds or creates the superblock for that namespace, which is filled
+ * in on first use.
+ *
+ * Returns the result of simple_set_mnt() on success, or a negative errno
+ * from sget() / proc_net_fill_super().
+ */
+static int proc_net_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	struct super_block *sb;
+	int err;
+
+	if (!(flags & MS_KERNMOUNT))
+		data = current->nsproxy->net_ns;
+
+	sb = sget(fs_type, proc_net_test_super, proc_net_set_super, data);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	/* A superblock that already has a root needs no further setup. */
+	if (sb->s_root)
+		goto set_mnt;
+
+	sb->s_flags = flags;
+	err = proc_net_fill_super(sb);
+	if (err) {
+		up_write(&sb->s_umount);
+		deactivate_super(sb);
+		return err;
+	}
+	sb->s_flags |= MS_ACTIVE;
+
+set_mnt:
+	return simple_set_mnt(mnt, sb);
+}
+
+/* The "proc/net" filesystem type; proc_net_get_sb() looks superblocks up by network namespace. */
+static struct file_system_type proc_net_fs_type = {
+ .name = "proc/net",
+ .get_sb = proc_net_get_sb,
+ .kill_sb = kill_litter_super,
+};
+
static __net_init int proc_net_ns_init(struct net *net)
{
struct proc_dir_entry *netd, *net_statd;
+ struct vfsmount *mnt;
int err;
err = -ENOMEM;
- netd = kzalloc(sizeof(*netd), GFP_KERNEL);
+ netd = proc_create_root();
if (!netd)
goto out;
- netd->data = net;
- netd->nlink = 2;
- netd->name = "net";
- netd->namelen = 3;
- netd->parent = &proc_root;
-
err = -EEXIST;
net_statd = proc_net_mkdir(net, "stat", netd);
if (!net_statd)
@@ -213,8 +320,17 @@ static __net_init int proc_net_ns_init(struct net *net)
net->proc_net = netd;
net->proc_net_stat = net_statd;
+
+ mnt = kern_mount_data(&proc_net_fs_type, net);
+ if (IS_ERR(mnt))
+ goto free_stat;
+
+ net->proc_mnt = mnt;
+
return 0;
+free_stat:
+ remove_proc_entry("stat", netd);
free_net:
kfree(netd);
out:
@@ -224,7 +340,14 @@ out:
+/*
+ * Per-namespace teardown: remove the "stat" directory, release the
+ * namespace's proc root entry, detach the superblock from the namespace
+ * and drop the mount reference kept in net->proc_mnt.
+ */
static __net_exit void proc_net_ns_exit(struct net *net)
{
remove_proc_entry("stat", net->proc_net);
- kfree(net->proc_net);
+ release_proc_entry(net->proc_net);
+ /*
+ * We won't be looking up this super block any more so set s_fs_info to
+ * NULL to ensure it doesn't conflict with network namespaces allocated
+ * in the future at the same address.
+ */
+ net->proc_mnt->mnt_sb->s_fs_info = NULL;
+ mntput(net->proc_mnt);
}
static struct pernet_operations __net_initdata proc_net_ns_ops = {
@@ -235,6 +358,6 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
+/*
+ * Boot-time setup of /proc/net: create the "net" -> "self/net" symlink,
+ * register the "proc/net" filesystem type and then the per-namespace
+ * operations.
+ *
+ * Returns 0 on success or the negative errno of whichever registration
+ * failed.  If the per-namespace registration fails, the filesystem type is
+ * unregistered again so that no half-initialised state is left behind.
+ */
int __init proc_net_init(void)
{
+	int err;
+
proc_symlink("net", NULL, "self/net");
-
- return register_pernet_subsys(&proc_net_ns_ops);
+	err = register_filesystem(&proc_net_fs_type);
+	if (err)
+		return err;
+
+	err = register_pernet_subsys(&proc_net_ns_ops);
+	if (err)
+		unregister_filesystem(&proc_net_fs_type);
+	return err;
}