diff options
-rw-r--r-- | Documentation/filesystems/proc.txt | 1 | ||||
-rw-r--r-- | arch/mips/kernel/stacktrace.c | 24 | ||||
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/automount.c | 28 | ||||
-rw-r--r-- | fs/proc/base.c | 249 | ||||
-rw-r--r-- | fs/proc/generic.c | 55 | ||||
-rw-r--r-- | fs/proc/inode.c | 5 | ||||
-rw-r--r-- | fs/proc/internal.h | 15 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 235 | ||||
-rw-r--r-- | fs/proc/root.c | 8 | ||||
-rw-r--r-- | include/linux/magic.h | 1 | ||||
-rw-r--r-- | include/linux/proc_fs.h | 10 | ||||
-rw-r--r-- | include/net/net_namespace.h | 1 | ||||
-rw-r--r-- | kernel/exit.c | 1 | ||||
-rw-r--r-- | security/selinux/hooks.c | 29 |
15 files changed, 443 insertions, 220 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 71df353e367c..334ef2f983fa 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -140,6 +140,7 @@ Table 1-1: Process specific entries in /proc statm Process memory status information status Process status in human readable form wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + stack Report full stack trace, enable via CONFIG_STACKTRACE smaps Extension based on maps, the rss size for each mapped file .............................................................................. diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 0632e2a849c0..58f5cd76c8c3 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -32,7 +32,8 @@ static void save_raw_context_stack(struct stack_trace *trace, } } -static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) +static void save_context_stack(struct stack_trace *trace, + struct task_struct *tsk, struct pt_regs *regs) { unsigned long sp = regs->regs[29]; #ifdef CONFIG_KALLSYMS @@ -41,7 +42,7 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) if (raw_show_trace || !__kernel_text_address(pc)) { unsigned long stack_page = - (unsigned long)task_stack_page(current); + (unsigned long)task_stack_page(tsk); if (stack_page && sp >= stack_page && sp <= stack_page + THREAD_SIZE - 32) save_raw_context_stack(trace, sp); @@ -54,7 +55,7 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) trace->entries[trace->nr_entries++] = pc; if (trace->nr_entries >= trace->max_entries) break; - pc = unwind_stack(current, &sp, pc, &ra); + pc = unwind_stack(tsk, &sp, pc, &ra); } while (pc); #else save_raw_context_stack(trace, sp); @@ -66,12 +67,23 @@ static void save_context_stack(struct stack_trace *trace, struct pt_regs *regs) */ void save_stack_trace(struct stack_trace *trace) { + 
save_stack_trace_tsk(current, trace); +} +EXPORT_SYMBOL_GPL(save_stack_trace); + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ struct pt_regs dummyregs; struct pt_regs *regs = &dummyregs; WARN_ON(trace->nr_entries || !trace->max_entries); - prepare_frametrace(regs); - save_context_stack(trace, regs); + if (tsk != current) { + regs->regs[29] = tsk->thread.reg29; + regs->regs[31] = 0; + regs->cp0_epc = tsk->thread.reg31; + } else + prepare_frametrace(regs); + save_context_stack(trace, tsk, regs); } -EXPORT_SYMBOL_GPL(save_stack_trace); +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 63d965193b22..757f7c11461c 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -9,6 +9,7 @@ proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ proc_tty.o +proc-y += automount.o proc-y += cmdline.o proc-y += cpuinfo.o proc-y += devices.o diff --git a/fs/proc/automount.c b/fs/proc/automount.c new file mode 100644 index 000000000000..5d22b5aa442c --- /dev/null +++ b/fs/proc/automount.c @@ -0,0 +1,28 @@ +#include <linux/list.h> +#include <linux/mount.h> +#include <linux/workqueue.h> +#include "internal.h" + +LIST_HEAD(proc_automounts); + +static void proc_expire_automounts(struct work_struct *work); + +static DECLARE_DELAYED_WORK(proc_automount_task, proc_expire_automounts); +static int proc_automount_timeout = 500 * HZ; + +void proc_shrink_automounts(void) +{ + struct list_head *list = &proc_automounts; + + mark_mounts_for_expiry(list); + mark_mounts_for_expiry(list); + if (list_empty(list)) + return; + + schedule_delayed_work(&proc_automount_task, proc_automount_timeout); +} + +static void proc_expire_automounts(struct work_struct *work) +{ + proc_shrink_automounts(); +} diff --git a/fs/proc/base.c b/fs/proc/base.c index cf42c42cbfbb..94ec750ed7c7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -65,6 +65,7 @@ #include <linux/mm.h> #include <linux/rcupdate.h> 
#include <linux/kallsyms.h> +#include <linux/stacktrace.h> #include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> @@ -109,25 +110,24 @@ struct pid_entry { .op = OP, \ } -#define DIR(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFDIR|(MODE)), \ - &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \ - {} ) -#define LNK(NAME, OTYPE) \ +#define DIR(NAME, MODE, iops, fops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) +#define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ - { .proc_get_link = &proc_##OTYPE##_link } ) -#define REG(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), NULL, \ - &proc_##OTYPE##_operations, {}) -#define INF(NAME, MODE, OTYPE) \ + { .proc_get_link = get_link } ) +#define REG(NAME, MODE, fops) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) +#define INF(NAME, MODE, read) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ - { .proc_read = &proc_##OTYPE } ) -#define ONE(NAME, MODE, OTYPE) \ + { .proc_read = read } ) +#define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ - { .proc_show = &proc_##OTYPE } ) + { .proc_show = show } ) +#define MNT(NAME, MODE, iops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, NULL, {}) /* * Count the number of hardlinks for the pid_entry table, excluding the . 
@@ -340,6 +340,37 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +#ifdef CONFIG_STACKTRACE + +#define MAX_STACK_TRACE_DEPTH 64 + +static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct stack_trace trace; + unsigned long *entries; + int i; + + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + trace.nr_entries = 0; + trace.max_entries = MAX_STACK_TRACE_DEPTH; + trace.entries = entries; + trace.skip = 0; + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%p>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + kfree(entries); + + return 0; +} +#endif + #ifdef CONFIG_SCHEDSTATS /* * Provides /proc/PID/schedstat @@ -1186,8 +1217,6 @@ static int sched_show(struct seq_file *m, void *v) struct inode *inode = m->private; struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1205,8 +1234,6 @@ sched_write(struct file *file, const char __user *buf, struct inode *inode = file->f_path.dentry->d_inode; struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1491,6 +1518,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); const struct cred *cred; + int ret = 0; if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || @@ -1505,12 +1533,14 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = 0; } inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); + ret = proc_net_revalidate(task, dentry, nd); + if (ret == 1) + security_task_to_inode(task, inode); put_task_struct(task); - return 1; } - d_drop(dentry); - return 0; + if (ret == 0) + d_drop(dentry); + return ret; } static int 
pid_delete_dentry(struct dentry * dentry) @@ -2138,12 +2168,12 @@ static const struct file_operations proc_pid_attr_operations = { }; static const struct pid_entry attr_dir_stuff[] = { - REG("current", S_IRUGO|S_IWUGO, pid_attr), - REG("prev", S_IRUGO, pid_attr), - REG("exec", S_IRUGO|S_IWUGO, pid_attr), - REG("fscreate", S_IRUGO|S_IWUGO, pid_attr), - REG("keycreate", S_IRUGO|S_IWUGO, pid_attr), - REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr), + REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("prev", S_IRUGO, proc_pid_attr_operations), + REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), }; static int proc_attr_dir_readdir(struct file * filp, @@ -2465,74 +2495,77 @@ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { - DIR("task", S_IRUGO|S_IXUGO, task), - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), + DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), #ifdef CONFIG_NET - DIR("net", S_IRUGO|S_IXUGO, net), + MNT("net", S_IRUGO|S_IXUGO, proc_net_inode_operations), #endif - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - ONE("status", S_IRUGO, pid_status), - ONE("personality", S_IRUSR, pid_personality), - INF("limits", S_IRUSR, pid_limits), + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUSR, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - 
REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, pid_syscall), + INF("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - ONE("stat", S_IRUGO, tgid_stat), - ONE("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tgid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), - REG("mountinfo", S_IRUGO, mountinfo), - REG("mountstats", S_IRUSR, mountstats), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", S_IRUGO, proc_mountinfo_operations), + REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), - REG("pagemap", S_IRUSR, pagemap), + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP - 
REG("latency", S_IRUGO, lstats), + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), - REG("sessionid", S_IRUGO, sessionid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) - REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), + REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, tgid_io_accounting), + INF("io", S_IRUGO, proc_tgid_io_accounting), #endif }; @@ -2569,15 +2602,11 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - if (!(current->flags & PF_EXITING)) - shrink_dcache_parent(dentry); + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (tgid == 0) - goto out; - name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2638,13 +2667,12 @@ void proc_flush_task(struct task_struct *task) struct upid *upid; pid = task_pid(task); - if (thread_group_leader(task)) - tgid = task_tgid(task); + tgid = task_tgid(task); for (i = 0; i <= pid->level; i++) { upid = 
&pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - tgid ? tgid->numbers[i].nr : 0); + tgid->numbers[i].nr); } upid = &pid->numbers[pid->level]; @@ -2805,66 +2833,69 @@ out_no_task: * Tasks */ static const struct pid_entry tid_base_stuff[] = { - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - ONE("status", S_IRUGO, pid_status), - ONE("personality", S_IRUSR, pid_personality), - INF("limits", S_IRUSR, pid_limits), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUSR, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, pid_syscall), + INF("syscall", S_IRUSR, proc_pid_syscall), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - ONE("stat", S_IRUGO, tid_stat), - ONE("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), - REG("mountinfo", S_IRUGO, mountinfo), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", 
S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), - REG("pagemap", S_IRUSR, pagemap), + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP - REG("latency", S_IRUGO, lstats), + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), - REG("sessionid", S_IRUSR, sessionid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUSR, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, tid_io_accounting), + INF("io", S_IRUGO, proc_tid_io_accounting), #endif }; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 60a359b35582..03155ff075bf 100644 --- 
a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -14,7 +14,6 @@ #include <linux/stat.h> #include <linux/module.h> #include <linux/mount.h> -#include <linux/smp_lock.h> #include <linux/init.h> #include <linux/idr.h> #include <linux/namei.h> @@ -379,7 +378,6 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct inode *inode = NULL; int error = -ENOENT; - lock_kernel(); spin_lock(&proc_subdir_lock); for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) @@ -397,7 +395,6 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, } spin_unlock(&proc_subdir_lock); out_unlock: - unlock_kernel(); if (inode) { dentry->d_op = &proc_dentry_operations; @@ -432,8 +429,6 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, struct inode *inode = filp->f_path.dentry->d_inode; int ret = 0; - lock_kernel(); - ino = inode->i_ino; i = filp->f_pos; switch (i) { @@ -487,7 +482,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, spin_unlock(&proc_subdir_lock); } ret = 1; -out: unlock_kernel(); +out: return ret; } @@ -504,6 +499,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) * the /proc directory. 
*/ static const struct file_operations proc_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = proc_readdir, }; @@ -532,7 +528,6 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->proc_fops = &proc_dir_operations; dp->proc_iops = &proc_dir_inode_operations; } - dir->nlink++; } else if (S_ISLNK(dp->mode)) { if (dp->proc_iops == NULL) dp->proc_iops = &proc_link_inode_operations; @@ -555,6 +550,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp dp->next = dir->subdir; dp->parent = dir; dir->subdir = dp; + if (S_ISDIR(dp->mode)) + dir->nlink++; spin_unlock(&proc_subdir_lock); return 0; @@ -599,6 +596,24 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, return ent; } +struct proc_dir_entry *proc_create_root(void) +{ + struct proc_dir_entry *ent, *parent = NULL; + + ent = __proc_create(&parent, "..", S_IFDIR | S_IRUGO | S_IXUGO, 2); + if (ent) { + ent->proc_fops = &proc_dir_operations; + ent->proc_iops = &proc_dir_inode_operations; + ent->low_ino = get_inode_number(); + ent->parent = ent; + if (!ent->low_ino) { + kfree(ent); + ent = NULL; + } + } + return ent; +} + struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent, const char *dest) { @@ -639,23 +654,6 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, return ent; } -struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, - struct proc_dir_entry *parent) -{ - struct proc_dir_entry *ent; - - ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); - if (ent) { - ent->data = net; - if (proc_register(parent, ent) < 0) { - kfree(ent); - ent = NULL; - } - } - return ent; -} -EXPORT_SYMBOL_GPL(proc_net_mkdir); - struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) { @@ -758,6 +756,8 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = *p; *p = 
de->next; de->next = NULL; + if (S_ISDIR(de->mode)) + parent->nlink--; break; } } @@ -765,6 +765,11 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) if (!de) return; + release_proc_entry(de); +} + +void release_proc_entry(struct proc_dir_entry *de) +{ spin_lock(&de->pde_unload_lock); /* * Stop accepting new callers into module. If you're @@ -800,8 +805,6 @@ continue_removing: } spin_unlock(&de->pde_unload_lock); - if (S_ISDIR(de->mode)) - parent->nlink--; de->nlink = 0; WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index bcb674275348..cf4a3aeb7f38 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -35,16 +35,13 @@ struct proc_dir_entry *de_get(struct proc_dir_entry *de) */ void de_put(struct proc_dir_entry *de) { - lock_kernel(); if (!atomic_read(&de->count)) { printk("de_put: entry %s already free!\n", de->name); - unlock_kernel(); return; } if (atomic_dec_and_test(&de->count)) free_proc_entry(de); - unlock_kernel(); } /* @@ -123,7 +120,7 @@ void __init proc_init_inodecache(void) proc_get_inodes, kick_inodes); } -static const struct super_operations proc_sops = { +const struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, .drop_inode = generic_delete_inode, diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3e8aeb8b61ce..5cfe79805ea5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -60,9 +60,17 @@ extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; -extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; +#ifdef CONFIG_NET +int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, 
struct nameidata *nd); +#else +static inline int proc_net_revalidate(struct task_struct *tsk, struct dentry *dentry, struct nameidata *nd) +{ + return 1; +} +#endif + void free_proc_entry(struct proc_dir_entry *de); void proc_init_inodecache(void); @@ -86,6 +94,9 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, struct dentry *dentry); int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, filldir_t filldir); +struct proc_dir_entry *proc_create_root(void); +void release_proc_entry(struct proc_dir_entry *de); +extern const struct super_operations proc_sops; struct pde_opener { struct inode *inode; @@ -93,3 +104,5 @@ struct pde_opener { int (*release)(struct inode *, struct file *); struct list_head lh; }; + +extern struct list_head proc_automounts; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 7bc296f424ae..baaddad8436c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -18,18 +18,19 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/bitops.h> -#include <linux/smp_lock.h> #include <linux/mount.h> #include <linux/nsproxy.h> +#include <linux/namei.h> #include <net/net_namespace.h> #include <linux/seq_file.h> #include "internal.h" +static struct file_system_type proc_net_fs_type; static struct net *get_proc_net(const struct inode *inode) { - return maybe_get_net(PDE_NET(PDE(inode))); + return maybe_get_net(inode->i_sb->s_fs_info); } int seq_open_net(struct inode *ino, struct file *f, @@ -118,65 +119,91 @@ static struct net *get_proc_task_net(struct inode *dir) return net; } -static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) +void *proc_net_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct dentry *de; + /* Follow to a mount point of the proper network namespace. 
*/ + struct vfsmount *mnt; struct net *net; - - de = ERR_PTR(-ENOENT); - net = get_proc_task_net(dir); - if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); - put_net(net); - } - return de; -} - -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct net *net; - - net = get_proc_task_net(inode); - - generic_fillattr(inode, stat); - - if (net != NULL) { - stat->nlink = net->proc_net->nlink; - put_net(net); + int err = -ENOENT; + + net = get_proc_task_net(dentry->d_inode); + if (!net) + goto out_err; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto out_err; + + dput(nd->path.dentry); + nd->path.dentry = dget(dentry); + + err = do_add_mount(mntget(mnt), &nd->path, MNT_SHRINKABLE, + &proc_automounts); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; } - - return 0; + err = 0; + path_put(&nd->path); + nd->path.mnt = mnt; + nd->path.dentry = dget(mnt->mnt_root); + put_net(net); +out: + return ERR_PTR(err); +out_err: + path_put(&nd->path); + goto out; +out_follow: + /* We raced with ourselves so just walk the mounts */ + while (d_mountpoint(nd->path.dentry) && + follow_down(&nd->path.mnt, &nd->path.dentry)) + ; + err = 0; + goto out; } const struct inode_operations proc_net_inode_operations = { - .lookup = proc_tgid_net_lookup, - .getattr = proc_tgid_net_getattr, + .follow_link = proc_net_follow_link, }; -static int proc_tgid_net_readdir(struct file *filp, void *dirent, - filldir_t filldir) + +int proc_net_revalidate(struct task_struct *task, struct dentry *dentry, + struct nameidata *nd) { - int ret; - struct net *net; + struct inode *inode = dentry->d_inode; + struct dentry *tdentry; + struct vfsmount *tmnt; + int ret = 1; - ret = -EINVAL; - net = get_proc_task_net(filp->f_path.dentry->d_inode); - if (net != NULL) { - ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); - 
put_net(net); + /* Are we talking about a proc/net mount point? */ + if (!nd || inode->i_op != &proc_net_inode_operations) + goto out; + + /* + * If the wrong filesystem is mounted on /proc/<pid>/net report the + * dentry is invalid. + */ + tmnt = mntget(nd->path.mnt); + tdentry = dget(dentry); + if (follow_down(&tmnt, &tdentry)) { + struct nsproxy *ns; + + rcu_read_lock(); + ns = task_nsproxy(task); + if ((ns == NULL) || + (tmnt->mnt_sb->s_magic != PROC_NET_SUPER_MAGIC) || + (tmnt->mnt_sb->s_fs_info != ns->net_ns)) + ret = 0; + rcu_read_unlock(); } + dput(tdentry); + mntput(tmnt); +out: return ret; } -const struct file_operations proc_net_operations = { - .read = generic_read_dir, - .readdir = proc_tgid_net_readdir, -}; - - struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { @@ -184,28 +211,108 @@ struct proc_dir_entry *proc_net_fops_create(struct net *net, } EXPORT_SYMBOL_GPL(proc_net_fops_create); +struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, + struct proc_dir_entry *parent) +{ + if (!parent) + parent = net->proc_net; + return proc_mkdir(name, parent); +} +EXPORT_SYMBOL_GPL(proc_net_mkdir); + void proc_net_remove(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); } EXPORT_SYMBOL_GPL(proc_net_remove); +static int proc_net_fill_super(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + struct proc_dir_entry *netd = net->proc_net; + struct inode *root_inode = NULL; + + sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = PROC_NET_SUPER_MAGIC; + sb->s_op = &proc_sops; + sb->s_time_gran = 1; + + de_get(netd); + root_inode = proc_get_inode(sb, netd->low_ino, netd); + if (!root_inode) + goto out_no_root; + root_inode->i_uid = 0; + root_inode->i_gid = 0; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_no_root; + return 0; + 
+out_no_root: + printk("%s: get root inode failed\n", __func__); + iput(root_inode); + de_put(netd); + return -ENOMEM; +} + +static int proc_net_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_net_set_super(struct super_block *sb, void *data) +{ + sb->s_fs_info = data; + return set_anon_super(sb, NULL); +} + +static int proc_net_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct super_block *sb; + + if (!(flags & MS_KERNMOUNT)) + data = current->nsproxy->net_ns; + + sb = sget(fs_type, proc_net_test_super, proc_net_set_super, data); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + int err; + sb->s_flags = flags; + err = proc_net_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + sb->s_flags |= MS_ACTIVE; + } + + return simple_set_mnt(mnt, sb); +} + +static struct file_system_type proc_net_fs_type = { + .name = "proc/net", + .get_sb = proc_net_get_sb, + .kill_sb = kill_litter_super, +}; + static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + struct vfsmount *mnt; int err; err = -ENOMEM; - netd = kzalloc(sizeof(*netd), GFP_KERNEL); + netd = proc_create_root(); if (!netd) goto out; - netd->data = net; - netd->nlink = 2; - netd->name = "net"; - netd->namelen = 3; - netd->parent = &proc_root; - err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) @@ -213,8 +320,17 @@ static __net_init int proc_net_ns_init(struct net *net) net->proc_net = netd; net->proc_net_stat = net_statd; + + mnt = kern_mount_data(&proc_net_fs_type, net); + if (IS_ERR(mnt)) + goto free_stat; + + net->proc_mnt = mnt; + return 0; +free_stat: + remove_proc_entry("stat", netd); free_net: kfree(netd); out: @@ -224,7 +340,14 @@ out: static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - 
kfree(net->proc_net); + release_proc_entry(net->proc_net); + /* + * We won't be looking up this super block any more so set s_fs_info to + * NULL to ensure it doesn't conflict with network namespaces allocated + * in the future at the same address. + */ + net->proc_mnt->mnt_sb->s_fs_info = NULL; + mntput(net->proc_mnt); } static struct pernet_operations __net_initdata proc_net_ns_ops = { @@ -235,6 +358,6 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { proc_symlink("net", NULL, "self/net"); - + register_filesystem(&proc_net_fs_type); return register_pernet_subsys(&proc_net_ns_ops); } diff --git a/fs/proc/root.c b/fs/proc/root.c index 7761602af9de..f6299a25594e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -16,7 +16,6 @@ #include <linux/sched.h> #include <linux/module.h> #include <linux/bitops.h> -#include <linux/smp_lock.h> #include <linux/mount.h> #include <linux/pid_namespace.h> @@ -162,17 +161,12 @@ static int proc_root_readdir(struct file * filp, unsigned int nr = filp->f_pos; int ret; - lock_kernel(); - if (nr < FIRST_PROCESS_ENTRY) { int error = proc_readdir(filp, dirent, filldir); - if (error <= 0) { - unlock_kernel(); + if (error <= 0) return error; - } filp->f_pos = FIRST_PROCESS_ENTRY; } - unlock_kernel(); ret = proc_pid_readdir(filp, dirent, filldir); return ret; diff --git a/include/linux/magic.h b/include/linux/magic.h index a07aa79593b7..ed0ab7dc2c74 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -30,6 +30,7 @@ #define NFS_SUPER_MAGIC 0x6969 #define OPENPROM_SUPER_MAGIC 0x9fa1 #define PROC_SUPER_MAGIC 0x9fa0 +#define PROC_NET_SUPER_MAGIC 0x706e6574 #define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ #define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index b8bdb96eff78..4f13811bdcbd 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -101,6 +101,7 @@ extern spinlock_t 
proc_subdir_lock; extern void proc_root_init(void); +void proc_shrink_automounts(void); void proc_flush_task(struct task_struct *task); struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); @@ -207,6 +208,10 @@ static inline void proc_flush_task(struct task_struct *task) { } +static inline void proc_shrink_automounts(void) +{ +} + static inline struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *proc_create(const char *name, @@ -299,11 +304,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode) return PROC_I(inode)->pde; } -static inline struct net *PDE_NET(struct proc_dir_entry *pde) -{ - return pde->parent->data; -} - struct proc_maps_private { struct pid *pid; struct task_struct *task; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6fc13d905c5f..055a82cd5f8d 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,7 @@ struct net { struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; + struct vfsmount *proc_mnt; #ifdef CONFIG_SYSCTL struct ctl_table_set sysctls; diff --git a/kernel/exit.c b/kernel/exit.c index e69edc74aeeb..03cfc486bbc0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -174,6 +174,7 @@ repeat: * can't be modifying its own credentials */ atomic_dec(&__task_cred(p)->user->processes); + proc_shrink_automounts(); proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 51e8c75b00cc..9155fa9d6fad 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -698,7 +698,8 @@ static int selinux_set_mnt_opts(struct super_block *sb, goto out; } - if (strcmp(sb->s_type->name, "proc") == 0) + /* "proc", "proc/net" */ + if 
(strncmp(sb->s_type->name, "proc", 4) == 0) sbsec->proc = 1; /* Determine the labeling behavior to use for this filesystem type. */ @@ -1149,16 +1150,18 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc } #ifdef CONFIG_PROC_FS -static int selinux_proc_get_sid(struct proc_dir_entry *de, +static int selinux_proc_get_sid(struct super_block *sb, + struct proc_dir_entry *de, u16 tclass, u32 *sid) { int buflen, rc; char *buffer, *path, *end; + rc = -ENOMEM; buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) - return -ENOMEM; + goto out; buflen = PAGE_SIZE; end = buffer+buflen; @@ -1169,19 +1172,32 @@ static int selinux_proc_get_sid(struct proc_dir_entry *de, while (de && de != de->parent) { buflen -= de->namelen + 1; if (buflen < 0) - break; + goto out_free; end -= de->namelen; memcpy(end, de->name, de->namelen); *--end = '/'; path = end; de = de->parent; } + if (strcmp(sb->s_type->name, "proc") != 0) { + const char *name = sb->s_type->name + 4; + int namelen = strlen(name); + buflen -= namelen; + if (buflen < 0) + goto out_free; + end -= namelen; + memcpy(end, name, namelen); + path = end; + } rc = security_genfs_sid("proc", path, tclass, sid); +out_free: free_page((unsigned long)buffer); +out: return rc; } #else -static int selinux_proc_get_sid(struct proc_dir_entry *de, +static int selinux_proc_get_sid(struct super_block *sb, + struct proc_dir_entry *de, u16 tclass, u32 *sid) { @@ -1330,7 +1346,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent struct proc_inode *proci = PROC_I(inode); if (proci->pde) { isec->sclass = inode_mode_to_security_class(inode->i_mode); - rc = selinux_proc_get_sid(proci->pde, + rc = selinux_proc_get_sid(inode->i_sb, + proci->pde, isec->sclass, &sid); if (rc) |