diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Kconfig | 4 | ||||
-rw-r--r-- | fs/proc/array.c | 11 | ||||
-rw-r--r-- | fs/proc/base.c | 126 | ||||
-rw-r--r-- | fs/proc/generic.c | 9 | ||||
-rw-r--r-- | fs/proc/inode.c | 30 | ||||
-rw-r--r-- | fs/proc/meminfo.c | 8 | ||||
-rw-r--r-- | fs/proc/nommu.c | 1 | ||||
-rw-r--r-- | fs/proc/proc_net.c | 19 | ||||
-rw-r--r-- | fs/proc/proc_sysctl.c | 196 | ||||
-rw-r--r-- | fs/proc/root.c | 128 | ||||
-rw-r--r-- | fs/proc/self.c | 8 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 56 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 18 | ||||
-rw-r--r-- | fs/proc/thread_self.c | 8 | ||||
-rw-r--r-- | fs/proc/vmcore.c | 6 |
15 files changed, 475 insertions, 153 deletions
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 27ef84d99f59..971a42f6357d 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -23,7 +23,7 @@ config PROC_FS /proc" or the equivalent line in /etc/fstab does the job. The /proc file system is explained in the file - <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage + <file:Documentation/filesystems/proc.rst> and on the proc(5) manpage ("man 5 proc"). This option will enlarge your kernel by about 67 KB. Several @@ -95,7 +95,7 @@ config PROC_CHILDREN default n help Provides a fast way to retrieve first level children pids of a task. See - <file:Documentation/filesystems/proc.txt> for more information. + <file:Documentation/filesystems/proc.rst> for more information. Say Y if you are running any user-space software which takes benefit from this interface. For example, rkt is such a piece of software. diff --git a/fs/proc/array.c b/fs/proc/array.c index 8e16f14bb05a..55ecbeb3a721 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -92,7 +92,6 @@ #include <linux/user_namespace.h> #include <linux/fs_struct.h> -#include <asm/pgtable.h> #include <asm/processor.h> #include "internal.h" @@ -248,8 +247,8 @@ void render_sigset_t(struct seq_file *m, const char *header, seq_putc(m, '\n'); } -static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, - sigset_t *catch) +static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *sigign, + sigset_t *sigcatch) { struct k_sigaction *k; int i; @@ -257,9 +256,9 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, k = p->sighand->action; for (i = 1; i <= _NSIG; ++i, ++k) { if (k->sa.sa_handler == SIG_IGN) - sigaddset(ign, i); + sigaddset(sigign, i); else if (k->sa.sa_handler != SIG_DFL) - sigaddset(catch, i); + sigaddset(sigcatch, i); } } @@ -728,7 +727,7 @@ static int children_seq_show(struct seq_file *seq, void *v) { struct inode *inode = file_inode(seq->file); - seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode))); + seq_printf(seq, "%d ", pid_nr_ns(v, proc_pid_ns(inode->i_sb))); return 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 6042b646ab27..d86c0afc8a85 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -697,13 +697,21 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr) * May current process learn task's sched/cmdline info (for hide_pid_min=1) * or euid/egid (for hide_pid_min=2)? */ -static bool has_pid_permissions(struct pid_namespace *pid, +static bool has_pid_permissions(struct proc_fs_info *fs_info, struct task_struct *task, - int hide_pid_min) + enum proc_hidepid hide_pid_min) { - if (pid->hide_pid < hide_pid_min) + /* + * If 'hidpid' mount option is set force a ptrace check, + * we indicate that we are using a filesystem syscall + * by passing PTRACE_MODE_READ_FSCREDS + */ + if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) + return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); + + if (fs_info->hide_pid < hide_pid_min) return true; - if (in_group_p(pid->pid_gid)) + if (in_group_p(fs_info->pid_gid)) return true; return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } @@ -711,18 +719,18 @@ static bool has_pid_permissions(struct pid_namespace *pid, static int proc_pid_permission(struct inode *inode, int mask) { - struct pid_namespace *pid = proc_pid_ns(inode); + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; bool has_perms; task = get_proc_task(inode); if (!task) return -ESRCH; - has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); + has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { - if (pid->hide_pid == HIDEPID_INVISIBLE) { + if (fs_info->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process @@ -746,7 +754,7 @@ static const struct inode_operations proc_def_inode_operations = { static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns = proc_pid_ns(inode); + struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; @@ -1415,7 +1423,7 @@ static const struct file_operations proc_fail_nth_operations = { static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct pid_namespace *ns = proc_pid_ns(inode); + struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct task_struct *p; p = get_proc_task(inode); @@ -1573,6 +1581,7 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; + char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ @@ -1584,10 +1593,21 @@ static ssize_t timens_offsets_write(struct file *file, const char __user *buf, next_line = NULL; } - err = sscanf(pos, "%u %lld %lu", &off->clockid, + err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; + + clock[sizeof(clock) - 1] = 0; + if (strcmp(clock, "monotonic") == 0 || + strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) + off->clockid = CLOCK_MONOTONIC; + else if (strcmp(clock, "boottime") == 0 || + strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) + off->clockid = CLOCK_BOOTTIME; + else + goto out; + noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) @@ -1897,7 +1917,7 @@ int pid_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); - struct pid_namespace *pid = proc_pid_ns(inode); + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; generic_fillattr(inode, stat); @@ -1907,7 +1927,7 @@ int pid_getattr(const struct path *path, struct kstat *stat, rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { - if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { + if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, @@ -2092,11 +2112,11 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { - status = down_read_killable(&mm->mmap_sem); + status = mmap_read_lock_killable(mm); if (!status) { exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); } } @@ -2143,7 +2163,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; - rc = down_read_killable(&mm->mmap_sem); + rc = mmap_read_lock_killable(mm); if (rc) goto out_mmput; @@ -2154,7 +2174,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path) path_get(path); rc = 0; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_mmput: mmput(mm); @@ -2244,7 +2264,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out_put_task; result = ERR_PTR(-EINTR); - if (down_read_killable(&mm->mmap_sem)) + if (mmap_read_lock_killable(mm)) goto out_put_mm; result = ERR_PTR(-ENOENT); @@ -2257,7 +2277,7 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: @@ -2302,7 +2322,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) if (!mm) goto out_put_task; - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); goto out_put_task; @@ -2313,11 +2333,11 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) /* * We need two passes here: * - * 1) Collect vmas of mapped files with mmap_sem taken - * 2) Release mmap_sem and instantiate entries + * 1) Collect vmas of mapped files with mmap_lock taken + * 2) Release mmap_lock and instantiate entries * * otherwise we get lockdep complained, since filldir() - * routine might require mmap_sem taken in might_fault(). + * routine might require mmap_lock taken in might_fault(). */ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { @@ -2329,7 +2349,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); if (!p) { ret = -ENOMEM; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); goto out_put_task; } @@ -2338,7 +2358,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) p->end = vma->vm_end; p->mode = vma->vm_file->f_mode; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); for (i = 0; i < nr_files; i++) { @@ -2458,7 +2478,7 @@ static int proc_timers_open(struct inode *inode, struct file *file) return -ENOMEM; tp->pid = proc_pid(inode); - tp->ns = proc_pid_ns(inode); + tp->ns = proc_pid_ns(inode->i_sb); return 0; } @@ -2758,6 +2778,15 @@ static const struct pid_entry smack_attr_dir_stuff[] = { LSM_DIR_OPS(smack); #endif +#ifdef CONFIG_SECURITY_APPARMOR +static const struct pid_entry apparmor_attr_dir_stuff[] = { + ATTR("apparmor", "current", 0666), + ATTR("apparmor", "prev", 0444), + ATTR("apparmor", "exec", 0666), +}; +LSM_DIR_OPS(apparmor); +#endif + static const struct pid_entry attr_dir_stuff[] = { ATTR(NULL, "current", 0666), ATTR(NULL, "prev", 0444), @@ -2769,6 +2798,10 @@ static const struct pid_entry attr_dir_stuff[] = { DIR("smack", 0555, proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), #endif +#ifdef CONFIG_SECURITY_APPARMOR + DIR("apparmor", 0555, + proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops), +#endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) @@ -3274,7 +3307,6 @@ static const struct inode_operations proc_tgid_base_inode_operations = { void proc_flush_pid(struct pid *pid) { proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); - put_pid(pid); } static struct dentry *proc_pid_instantiate(struct dentry * dentry, @@ -3301,6 +3333,7 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) { struct task_struct *task; unsigned tgid; + struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); @@ -3308,7 +3341,8 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) if (tgid == ~0U) goto out; - ns = dentry->d_sb->s_fs_info; + fs_info = proc_sb_info(dentry->d_sb); + ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tgid, ns); if (task) @@ -3317,7 +3351,14 @@ struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) if (!task) goto out; + /* Limit procfs to only ptraceable tasks */ + if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) { + if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS)) + goto out_put_task; + } + result = proc_pid_instantiate(dentry, task, NULL); +out_put_task: put_task_struct(task); out: return result; @@ -3343,20 +3384,8 @@ retry: pid = find_ge_pid(iter.tgid, ns); if (pid) { iter.tgid = pid_nr_ns(pid, ns); - iter.task = pid_task(pid, PIDTYPE_PID); - /* What we to know is if the pid we have find is the - * pid of a thread_group_leader. Testing for task - * being a thread_group_leader is the obvious thing - * todo but there is a window when it fails, due to - * the pid transfer logic in de_thread. - * - * So we perform the straight forward test of seeing - * if the pid we have found is the pid of a thread - * group leader, and don't worry if the task we have - * found doesn't happen to be a thread group leader. - * As we don't care in the case of readdir. - */ - if (!iter.task || !has_group_leader_pid(iter.task)) { + iter.task = pid_task(pid, PIDTYPE_TGID); + if (!iter.task) { iter.tgid += 1; goto retry; } @@ -3372,20 +3401,21 @@ retry: int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; - struct pid_namespace *ns = proc_pid_ns(file_inode(file)); + struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); + struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; if (pos == TGID_OFFSET - 2) { - struct inode *inode = d_inode(ns->proc_self); + struct inode *inode = d_inode(fs_info->proc_self); if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { - struct inode *inode = d_inode(ns->proc_thread_self); + struct inode *inode = d_inode(fs_info->proc_thread_self); if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; @@ -3399,7 +3429,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) unsigned int len; cond_resched(); - if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) + if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%u", iter.tgid); @@ -3599,6 +3629,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; + struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); @@ -3609,7 +3640,8 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (tid == ~0U) goto out; - ns = dentry->d_sb->s_fs_info; + fs_info = proc_sb_info(dentry->d_sb); + ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tid, ns); if (task) @@ -3723,7 +3755,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = proc_pid_ns(inode); + ns = proc_pid_ns(inode->i_sb); tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4ed6dabdf6ff..2f9fa179194d 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -269,6 +269,11 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { + struct proc_fs_info *fs_info = proc_sb_info(dir->i_sb); + + if (fs_info->pidonly == PROC_PIDONLY_ON) + return ERR_PTR(-ENOENT); + return proc_lookup_de(dir, dentry, PDE(dir)); } @@ -325,6 +330,10 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx, int proc_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); + + if (fs_info->pidonly == PROC_PIDONLY_ON) + return 1; return proc_readdir_de(file, ctx, PDE(inode)); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index fb4cace9ea41..f40c2532c057 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -24,6 +24,7 @@ #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/mount.h> +#include <linux/bug.h> #include <linux/uaccess.h> @@ -165,15 +166,28 @@ void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock deactivate_super(old_sb); } +static inline const char *hidepid2str(enum proc_hidepid v) +{ + switch (v) { + case HIDEPID_OFF: return "off"; + case HIDEPID_NO_ACCESS: return "noaccess"; + case HIDEPID_INVISIBLE: return "invisible"; + case HIDEPID_NOT_PTRACEABLE: return "ptraceable"; + } + WARN_ONCE(1, "bad hide_pid value: %d\n", v); + return "unknown"; +} + static int proc_show_options(struct seq_file *seq, struct dentry *root) { - struct super_block *sb = root->d_sb; - struct pid_namespace *pid = sb->s_fs_info; + struct proc_fs_info *fs_info = proc_sb_info(root->d_sb); - if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) - seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); - if (pid->hide_pid != HIDEPID_OFF) - seq_printf(seq, ",hidepid=%u", pid->hide_pid); + if (!gid_eq(fs_info->pid_gid, GLOBAL_ROOT_GID)) + seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, fs_info->pid_gid)); + if (fs_info->hide_pid != HIDEPID_OFF) + seq_printf(seq, ",hidepid=%s", hidepid2str(fs_info->hide_pid)); + if (fs_info->pidonly != PROC_PIDONLY_OFF) + seq_printf(seq, ",subset=pid"); return 0; } @@ -464,6 +478,7 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, static int proc_reg_open(struct inode *inode, struct file *file) { + struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct proc_dir_entry *pde = PDE(inode); int rv = 0; typeof_member(struct proc_ops, proc_open) open; @@ -477,6 +492,9 @@ static int proc_reg_open(struct inode *inode, struct file *file) return rv; } + if (fs_info->pidonly == PROC_PIDONLY_ON) + return -ENOENT; + /* * Ensure that * 1) PDE's ->release hook will be called no matter what diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8c1f1bb1a5ce..e9a6841fc25b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -17,7 +17,6 @@ #include <linux/cma.h> #endif #include <asm/page.h> -#include <asm/pgtable.h> #include "internal.h" void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) @@ -103,11 +102,14 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", global_zone_page_state(NR_KERNEL_STACK_KB)); +#ifdef CONFIG_SHADOW_CALL_STACK + seq_printf(m, "ShadowCallStack:%8lu kB\n", + global_zone_page_state(NR_KERNEL_SCS_KB)); +#endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); - show_val_kb(m, "NFS_Unstable: ", - global_node_page_state(NR_UNSTABLE_NFS)); + show_val_kb(m, "NFS_Unstable: ", 0); show_val_kb(m, "Bounce: ", global_zone_page_state(NR_BOUNCE)); show_val_kb(m, "WritebackTmp: ", diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 14c2badb8fd9..13452b32e2bd 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -22,7 +22,6 @@ #include <linux/hugetlb.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> -#include <asm/pgtable.h> #include <asm/tlb.h> #include <asm/div64.h> #include "internal.h" diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 4888c5224442..dba63b2429f0 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -98,6 +98,25 @@ static const struct proc_ops proc_net_seq_ops = { .proc_release = seq_release_net, }; +int bpf_iter_init_seq_net(void *priv_data) +{ +#ifdef CONFIG_NET_NS + struct seq_net_private *p = priv_data; + + p->net = get_net(current->nsproxy->net_ns); +#endif + return 0; +} + +void bpf_iter_fini_seq_net(void *priv_data) +{ +#ifdef CONFIG_NET_NS + struct seq_net_private *p = priv_data; + + put_net(p->net); +#endif +} + struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct seq_operations *ops, unsigned int state_size, void *data) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b6f5d459b087..42c5128c7d1c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -14,6 +14,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/bpf-cgroup.h> +#include <linux/mount.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -539,13 +540,13 @@ out: return err; } -static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, +static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, size_t count, loff_t *ppos, int write) { struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - void *new_buf = NULL; + void *kbuf; ssize_t error; if (IS_ERR(head)) @@ -564,27 +565,42 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (!table->proc_handler) goto out; - error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count, - ppos, &new_buf); + /* don't even try if the size is too large */ + if (count > KMALLOC_MAX_SIZE) + return -ENOMEM; + + if (write) { + kbuf = memdup_user_nul(ubuf, count); + if (IS_ERR(kbuf)) { + error = PTR_ERR(kbuf); + goto out; + } + } else { + error = -ENOMEM; + kbuf = kzalloc(count, GFP_KERNEL); + if (!kbuf) + goto out; + } + + error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, + ppos); if (error) - goto out; + goto out_free_buf; /* careful: calling conventions are nasty here */ - if (new_buf) { - mm_segment_t old_fs; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - error = table->proc_handler(table, write, (void __user *)new_buf, - &count, ppos); - set_fs(old_fs); - kfree(new_buf); - } else { - error = table->proc_handler(table, write, buf, &count, ppos); + error = table->proc_handler(table, write, kbuf, &count, ppos); + if (error) + goto out_free_buf; + + if (!write) { + error = -EFAULT; + if (copy_to_user(ubuf, kbuf, count)) + goto out_free_buf; } - if (!error) - error = count; + error = count; +out_free_buf: + kfree(kbuf); out: sysctl_head_finish(head); @@ -1692,3 +1708,147 @@ int __init proc_sys_init(void) return sysctl_init(); } + +struct sysctl_alias { + const char *kernel_param; + const char *sysctl_param; +}; + +/* + * Historically some settings had both sysctl and a command line parameter. + * With the generic sysctl. parameter support, we can handle them at a single + * place and only keep the historical name for compatibility. This is not meant + * to add brand new aliases. When adding existing aliases, consider whether + * the possibly different moment of changing the value (e.g. from early_param + * to the moment do_sysctl_args() is called) is an issue for the specific + * parameter. + */ +static const struct sysctl_alias sysctl_aliases[] = { + {"hardlockup_all_cpu_backtrace", "kernel.hardlockup_all_cpu_backtrace" }, + {"hung_task_panic", "kernel.hung_task_panic" }, + {"numa_zonelist_order", "vm.numa_zonelist_order" }, + {"softlockup_all_cpu_backtrace", "kernel.softlockup_all_cpu_backtrace" }, + {"softlockup_panic", "kernel.softlockup_panic" }, + { } +}; + +static const char *sysctl_find_alias(char *param) +{ + const struct sysctl_alias *alias; + + for (alias = &sysctl_aliases[0]; alias->kernel_param != NULL; alias++) { + if (strcmp(alias->kernel_param, param) == 0) + return alias->sysctl_param; + } + + return NULL; +} + +/* Set sysctl value passed on kernel command line. */ +static int process_sysctl_arg(char *param, char *val, + const char *unused, void *arg) +{ + char *path; + struct vfsmount **proc_mnt = arg; + struct file_system_type *proc_fs_type; + struct file *file; + int len; + int err; + loff_t pos = 0; + ssize_t wret; + + if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) { + param += sizeof("sysctl") - 1; + + if (param[0] != '/' && param[0] != '.') + return 0; + + param++; + } else { + param = (char *) sysctl_find_alias(param); + if (!param) + return 0; + } + + /* + * To set sysctl options, we use a temporary mount of proc, look up the + * respective sys/ file and write to it. To avoid mounting it when no + * options were given, we mount it only when the first sysctl option is + * found. Why not a persistent mount? There are problems with a + * persistent mount of proc in that it forces userspace not to use any + * proc mount options. + */ + if (!*proc_mnt) { + proc_fs_type = get_fs_type("proc"); + if (!proc_fs_type) { + pr_err("Failed to find procfs to set sysctl from command line\n"); + return 0; + } + *proc_mnt = kern_mount(proc_fs_type); + put_filesystem(proc_fs_type); + if (IS_ERR(*proc_mnt)) { + pr_err("Failed to mount procfs to set sysctl from command line\n"); + return 0; + } + } + + path = kasprintf(GFP_KERNEL, "sys/%s", param); + if (!path) + panic("%s: Failed to allocate path for %s\n", __func__, param); + strreplace(path, '.', '/'); + + file = file_open_root((*proc_mnt)->mnt_root, *proc_mnt, path, O_WRONLY, 0); + if (IS_ERR(file)) { + err = PTR_ERR(file); + if (err == -ENOENT) + pr_err("Failed to set sysctl parameter '%s=%s': parameter not found\n", + param, val); + else if (err == -EACCES) + pr_err("Failed to set sysctl parameter '%s=%s': permission denied (read-only?)\n", + param, val); + else + pr_err("Error %pe opening proc file to set sysctl parameter '%s=%s'\n", + file, param, val); + goto out; + } + len = strlen(val); + wret = kernel_write(file, val, len, &pos); + if (wret < 0) { + err = wret; + if (err == -EINVAL) + pr_err("Failed to set sysctl parameter '%s=%s': invalid value\n", + param, val); + else + pr_err("Error %pe writing to proc file to set sysctl parameter '%s=%s'\n", + ERR_PTR(err), param, val); + } else if (wret != len) { + pr_err("Wrote only %zd bytes of %d writing to proc file %s to set sysctl parameter '%s=%s\n", + wret, len, path, param, val); + } + + err = filp_close(file, NULL); + if (err) + pr_err("Error %pe closing proc file to set sysctl parameter '%s=%s\n", + ERR_PTR(err), param, val); +out: + kfree(path); + return 0; +} + +void do_sysctl_args(void) +{ + char *command_line; + struct vfsmount *proc_mnt = NULL; + + command_line = kstrdup(saved_command_line, GFP_KERNEL); + if (!command_line) + panic("%s: Failed to allocate copy of command line\n", __func__); + + parse_args("Setting sysctl args", command_line, + NULL, 0, -1, -1, &proc_mnt, process_sysctl_arg); + + if (proc_mnt) + kern_unmount(proc_mnt); + + kfree(command_line); +} diff --git a/fs/proc/root.c b/fs/proc/root.c index 2633f10446c3..5e444d4f9717 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -32,21 +32,86 @@ struct proc_fs_context { struct pid_namespace *pid_ns; unsigned int mask; - int hidepid; + enum proc_hidepid hidepid; int gid; + enum proc_pidonly pidonly; }; enum proc_param { Opt_gid, Opt_hidepid, + Opt_subset, }; static const struct fs_parameter_spec proc_fs_parameters[] = { fsparam_u32("gid", Opt_gid), - fsparam_u32("hidepid", Opt_hidepid), + fsparam_string("hidepid", Opt_hidepid), + fsparam_string("subset", Opt_subset), {} }; +static inline int valid_hidepid(unsigned int value) +{ + return (value == HIDEPID_OFF || + value == HIDEPID_NO_ACCESS || + value == HIDEPID_INVISIBLE || + value == HIDEPID_NOT_PTRACEABLE); +} + +static int proc_parse_hidepid_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct proc_fs_context *ctx = fc->fs_private; + struct fs_parameter_spec hidepid_u32_spec = fsparam_u32("hidepid", Opt_hidepid); + struct fs_parse_result result; + int base = (unsigned long)hidepid_u32_spec.data; + + if (param->type != fs_value_is_string) + return invalf(fc, "proc: unexpected type of hidepid value\n"); + + if (!kstrtouint(param->string, base, &result.uint_32)) { + if (!valid_hidepid(result.uint_32)) + return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); + ctx->hidepid = result.uint_32; + return 0; + } + + if (!strcmp(param->string, "off")) + ctx->hidepid = HIDEPID_OFF; + else if (!strcmp(param->string, "noaccess")) + ctx->hidepid = HIDEPID_NO_ACCESS; + else if (!strcmp(param->string, "invisible")) + ctx->hidepid = HIDEPID_INVISIBLE; + else if (!strcmp(param->string, "ptraceable")) + ctx->hidepid = HIDEPID_NOT_PTRACEABLE; + else + return invalf(fc, "proc: unknown value of hidepid - %s\n", param->string); + + return 0; +} + +static int proc_parse_subset_param(struct fs_context *fc, char *value) +{ + struct proc_fs_context *ctx = fc->fs_private; + + while (value) { + char *ptr = strchr(value, ','); + + if (ptr != NULL) + *ptr++ = '\0'; + + if (*value != '\0') { + if (!strcmp(value, "pid")) { + ctx->pidonly = PROC_PIDONLY_ON; + } else { + return invalf(fc, "proc: unsupported subset option - %s\n", value); + } + } + value = ptr; + } + + return 0; +} + static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct proc_fs_context *ctx = fc->fs_private; @@ -63,10 +128,13 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) break; case Opt_hidepid: - ctx->hidepid = result.uint_32; - if (ctx->hidepid < HIDEPID_OFF || - ctx->hidepid > HIDEPID_INVISIBLE) - return invalfc(fc, "hidepid value must be between 0 and 2.\n"); + if (proc_parse_hidepid_param(fc, param)) + return -EINVAL; + break; + + case Opt_subset: + if (proc_parse_subset_param(fc, param->string) < 0) + return -EINVAL; break; default: @@ -77,26 +145,33 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } -static void proc_apply_options(struct super_block *s, +static void proc_apply_options(struct proc_fs_info *fs_info, struct fs_context *fc, - struct pid_namespace *pid_ns, struct user_namespace *user_ns) { struct proc_fs_context *ctx = fc->fs_private; if (ctx->mask & (1 << Opt_gid)) - pid_ns->pid_gid = make_kgid(user_ns, ctx->gid); + fs_info->pid_gid = make_kgid(user_ns, ctx->gid); if (ctx->mask & (1 << Opt_hidepid)) - pid_ns->hide_pid = ctx->hidepid; + fs_info->hide_pid = ctx->hidepid; + if (ctx->mask & (1 << Opt_subset)) + fs_info->pidonly = ctx->pidonly; } static int proc_fill_super(struct super_block *s, struct fs_context *fc) { - struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info); + struct proc_fs_context *ctx = fc->fs_private; struct inode *root_inode; + struct proc_fs_info *fs_info; int ret; - proc_apply_options(s, fc, pid_ns, current_user_ns()); + fs_info = kzalloc(sizeof(*fs_info), GFP_KERNEL); + if (!fs_info) + return -ENOMEM; + + fs_info->pid_ns = get_pid_ns(ctx->pid_ns); + proc_apply_options(fs_info, fc, current_user_ns()); /* User space would break if executables or devices appear on proc */ s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV; @@ -106,6 +181,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) s->s_magic = PROC_SUPER_MAGIC; s->s_op = &proc_sops; s->s_time_gran = 1; + s->s_fs_info = fs_info; /* * procfs isn't actually a stacking filesystem; however, there is @@ -113,7 +189,7 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) * top of it */ s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH; - + /* procfs dentries and inodes don't require IO to create */ s->s_shrink.seeks = 0; @@ -140,19 +216,17 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc) static int proc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; - struct pid_namespace *pid = sb->s_fs_info; + struct proc_fs_info *fs_info = proc_sb_info(sb); sync_filesystem(sb); - proc_apply_options(sb, fc, pid, current_user_ns()); + proc_apply_options(fs_info, fc, current_user_ns()); return 0; } static int proc_get_tree(struct fs_context *fc) { - struct proc_fs_context *ctx = fc->fs_private; - - return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns); + return get_tree_nodev(fc, proc_fill_super); } static void proc_fs_context_free(struct fs_context *fc) @@ -188,15 +262,19 @@ static int proc_init_fs_context(struct fs_context *fc) static void proc_kill_sb(struct super_block *sb) { - struct pid_namespace *ns; + struct proc_fs_info *fs_info = proc_sb_info(sb); + + if (!fs_info) { + kill_anon_super(sb); + return; + } + + dput(fs_info->proc_self); + dput(fs_info->proc_thread_self); - ns = (struct pid_namespace *)sb->s_fs_info; - if (ns->proc_self) - dput(ns->proc_self); - if (ns->proc_thread_self) - dput(ns->proc_thread_self); kill_anon_super(sb); - put_pid_ns(ns); + put_pid_ns(fs_info->pid_ns); + kfree(fs_info); } static struct file_system_type proc_fs_type = { diff --git a/fs/proc/self.c b/fs/proc/self.c index 57c0a1047250..ca5158fa561c 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -12,7 +12,7 @@ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = proc_pid_ns(inode); + struct pid_namespace *ns = proc_pid_ns(inode->i_sb); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; @@ -36,10 +36,10 @@ static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = proc_pid_ns(root_inode); + struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *self; int ret = -ENOMEM; - + inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { @@ -62,7 +62,7 @@ int proc_setup_self(struct super_block *s) if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); else - ns->proc_self = self; + fs_info->proc_self = self; return ret; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8d382d4ec067..dbda4499a859 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -145,7 +145,7 @@ static void *m_start(struct seq_file *m, loff_t *ppos) return NULL; } - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { mmput(mm); put_task_struct(priv->task); priv->task = NULL; @@ -188,7 +188,7 @@ static void m_stop(struct seq_file *m, void *v) return; release_task_mempolicy(priv); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); put_task_struct(priv->task); priv->task = NULL; @@ -546,10 +546,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); - struct page *page; + struct page *page = NULL; - /* FOLL_DUMP will return -EFAULT on huge zero page */ - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (pmd_present(*pmd)) { + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { + swp_entry_t entry = pmd_to_swp_entry(*pmd); + + if (is_migration_entry(entry)) + page = migration_entry_to_page(entry); + } if (IS_ERR_OR_NULL(page)) return; if (PageAnon(page)) @@ -578,8 +585,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - if (pmd_present(*pmd)) - smaps_pmd_entry(pmd, addr, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); goto out; } @@ -587,7 +593,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) goto out; /* - * The mmap_sem held all the way back in m_start() is what + * The mmap_lock held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things * in here. */ @@ -622,9 +628,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_GROWSDOWN)] = "gd", [ilog2(VM_PFNMAP)] = "pf", [ilog2(VM_DENYWRITE)] = "dw", -#ifdef CONFIG_X86_INTEL_MPX - [ilog2(VM_MPX)] = "mp", -#endif [ilog2(VM_LOCKED)] = "lo", [ilog2(VM_IO)] = "io", [ilog2(VM_SEQ_READ)] = "sr", @@ -638,6 +641,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_ARCH_1)] = "ar", [ilog2(VM_WIPEONFORK)] = "wf", [ilog2(VM_DONTDUMP)] = "dd", +#ifdef CONFIG_ARM64_BTI + [ilog2(VM_ARM64_BTI)] = "bt", +#endif #ifdef CONFIG_MEM_SOFT_DIRTY [ilog2(VM_SOFTDIRTY)] = "sd", #endif @@ -746,7 +752,7 @@ static void smap_gather_stats(struct vm_area_struct *vma, } } #endif - /* mmap_sem is held in m_start */ + /* mmap_lock is held in m_start */ walk_page_vma(vma, &smaps_walk_ops, mss); } @@ -841,7 +847,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) memset(&mss, 0, sizeof(mss)); - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_put_mm; @@ -860,7 +866,7 @@ static int show_smaps_rollup(struct seq_file *m, void *v) __show_smap(m, &mss, true); release_task_mempolicy(priv); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_put_mm: mmput(mm); @@ -1134,7 +1140,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, }; if (type == CLEAR_REFS_MM_HIWATER_RSS) { - if (down_write_killable(&mm->mmap_sem)) { + if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1144,11 +1150,11 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); goto out_mm; } - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1157,8 +1163,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) continue; - up_read(&mm->mmap_sem); - if (down_write_killable(&mm->mmap_sem)) { + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) { count = -EINTR; goto out_mm; } @@ -1177,14 +1183,14 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, * failed like if * get_proc_task() fails? */ - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); goto out_mm; } for (vma = mm->mmap; vma; vma = vma->vm_next) { vma->vm_flags &= ~VM_SOFTDIRTY; vma_set_page_prot(vma); } - downgrade_write(&mm->mmap_sem); + mmap_write_downgrade(mm); break; } @@ -1197,7 +1203,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb, 0, -1); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); out_mm: mmput(mm); } @@ -1558,11 +1564,11 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; - ret = down_read_killable(&mm->mmap_sem); + ret = mmap_read_lock_killable(mm); if (ret) goto out_free; ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); @@ -1821,7 +1827,7 @@ static int show_numa_map(struct seq_file *m, void *v) if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); - /* mmap_sem is held by m_start */ + /* mmap_lock is held by m_start */ walk_page_vma(vma, &show_numa_ops, md); if (!md->pages) diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 7907e6419e57..a6d21fc0033c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -25,7 +25,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) struct rb_node *p; unsigned long bytes = 0, sbytes = 0, slack = 0, size; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); @@ -77,7 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "Shared:\t%8lu bytes\n", bytes, slack, sbytes); - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); } unsigned long task_vsize(struct mm_struct *mm) @@ -86,12 +86,12 @@ unsigned long task_vsize(struct mm_struct *mm) struct rb_node *p; unsigned long vsize = 0; - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); vsize += vma->vm_end - vma->vm_start; } - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); return vsize; } @@ -104,7 +104,7 @@ unsigned long task_statm(struct mm_struct *mm, struct rb_node *p; unsigned long size = kobjsize(mm); - down_read(&mm->mmap_sem); + mmap_read_lock(mm); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); size += kobjsize(vma); @@ -119,7 +119,7 @@ unsigned long task_statm(struct mm_struct *mm, >> PAGE_SHIFT; *data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK)) >> PAGE_SHIFT; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); size >>= PAGE_SHIFT; size += *text + *data; *resident = size; @@ -211,7 +211,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm || !mmget_not_zero(mm)) return NULL; - if (down_read_killable(&mm->mmap_sem)) { + if (mmap_read_lock_killable(mm)) { mmput(mm); return ERR_PTR(-EINTR); } @@ -221,7 +221,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (n-- == 0) return p; - up_read(&mm->mmap_sem); + mmap_read_unlock(mm); mmput(mm); return NULL; } @@ -231,7 +231,7 @@ static void m_stop(struct seq_file *m, void *_vml) struct proc_maps_private *priv = m->private; if (!IS_ERR_OR_NULL(_vml)) { - up_read(&priv->mm->mmap_sem); + mmap_read_unlock(priv->mm); mmput(priv->mm); } if (priv->task) { diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index f61ae53533f5..ac284f409568 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -12,7 +12,7 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct pid_namespace *ns = proc_pid_ns(inode); + struct pid_namespace *ns = proc_pid_ns(inode->i_sb); pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); char *name; @@ -36,7 +36,7 @@ static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct pid_namespace *ns = proc_pid_ns(root_inode); + struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *thread_self; int ret = -ENOMEM; @@ -60,9 +60,9 @@ int proc_setup_thread_self(struct super_block *s) inode_unlock(root_inode); if (ret) - pr_err("proc_fill_super: can't allocate /proc/thread_self\n"); + pr_err("proc_fill_super: can't allocate /proc/thread-self\n"); else - ns->proc_thread_self = thread_self; + fs_info->proc_thread_self = thread_self; return ret; } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 7dc800cce354..c3a345c28a93 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -27,7 +27,6 @@ #include <linux/pagemap.h> #include <linux/uaccess.h> #include <linux/mem_encrypt.h> -#include <asm/pgtable.h> #include <asm/io.h> #include "internal.h" @@ -266,7 +265,8 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst, if (start < offset + dump->size) { tsz = min(offset + (u64)dump->size - start, (u64)size); buf = dump->buf + start - offset; - if (remap_vmalloc_range_partial(vma, dst, buf, tsz)) { + if (remap_vmalloc_range_partial(vma, dst, buf, 0, + tsz)) { ret = -EFAULT; goto out_unlock; } @@ -624,7 +624,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size); kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz; if (remap_vmalloc_range_partial(vma, vma->vm_start + len, - kaddr, tsz)) + kaddr, 0, tsz)) goto fail; size -= tsz; |