diff options
Diffstat (limited to 'fs/proc/base.c')
-rw-r--r-- | fs/proc/base.c | 356 |
1 files changed, 177 insertions, 179 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index ca651ac00660..c87b6b9a8a76 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -47,7 +47,7 @@ * Overall revision about smaps. */ -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/errno.h> #include <linux/time.h> @@ -85,6 +85,11 @@ #include <linux/user_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/mm.h> +#include <linux/sched/coredump.h> +#include <linux/sched/debug.h> +#include <linux/sched/stat.h> #include <linux/flex_array.h> #include <linux/posix-timers.h> #ifdef CONFIG_HARDWALL @@ -104,9 +109,12 @@ * in /proc for a task before it execs a suid executable. */ +static u8 nlink_tid; +static u8 nlink_tgid; + struct pid_entry { const char *name; - int len; + unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; @@ -139,13 +147,13 @@ struct pid_entry { * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ -static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, +static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; - count = 0; + count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; @@ -289,101 +297,69 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, } } else { /* - * Command line (1 string) occupies ARGV and maybe - * extends into ENVP. - */ - if (len1 + len2 <= *pos) - goto skip_argv_envp; - if (len1 <= *pos) - goto skip_argv; - - p = arg_start + *pos; - len = len1 - *pos; - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* - * Command line can be shorter than whole ARGV - * even if last "marker" byte says it is not. - */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; - } - - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; - } -skip_argv: - /* * Command line (1 string) occupies ARGV and * extends into ENVP. */ - if (len1 <= *pos) { - p = env_start + *pos - len1; - len = len1 + len2 - *pos; - } else { - p = env_start; - len = len2; + struct { + unsigned long p; + unsigned long len; + } cmdline[2] = { + { .p = arg_start, .len = len1 }, + { .p = env_start, .len = len2 }, + }; + loff_t pos1 = *pos; + unsigned int i; + + i = 0; + while (i < 2 && pos1 >= cmdline[i].len) { + pos1 -= cmdline[i].len; + i++; } - while (count > 0 && len > 0) { - unsigned int _count, l; - int nr_read; - bool final; - - _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); - if (nr_read < 0) - rv = nr_read; - if (nr_read <= 0) - goto out_free_page; - - /* Find EOS. */ - final = false; - l = strnlen(page, nr_read); - if (l < nr_read) { - nr_read = l; - final = true; - } - - if (copy_to_user(buf, page, nr_read)) { - rv = -EFAULT; - goto out_free_page; + while (i < 2) { + p = cmdline[i].p + pos1; + len = cmdline[i].len - pos1; + while (count > 0 && len > 0) { + unsigned int _count, l; + int nr_read; + bool final; + + _count = min3(count, len, PAGE_SIZE); + nr_read = access_remote_vm(mm, p, page, _count, 0); + if (nr_read < 0) + rv = nr_read; + if (nr_read <= 0) + goto out_free_page; + + /* + * Command line can be shorter than whole ARGV + * even if last "marker" byte says it is not. + */ + final = false; + l = strnlen(page, nr_read); + if (l < nr_read) { + nr_read = l; + final = true; + } + + if (copy_to_user(buf, page, nr_read)) { + rv = -EFAULT; + goto out_free_page; + } + + p += nr_read; + len -= nr_read; + buf += nr_read; + count -= nr_read; + rv += nr_read; + + if (final) + goto out_free_page; } - p += nr_read; - len -= nr_read; - buf += nr_read; - count -= nr_read; - rv += nr_read; - - if (final) - goto out_free_page; + /* Only first chunk can be read partially. */ + pos1 = 0; + i++; } -skip_argv_envp: - ; } out_free_page: @@ -726,11 +702,11 @@ static int proc_pid_permission(struct inode *inode, int mask) task = get_proc_task(inode); if (!task) return -ESRCH; - has_perms = has_pid_permissions(pid, task, 1); + has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { - if (pid->hide_pid == 2) { + if (pid->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process @@ -795,7 +771,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); + mmgrab(mm); /* but do not pin its memory */ mmput(mm); } @@ -842,7 +818,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, return -ENOMEM; copied = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ @@ -950,7 +926,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, return -ENOMEM; ret = 0; - if (!atomic_inc_not_zero(&mm->mm_users)) + if (!mmget_not_zero(mm)) goto free; down_read(&mm->mmap_sem); @@ -1093,7 +1069,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) if (p) { if (atomic_read(&p->mm->mm_users) > 1) { mm = p->mm; - atomic_inc(&mm->mm_count); + mmgrab(mm); } task_unlock(p); } @@ -1243,7 +1219,7 @@ static const struct file_operations proc_oom_score_adj_operations = { }; #ifdef CONFIG_AUDITSYSCALL -#define TMPBUFLEN 21 +#define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { @@ -1664,11 +1640,63 @@ const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) +void task_dump_owner(struct task_struct *task, mode_t mode, + kuid_t *ruid, kgid_t *rgid) +{ + /* Depending on the state of dumpable compute who should own a + * proc file for a task. + */ + const struct cred *cred; + kuid_t uid; + kgid_t gid; + + /* Default to the tasks effective ownership */ + rcu_read_lock(); + cred = __task_cred(task); + uid = cred->euid; + gid = cred->egid; + rcu_read_unlock(); + + /* + * Before the /proc/pid/status file was created the only way to read + * the effective uid of a /process was to stat /proc/pid. Reading + * /proc/pid/status is slow enough that procps and other packages + * kept stating /proc/pid. To keep the rules in /proc simple I have + * made this apply to all per process world readable and executable + * directories. + */ + if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { + struct mm_struct *mm; + task_lock(task); + mm = task->mm; + /* Make non-dumpable tasks owned by some root */ + if (mm) { + if (get_dumpable(mm) != SUID_DUMP_USER) { + struct user_namespace *user_ns = mm->user_ns; + + uid = make_kuid(user_ns, 0); + if (!uid_valid(uid)) + uid = GLOBAL_ROOT_UID; + + gid = make_kgid(user_ns, 0); + if (!gid_valid(gid)) + gid = GLOBAL_ROOT_GID; + } + } else { + uid = GLOBAL_ROOT_UID; + gid = GLOBAL_ROOT_GID; + } + task_unlock(task); + } + *ruid = uid; + *rgid = gid; +} + +struct inode *proc_pid_make_inode(struct super_block * sb, + struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; - const struct cred *cred; /* We need a new inode */ @@ -1678,6 +1706,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *t /* Common stuff */ ei = PROC_I(inode); + inode->i_mode = mode; inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_op = &proc_def_inode_operations; @@ -1689,13 +1718,7 @@ struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *t if (!ei->pid) goto out_unlock; - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: @@ -1706,12 +1729,12 @@ out_unlock: return NULL; } -int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *task; - const struct cred *cred; - struct pid_namespace *pid = dentry->d_sb->s_fs_info; + struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -1720,7 +1743,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { - if (!has_pid_permissions(pid, task, 2)) { + if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, @@ -1728,12 +1751,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) */ return -ENOENT; } - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - cred = __task_cred(task); - stat->uid = cred->euid; - stat->gid = cred->egid; - } + task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; @@ -1749,18 +1767,11 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * - * Before the /proc/pid/status file was created the only way to read - * the effective uid of a /process was to stat /proc/pid. Reading - * /proc/pid/status is slow enough that procps and other packages - * kept stating /proc/pid. To keep the rules in /proc simple I have - * made this apply to all per process world readable and executable - * directories. */ int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; - const struct cred *cred; if (flags & LOOKUP_RCU) return -ECHILD; @@ -1769,17 +1780,8 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) task = get_proc_task(inode); if (task) { - if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || - task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); + inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); put_task_struct(task); @@ -1876,7 +1878,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; - const struct cred *cred; struct inode *inode; int status = 0; @@ -1901,16 +1902,8 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) mmput(mm); if (exact_vma_exists) { - if (task_dumpable(task)) { - rcu_read_lock(); - cred = __task_cred(task); - inode->i_uid = cred->euid; - inode->i_gid = cred->egid; - rcu_read_unlock(); - } else { - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - } + task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); + security_task_to_inode(task, inode); status = 1; } @@ -1967,7 +1960,7 @@ out: struct map_files_info { fmode_t mode; - unsigned long len; + unsigned int len; unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; @@ -2004,7 +1997,9 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, struct proc_inode *ei; struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFLNK | + ((mode & FMODE_READ ) ? S_IRUSR : 0) | + ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) return -ENOENT; @@ -2013,12 +2008,6 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; - inode->i_mode = S_IFLNK; - - if (mode & FMODE_READ) - inode->i_mode |= S_IRUSR; - if (mode & FMODE_WRITE) - inode->i_mode |= S_IWUSR; d_set_d_op(dentry, &tid_map_files_dentry_operations); d_add(dentry, inode); @@ -2178,7 +2167,7 @@ static const struct file_operations proc_map_files_operations = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; @@ -2372,12 +2361,11 @@ static int proc_pident_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, p->mode); if (!inode) goto out; ei = PROC_I(inode); - inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) @@ -2412,14 +2400,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir, * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ - last = &ents[nents - 1]; - for (p = ents; p <= last; p++) { + last = &ents[nents]; + for (p = ents; p < last; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) break; } - if (p > last) + if (p >= last) goto out; error = proc_pident_instantiate(dir, dentry, task, p); @@ -2444,7 +2432,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx, if (ctx->pos >= nents + 2) goto out; - for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; @@ -2488,6 +2476,12 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, length = -ESRCH; if (!task) goto out_no_task; + + /* A task may only write its own attributes. */ + length = -EACCES; + if (current != task) + goto out; + if (count > PAGE_SIZE) count = PAGE_SIZE; @@ -2503,14 +2497,13 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, } /* Guard against adverse ptrace interaction */ - length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); + length = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); if (length < 0) goto out_free; - length = security_setprocattr(task, - (char*)file->f_path.dentry->d_name.name, + length = security_setprocattr(file->f_path.dentry->d_name.name, page, count); - mutex_unlock(&task->signal->cred_guard_mutex); + mutex_unlock(¤t->signal->cred_guard_mutex); out_free: kfree(page); out: @@ -2936,7 +2929,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), @@ -3059,17 +3052,15 @@ static int proc_pid_instantiate(struct inode *dir, { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto out; - inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, - ARRAY_SIZE(tgid_base_stuff))); + set_nlink(inode, nlink_tgid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3181,7 +3172,9 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[PROC_NUMBUF]; int len; - if (!has_pid_permissions(ns, iter.task, 2)) + + cond_resched(); + if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%d", iter.tgid); @@ -3352,17 +3345,15 @@ static int proc_task_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dir->i_sb, task); + inode = proc_pid_make_inode(dir->i_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) goto out; - inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, - ARRAY_SIZE(tid_base_stuff))); + set_nlink(inode, nlink_tid); d_set_d_op(dentry, &pid_dentry_operations); @@ -3526,9 +3517,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_task_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); @@ -3552,3 +3544,9 @@ static const struct file_operations proc_task_operations = { .iterate_shared = proc_task_readdir, .llseek = generic_file_llseek, }; + +void __init set_proc_pid_nlink(void) +{ + nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); + nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); +} |