diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2016-12-09 15:25:01 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2016-12-09 15:25:01 +1100 |
commit | bbf1977af00aa496490120ac57e2de52410815b9 (patch) | |
tree | 9cb0377499cc0c7161b72cacf91e8857ce7466aa | |
parent | 1d26bd4be36fb4611d10901a7ec9b3d2c63c711f (diff) | |
parent | fee1df54b64871f8c097a53fcb02145af48c0b48 (diff) |
Merge remote-tracking branch 'userns/for-next'
-rw-r--r-- | arch/alpha/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/blackfin/kernel/ptrace.c | 4 | ||||
-rw-r--r-- | arch/cris/arch-v32/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 2 | ||||
-rw-r--r-- | arch/mips/kernel/ptrace32.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/ptrace32.c | 4 | ||||
-rw-r--r-- | fs/exec.c | 21 | ||||
-rw-r--r-- | fs/notify/inotify/inotify.h | 17 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 6 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 34 | ||||
-rw-r--r-- | include/linux/capability.h | 2 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 3 | ||||
-rw-r--r-- | include/linux/mm.h | 2 | ||||
-rw-r--r-- | include/linux/mm_types.h | 1 | ||||
-rw-r--r-- | include/linux/ptrace.h | 4 | ||||
-rw-r--r-- | include/linux/sched.h | 5 | ||||
-rw-r--r-- | include/linux/user_namespace.h | 4 | ||||
-rw-r--r-- | kernel/capability.c | 36 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/ptrace.c | 70 | ||||
-rw-r--r-- | kernel/ucount.c | 6 | ||||
-rw-r--r-- | mm/init-mm.c | 2 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 2 | ||||
-rw-r--r-- | security/integrity/evm/evm_crypto.c | 12 |
25 files changed, 186 insertions, 70 deletions
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 940dfb406591..04abdec7f496 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8d79286ee4e8..360d99645163 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &tmp, + copied = ptrace_access_vm(child, addr, &tmp, to_copy, FOLL_FORCE); if (copied) break; @@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request, switch (bfin_mem_access_type(addr, to_copy)) { case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: - copied = access_process_vm(child, addr, &data, + copied = ptrace_access_vm(child, addr, &data, to_copy, FOLL_FORCE | FOLL_WRITE); break; diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f0df654ac6fc..fe1f9cf7b391 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 31aa8c0f68e1..36f660da8124 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), + if (ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE) != sizeof(data)) return -EIO; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 7e71a4e0281b..5fcbdcd7abd0 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &data, + if (ptrace_access_vm(child, (u64)addrOthers, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 010b7b310237..1e887f3a61a6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; - copied = access_process_vm(child, (u64)addrOthers, &tmp, + copied = ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, if (get_user(addrOthers, (u32 __user * __user *)addr) != 0) break; ret = 0; - if (access_process_vm(child, (u64)addrOthers, &tmp, + if (ptrace_access_vm(child, (u64)addrOthers, &tmp, sizeof(tmp), FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; diff --git a/fs/exec.c b/fs/exec.c index 923c57d96899..88b5e1efdbd6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1277,8 +1277,22 @@ EXPORT_SYMBOL(flush_old_exec); void would_dump(struct linux_binprm *bprm, struct file *file) { - if (inode_permission(file_inode(file), MAY_READ) < 0) + struct inode *inode = file_inode(file); + if (inode_permission(inode, MAY_READ) < 0) { + struct user_namespace *old, *user_ns; bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + + /* Ensure mm->user_ns contains the executable */ + user_ns = old = bprm->mm->user_ns; + while ((user_ns != &init_user_ns) && + !privileged_wrt_inode_uidgid(user_ns, inode)) + user_ns = user_ns->parent; + + if (old != user_ns) { + bprm->mm->user_ns = get_user_ns(user_ns); + put_user_ns(old); + } + } } EXPORT_SYMBOL(would_dump); @@ -1308,7 +1322,6 @@ void setup_new_exec(struct linux_binprm * bprm) !gid_eq(bprm->cred->gid, current_egid())) { current->pdeath_signal = 0; } else { - would_dump(bprm, bprm->file); if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) set_dumpable(current->mm, suid_dumpable); } @@ -1408,7 +1421,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm) unsigned n_fs; if (p->ptrace) { - if (p->ptrace & PT_PTRACE_CAP) + if (ptracer_capable(p, current_user_ns())) bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP; else bprm->unsafe |= LSM_UNSAFE_PTRACE; @@ -1743,6 +1756,8 @@ static int do_execveat_common(int fd, struct filename *filename, if (retval < 0) goto out; + would_dump(bprm, bprm->file); + retval = exec_binprm(bprm); if (retval < 0) goto out; diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index ed855ef6f077..4e9bea095e0b 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group, const unsigned char *file_name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; + +#ifdef CONFIG_INOTIFY_USER +static inline void dec_inotify_instances(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES); +} + +static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts) +{ + return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES); +} + +static inline void dec_inotify_watches(struct ucounts *ucounts) +{ + dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES); +} +#endif diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 2cd900c2c737..1e6b3cfc2cfd 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group) /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); - if (group->inotify_data.user) { - atomic_dec(&group->inotify_data.user->inotify_devs); - free_uid(group->inotify_data.user); - } + if (group->inotify_data.ucounts) + dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_event *fsn_event) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 69d1ea3d292a..1cf41c623be1 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -44,10 +44,8 @@ #include <asm/ioctls.h> -/* these are configurable via /proc/sys/fs/inotify/ */ -static int inotify_max_user_instances __read_mostly; +/* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -static int inotify_max_user_watches __read_mostly; static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; @@ -60,7 +58,7 @@ static int zero; struct ctl_table inotify_table[] = { { .procname = "max_user_instances", - .data = &inotify_max_user_instances, + .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = { }, { .procname = "max_user_watches", - .data = &inotify_max_user_watches, + .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* remove this mark from the idr */ inotify_remove_from_idr(group, i_mark); - atomic_dec(&group->inotify_data.user->inotify_watches); + dec_inotify_watches(group->inotify_data.ucounts); } /* ding dong the mark is dead */ @@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group, tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; - ret = -ENOSPC; - if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) - goto out_err; - ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark); if (ret) goto out_err; + /* increment the number of watches the user has */ + if (!inc_inotify_watches(group->inotify_data.ucounts)) { + inotify_remove_from_idr(group, tmp_i_mark); + ret = -ENOSPC; + goto out_err; + } + /* we are on the idr, now get on the inode */ ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); @@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group, goto out_err; } - /* increment the number of watches the user has */ - atomic_inc(&group->inotify_data.user->inotify_watches); /* return the watch descriptor for this new mark */ ret = tmp_i_mark->wd; @@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events) spin_lock_init(&group->inotify_data.idr_lock); idr_init(&group->inotify_data.idr); - group->inotify_data.user = get_current_user(); + group->inotify_data.ucounts = inc_ucount(current_user_ns(), + current_euid(), + UCOUNT_INOTIFY_INSTANCES); - if (atomic_inc_return(&group->inotify_data.user->inotify_devs) > - inotify_max_user_instances) { + if (!group->inotify_data.ucounts) { fsnotify_destroy_group(group); return ERR_PTR(-EMFILE); } @@ -819,8 +819,8 @@ static int __init inotify_user_setup(void) inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); inotify_max_queued_events = 16384; - inotify_max_user_instances = 128; - inotify_max_user_watches = 8192; + init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; + init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192; return 0; } diff --git a/include/linux/capability.h b/include/linux/capability.h index dbc21c719ce6..6ffb67e10c06 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -240,8 +240,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap) return true; } #endif /* CONFIG_MULTIUSER */ +extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode); extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); +extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 79467b239fcf..251f2268baad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -16,6 +16,7 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <linux/atomic.h> +#include <linux/user_namespace.h> /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -170,7 +171,7 @@ struct fsnotify_group { struct inotify_group_private_data { spinlock_t idr_lock; struct idr idr; - struct user_struct *user; + struct ucounts *ucounts; } inotify_data; #endif #ifdef CONFIG_FANOTIFY diff --git a/include/linux/mm.h b/include/linux/mm.h index a92c8d73aeaf..0b5b2e4df14e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); +extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, unsigned int gup_flags); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4a8acedf4b7d..08d947fc4c59 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -473,6 +473,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 504c98a278d4..e0e539321ab9 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -8,6 +8,9 @@ #include <linux/pid_namespace.h> /* For task_active_pid_ns. */ #include <uapi/linux/ptrace.h> +extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags); + /* * Ptrace flags * @@ -19,7 +22,6 @@ #define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */ #define PT_PTRACED 0x00000001 #define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */ -#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */ #define PT_OPT_FLAG_SHIFT 3 /* PT_TRACE_* event enable flags */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 65c54b6147ba..4cc09bca46fe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -864,10 +864,6 @@ struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_INOTIFY_USER - atomic_t inotify_watches; /* How many inotify watches does this user have? */ - atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ -#endif #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif @@ -1681,6 +1677,7 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index eb209d4523f5..363e0e8082a9 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -32,6 +32,10 @@ enum ucount_type { UCOUNT_NET_NAMESPACES, UCOUNT_MNT_NAMESPACES, UCOUNT_CGROUP_NAMESPACES, +#ifdef CONFIG_INOTIFY_USER + UCOUNT_INOTIFY_INSTANCES, + UCOUNT_INOTIFY_WATCHES, +#endif UCOUNT_COUNTS, }; diff --git a/kernel/capability.c b/kernel/capability.c index 00411c82dac5..4984e1f552eb 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns, EXPORT_SYMBOL(file_ns_capable); /** + * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode? + * @ns: The user namespace in question + * @inode: The inode in question + * + * Return true if the inode uid and gid are within the namespace. + */ +bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode) +{ + return kuid_has_mapping(ns, inode->i_uid) && + kgid_has_mapping(ns, inode->i_gid); +} + +/** * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped * @inode: The inode in question * @cap: The capability in question @@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap) { struct user_namespace *ns = current_user_ns(); - return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) && - kgid_has_mapping(ns, inode->i_gid); + return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode); } EXPORT_SYMBOL(capable_wrt_inode_uidgid); + +/** + * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace + * @tsk: The task that may be ptraced + * @ns: The user namespace to search for CAP_SYS_PTRACE in + * + * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE + * in the specified user namespace. + */ +bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns) +{ + int ret = 0; /* An absent tracer adds no restrictions */ + const struct cred *cred; + rcu_read_lock(); + cred = rcu_dereference(tsk->ptracer_cred); + if (cred) + ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE); + rcu_read_unlock(); + return (ret == 0); +} diff --git a/kernel/fork.c b/kernel/fork.c index bdeebe8d55cd..3c306e62e295 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { mm->mmap = NULL; mm->mm_rb = RB_ROOT; @@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) if (init_new_context(p, mm)) goto fail_nocontext; + mm->user_ns = get_user_ns(user_ns); return mm; fail_nocontext: @@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -1126,7 +1129,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; err = dup_mmap(mm, oldmm); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index e6474f7272ec..49ba7c1ade9d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,6 +27,35 @@ #include <linux/cn_proc.h> #include <linux/compat.h> +/* + * Access another process' address space via ptrace. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, unsigned int gup_flags) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + if (!tsk->ptrace || + (current != tsk->parent) || + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptracer_capable(tsk, mm->user_ns))) { + mmput(mm); + return 0; + } + + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); + mmput(mm); + + return ret; +} + /* * ptrace a task: make the debugger its new parent and @@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) BUG_ON(!list_empty(&child->ptrace_entry)); list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; + rcu_read_lock(); + child->ptracer_cred = get_cred(__task_cred(new_parent)); + rcu_read_unlock(); } /** @@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) */ void __ptrace_unlink(struct task_struct *child) { + const struct cred *old_cred; BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + old_cred = child->ptracer_cred; + child->ptracer_cred = NULL; + put_cred(old_cred); spin_lock(&child->sighand->siglock); child->ptrace = 0; @@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); @@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); + retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE); + if (!retval) { if (copied) break; @@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, + retval = ptrace_access_vm(tsk, dst, buf, this_len, FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) @@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); + copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), + copied = ptrace_access_vm(tsk, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), + ret = ptrace_access_vm(child, addr, &word, sizeof(word), FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; @@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), + ret = ptrace_access_vm(child, addr, &data, sizeof(data), FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/kernel/ucount.c b/kernel/ucount.c index 9d20d5dd298a..a6bcea47306b 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -57,7 +57,7 @@ static struct ctl_table_root set_root = { static int zero = 0; static int int_max = INT_MAX; -#define UCOUNT_ENTRY(name) \ +#define UCOUNT_ENTRY(name) \ { \ .procname = name, \ .maxlen = sizeof(int), \ @@ -74,6 +74,10 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_net_namespaces"), UCOUNT_ENTRY("max_mnt_namespaces"), UCOUNT_ENTRY("max_cgroup_namespaces"), +#ifdef CONFIG_INOTIFY_USER_ + UCOUNT_ENTRY("max_inotify_instances"), + UCOUNT_ENTRY("max_inotify_watches"), +#endif { } }; #endif /* CONFIG_SYSCTL */ diff --git a/mm/init-mm.c b/mm/init-mm.c index a56a851908d2..975e49f00f34 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include <linux/cpumask.h> #include <linux/atomic.h> +#include <linux/user_namespace.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) }; diff --git a/mm/memory.c b/mm/memory.c index 3142fc0f7253..00a95cc7df0a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * Access another process' address space as given in mm. If non-NULL, use the * given task for page fault accounting. */ -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/mm/nommu.c b/mm/nommu.c index 9720e0bab029..27bc543128e5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe, } EXPORT_SYMBOL(filemap_map_pages); -static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, +int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index bf663915412e..d7f282d75cc1 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -151,8 +151,16 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, memset(&hmac_misc, 0, sizeof(hmac_misc)); hmac_misc.ino = inode->i_ino; hmac_misc.generation = inode->i_generation; - hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid); - hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid); + /* The hmac uid and gid must be encoded in the initial user + * namespace (not the filesystems user namespace) as encoding + * them in the filesystems user namespace allows an attack + * where first they are written in an unprivileged fuse mount + * of a filesystem and then the system is tricked to mount the + * filesystem for real on next boot and trust it because + * everything is signed. + */ + hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid); + hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid); hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); if (evm_hmac_attrs & EVM_ATTR_FSUUID) |