summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2016-12-09 15:25:01 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2016-12-09 15:25:01 +1100
commitbbf1977af00aa496490120ac57e2de52410815b9 (patch)
tree9cb0377499cc0c7161b72cacf91e8857ce7466aa
parent1d26bd4be36fb4611d10901a7ec9b3d2c63c711f (diff)
parentfee1df54b64871f8c097a53fcb02145af48c0b48 (diff)
Merge remote-tracking branch 'userns/for-next'
-rw-r--r--arch/alpha/kernel/ptrace.c2
-rw-r--r--arch/blackfin/kernel/ptrace.c4
-rw-r--r--arch/cris/arch-v32/kernel/ptrace.c2
-rw-r--r--arch/ia64/kernel/ptrace.c2
-rw-r--r--arch/mips/kernel/ptrace32.c4
-rw-r--r--arch/powerpc/kernel/ptrace32.c4
-rw-r--r--fs/exec.c21
-rw-r--r--fs/notify/inotify/inotify.h17
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c6
-rw-r--r--fs/notify/inotify/inotify_user.c34
-rw-r--r--include/linux/capability.h2
-rw-r--r--include/linux/fsnotify_backend.h3
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/mm_types.h1
-rw-r--r--include/linux/ptrace.h4
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/user_namespace.h4
-rw-r--r--kernel/capability.c36
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/ptrace.c70
-rw-r--r--kernel/ucount.c6
-rw-r--r--mm/init-mm.c2
-rw-r--r--mm/memory.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--security/integrity/evm/evm_crypto.c12
25 files changed, 186 insertions, 70 deletions
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 940dfb406591..04abdec7f496 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* When I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA:
- copied = access_process_vm(child, addr, &tmp, sizeof(tmp),
+ copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp),
FOLL_FORCE);
ret = -EIO;
if (copied != sizeof(tmp))
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index 8d79286ee4e8..360d99645163 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request,
switch (bfin_mem_access_type(addr, to_copy)) {
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
- copied = access_process_vm(child, addr, &tmp,
+ copied = ptrace_access_vm(child, addr, &tmp,
to_copy, FOLL_FORCE);
if (copied)
break;
@@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request,
switch (bfin_mem_access_type(addr, to_copy)) {
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
- copied = access_process_vm(child, addr, &data,
+ copied = ptrace_access_vm(child, addr, &data,
to_copy,
FOLL_FORCE | FOLL_WRITE);
break;
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index f0df654ac6fc..fe1f9cf7b391 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* The trampoline page is globally mapped, no page table to traverse.*/
tmp = *(unsigned long*)addr;
} else {
- copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+ copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 31aa8c0f68e1..36f660da8124 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request,
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
/* read word at location addr */
- if (access_process_vm(child, addr, &data, sizeof(data),
+ if (ptrace_access_vm(child, addr, &data, sizeof(data),
FOLL_FORCE)
!= sizeof(data))
return -EIO;
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 7e71a4e0281b..5fcbdcd7abd0 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
break;
- copied = access_process_vm(child, (u64)addrOthers, &tmp,
+ copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
break;
ret = 0;
- if (access_process_vm(child, (u64)addrOthers, &data,
+ if (ptrace_access_vm(child, (u64)addrOthers, &data,
sizeof(data),
FOLL_FORCE | FOLL_WRITE) == sizeof(data))
break;
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 010b7b310237..1e887f3a61a6 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
- copied = access_process_vm(child, (u64)addrOthers, &tmp,
+ copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
ret = 0;
- if (access_process_vm(child, (u64)addrOthers, &tmp,
+ if (ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp),
FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
break;
diff --git a/fs/exec.c b/fs/exec.c
index 923c57d96899..88b5e1efdbd6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1277,8 +1277,22 @@ EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
- if (inode_permission(file_inode(file), MAY_READ) < 0)
+ struct inode *inode = file_inode(file);
+ if (inode_permission(inode, MAY_READ) < 0) {
+ struct user_namespace *old, *user_ns;
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
+ /* Ensure mm->user_ns contains the executable */
+ user_ns = old = bprm->mm->user_ns;
+ while ((user_ns != &init_user_ns) &&
+ !privileged_wrt_inode_uidgid(user_ns, inode))
+ user_ns = user_ns->parent;
+
+ if (old != user_ns) {
+ bprm->mm->user_ns = get_user_ns(user_ns);
+ put_user_ns(old);
+ }
+ }
}
EXPORT_SYMBOL(would_dump);
@@ -1308,7 +1322,6 @@ void setup_new_exec(struct linux_binprm * bprm)
!gid_eq(bprm->cred->gid, current_egid())) {
current->pdeath_signal = 0;
} else {
- would_dump(bprm, bprm->file);
if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
set_dumpable(current->mm, suid_dumpable);
}
@@ -1408,7 +1421,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
unsigned n_fs;
if (p->ptrace) {
- if (p->ptrace & PT_PTRACE_CAP)
+ if (ptracer_capable(p, current_user_ns()))
bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
else
bprm->unsafe |= LSM_UNSAFE_PTRACE;
@@ -1743,6 +1756,8 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out;
+ would_dump(bprm, bprm->file);
+
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ed855ef6f077..4e9bea095e0b 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group,
const unsigned char *file_name, u32 cookie);
extern const struct fsnotify_ops inotify_fsnotify_ops;
+
+#ifdef CONFIG_INOTIFY_USER
+static inline void dec_inotify_instances(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
+}
+
+static inline struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
+{
+ return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
+}
+
+static inline void dec_inotify_watches(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
+}
+#endif
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 2cd900c2c737..1e6b3cfc2cfd 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
/* ideally the idr is empty and we won't hit the BUG in the callback */
idr_for_each(&group->inotify_data.idr, idr_callback, group);
idr_destroy(&group->inotify_data.idr);
- if (group->inotify_data.user) {
- atomic_dec(&group->inotify_data.user->inotify_devs);
- free_uid(group->inotify_data.user);
- }
+ if (group->inotify_data.ucounts)
+ dec_inotify_instances(group->inotify_data.ucounts);
}
static void inotify_free_event(struct fsnotify_event *fsn_event)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 69d1ea3d292a..1cf41c623be1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -44,10 +44,8 @@
#include <asm/ioctls.h>
-/* these are configurable via /proc/sys/fs/inotify/ */
-static int inotify_max_user_instances __read_mostly;
+/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
-static int inotify_max_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
@@ -60,7 +58,7 @@ static int zero;
struct ctl_table inotify_table[] = {
{
.procname = "max_user_instances",
- .data = &inotify_max_user_instances,
+ .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = {
},
{
.procname = "max_user_watches",
- .data = &inotify_max_user_watches,
+ .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
/* remove this mark from the idr */
inotify_remove_from_idr(group, i_mark);
- atomic_dec(&group->inotify_data.user->inotify_watches);
+ dec_inotify_watches(group->inotify_data.ucounts);
}
/* ding dong the mark is dead */
@@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
tmp_i_mark->fsn_mark.mask = mask;
tmp_i_mark->wd = -1;
- ret = -ENOSPC;
- if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
- goto out_err;
-
ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
if (ret)
goto out_err;
+ /* increment the number of watches the user has */
+ if (!inc_inotify_watches(group->inotify_data.ucounts)) {
+ inotify_remove_from_idr(group, tmp_i_mark);
+ ret = -ENOSPC;
+ goto out_err;
+ }
+
/* we are on the idr, now get on the inode */
ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
NULL, 0);
@@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
goto out_err;
}
- /* increment the number of watches the user has */
- atomic_inc(&group->inotify_data.user->inotify_watches);
/* return the watch descriptor for this new mark */
ret = tmp_i_mark->wd;
@@ -653,10 +652,11 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
- group->inotify_data.user = get_current_user();
+ group->inotify_data.ucounts = inc_ucount(current_user_ns(),
+ current_euid(),
+ UCOUNT_INOTIFY_INSTANCES);
- if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
- inotify_max_user_instances) {
+ if (!group->inotify_data.ucounts) {
fsnotify_destroy_group(group);
return ERR_PTR(-EMFILE);
}
@@ -819,8 +819,8 @@ static int __init inotify_user_setup(void)
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
inotify_max_queued_events = 16384;
- inotify_max_user_instances = 128;
- inotify_max_user_watches = 8192;
+ init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
+ init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
return 0;
}
diff --git a/include/linux/capability.h b/include/linux/capability.h
index dbc21c719ce6..6ffb67e10c06 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -240,8 +240,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
return true;
}
#endif /* CONFIG_MULTIUSER */
+extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
+extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
/* audit system wants to get cap info from files as well */
extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 79467b239fcf..251f2268baad 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -16,6 +16,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
+#include <linux/user_namespace.h>
/*
* IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -170,7 +171,7 @@ struct fsnotify_group {
struct inotify_group_private_data {
spinlock_t idr_lock;
struct idr idr;
- struct user_struct *user;
+ struct ucounts *ucounts;
} inotify_data;
#endif
#ifdef CONFIG_FANOTIFY
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a92c8d73aeaf..0b5b2e4df14e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
+extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4a8acedf4b7d..08d947fc4c59 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -473,6 +473,7 @@ struct mm_struct {
*/
struct task_struct __rcu *owner;
#endif
+ struct user_namespace *user_ns;
/* store ref to file /proc/<pid>/exe symlink points to */
struct file __rcu *exe_file;
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 504c98a278d4..e0e539321ab9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -8,6 +8,9 @@
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
+extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
+
/*
* Ptrace flags
*
@@ -19,7 +22,6 @@
#define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
-#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */
#define PT_OPT_FLAG_SHIFT 3
/* PT_TRACE_* event enable flags */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 65c54b6147ba..4cc09bca46fe 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -864,10 +864,6 @@ struct user_struct {
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
-#ifdef CONFIG_INOTIFY_USER
- atomic_t inotify_watches; /* How many inotify watches does this user have? */
- atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
-#endif
#ifdef CONFIG_FANOTIFY
atomic_t fanotify_listeners;
#endif
@@ -1681,6 +1677,7 @@ struct task_struct {
struct list_head cpu_timers[3];
/* process credentials */
+ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb209d4523f5..363e0e8082a9 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -32,6 +32,10 @@ enum ucount_type {
UCOUNT_NET_NAMESPACES,
UCOUNT_MNT_NAMESPACES,
UCOUNT_CGROUP_NAMESPACES,
+#ifdef CONFIG_INOTIFY_USER
+ UCOUNT_INOTIFY_INSTANCES,
+ UCOUNT_INOTIFY_WATCHES,
+#endif
UCOUNT_COUNTS,
};
diff --git a/kernel/capability.c b/kernel/capability.c
index 00411c82dac5..4984e1f552eb 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -457,6 +457,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
EXPORT_SYMBOL(file_ns_capable);
/**
+ * privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
+ * @ns: The user namespace in question
+ * @inode: The inode in question
+ *
+ * Return true if the inode uid and gid are within the namespace.
+ */
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
+{
+ return kuid_has_mapping(ns, inode->i_uid) &&
+ kgid_has_mapping(ns, inode->i_gid);
+}
+
+/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
* @inode: The inode in question
* @cap: The capability in question
@@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
- return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
- kgid_has_mapping(ns, inode->i_gid);
+ return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
}
EXPORT_SYMBOL(capable_wrt_inode_uidgid);
+
+/**
+ * ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
+ * @tsk: The task that may be ptraced
+ * @ns: The user namespace to search for CAP_SYS_PTRACE in
+ *
+ * Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
+ * in the specified user namespace.
+ */
+bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
+{
+ int ret = 0; /* An absent tracer adds no restrictions */
+ const struct cred *cred;
+ rcu_read_lock();
+ cred = rcu_dereference(tsk->ptracer_cred);
+ if (cred)
+ ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+ rcu_read_unlock();
+ return (ret == 0);
+}
diff --git a/kernel/fork.c b/kernel/fork.c
index bdeebe8d55cd..3c306e62e295 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -745,7 +745,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+ struct user_namespace *user_ns)
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
@@ -785,6 +786,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
if (init_new_context(p, mm))
goto fail_nocontext;
+ mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
@@ -830,7 +832,7 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
- return mm_init(mm, current);
+ return mm_init(mm, current, current_user_ns());
}
/*
@@ -845,6 +847,7 @@ void __mmdrop(struct mm_struct *mm)
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
+ put_user_ns(mm->user_ns);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1126,7 +1129,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm, tsk))
+ if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
err = dup_mmap(mm, oldmm);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index e6474f7272ec..49ba7c1ade9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -27,6 +27,35 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
+/*
+ * Access another process' address space via ptrace.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags)
+{
+ struct mm_struct *mm;
+ int ret;
+
+ mm = get_task_mm(tsk);
+ if (!mm)
+ return 0;
+
+ if (!tsk->ptrace ||
+ (current != tsk->parent) ||
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptracer_capable(tsk, mm->user_ns))) {
+ mmput(mm);
+ return 0;
+ }
+
+ ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+ mmput(mm);
+
+ return ret;
+}
+
/*
* ptrace a task: make the debugger its new parent and
@@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
+ rcu_read_lock();
+ child->ptracer_cred = get_cred(__task_cred(new_parent));
+ rcu_read_unlock();
}
/**
@@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
*/
void __ptrace_unlink(struct task_struct *child)
{
+ const struct cred *old_cred;
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
+ old_cred = child->ptracer_cred;
+ child->ptracer_cred = NULL;
+ put_cred(old_cred);
spin_lock(&child->sighand->siglock);
child->ptrace = 0;
@@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
- int dumpable = 0;
+ struct mm_struct *mm;
kuid_t caller_uid;
kgid_t caller_gid;
@@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
- smp_rmb();
- if (task->mm)
- dumpable = get_dumpable(task->mm);
- rcu_read_lock();
- if (dumpable != SUID_DUMP_USER &&
- !ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
- rcu_read_unlock();
- return -EPERM;
- }
- rcu_read_unlock();
+ mm = task->mm;
+ if (mm &&
+ ((get_dumpable(mm) != SUID_DUMP_USER) &&
+ !ptrace_has_cap(mm->user_ns, mode)))
+ return -EPERM;
return security_ptrace_access_check(task, mode);
}
@@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (seize)
flags |= PT_SEIZED;
- rcu_read_lock();
- if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
- flags |= PT_PTRACE_CAP;
- rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
@@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
int this_len, retval;
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
- retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
+ retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
+
if (!retval) {
if (copied)
break;
@@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
if (copy_from_user(buf, src, this_len))
return -EFAULT;
- retval = access_process_vm(tsk, dst, buf, this_len,
+ retval = ptrace_access_vm(tsk, dst, buf, this_len,
FOLL_FORCE | FOLL_WRITE);
if (!retval) {
if (copied)
@@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
unsigned long tmp;
int copied;
- copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
+ copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
return -EIO;
return put_user(tmp, (unsigned long __user *)data);
@@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
{
int copied;
- copied = access_process_vm(tsk, addr, &data, sizeof(data),
+ copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(data)) ? 0 : -EIO;
}
@@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
switch (request) {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
- ret = access_process_vm(child, addr, &word, sizeof(word),
+ ret = ptrace_access_vm(child, addr, &word, sizeof(word),
FOLL_FORCE);
if (ret != sizeof(word))
ret = -EIO;
@@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
- ret = access_process_vm(child, addr, &data, sizeof(data),
+ ret = ptrace_access_vm(child, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
ret = (ret != sizeof(data) ? -EIO : 0);
break;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 9d20d5dd298a..a6bcea47306b 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
static int zero = 0;
static int int_max = INT_MAX;
-#define UCOUNT_ENTRY(name) \
+#define UCOUNT_ENTRY(name) \
{ \
.procname = name, \
.maxlen = sizeof(int), \
@@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
UCOUNT_ENTRY("max_net_namespaces"),
UCOUNT_ENTRY("max_mnt_namespaces"),
UCOUNT_ENTRY("max_cgroup_namespaces"),
+#ifdef CONFIG_INOTIFY_USER_
+ UCOUNT_ENTRY("max_inotify_instances"),
+ UCOUNT_ENTRY("max_inotify_watches"),
+#endif
{ }
};
#endif /* CONFIG_SYSCTL */
diff --git a/mm/init-mm.c b/mm/init-mm.c
index a56a851908d2..975e49f00f34 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -6,6 +6,7 @@
#include <linux/cpumask.h>
#include <linux/atomic.h>
+#include <linux/user_namespace.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
@@ -21,5 +22,6 @@ struct mm_struct init_mm = {
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
+ .user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/memory.c b/mm/memory.c
index 3142fc0f7253..00a95cc7df0a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3868,7 +3868,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
* Access another process' address space as given in mm. If non-NULL, use the
* given task for page fault accounting.
*/
-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
diff --git a/mm/nommu.c b/mm/nommu.c
index 9720e0bab029..27bc543128e5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe,
}
EXPORT_SYMBOL(filemap_map_pages);
-static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
+int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index bf663915412e..d7f282d75cc1 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -151,8 +151,16 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
memset(&hmac_misc, 0, sizeof(hmac_misc));
hmac_misc.ino = inode->i_ino;
hmac_misc.generation = inode->i_generation;
- hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
- hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+ /* The hmac uid and gid must be encoded in the initial user
+ * namespace (not the filesystems user namespace) as encoding
+ * them in the filesystems user namespace allows an attack
+ * where first they are written in an unprivileged fuse mount
+ * of a filesystem and then the system is tricked to mount the
+ * filesystem for real on next boot and trust it because
+ * everything is signed.
+ */
+ hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
+ hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
hmac_misc.mode = inode->i_mode;
crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
if (evm_hmac_attrs & EVM_ATTR_FSUUID)