summaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c164
1 files changed, 52 insertions, 112 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index fc72f09a61b2..4d32190861bd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -261,7 +261,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
THREAD_SIZE_ORDER);
if (likely(page)) {
- tsk->stack = page_address(page);
+ tsk->stack = kasan_reset_tag(page_address(page));
return tsk->stack;
}
return NULL;
@@ -276,13 +276,8 @@ static inline void free_thread_stack(struct task_struct *tsk)
if (vm) {
int i;
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- mod_memcg_page_state(vm->pages[i],
- MEMCG_KERNEL_STACK_KB,
- -(int)(PAGE_SIZE / 1024));
-
+ for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
memcg_kmem_uncharge_page(vm->pages[i], 0);
- }
for (i = 0; i < NR_CACHED_STACKS; i++) {
if (this_cpu_cmpxchg(cached_stacks[i],
@@ -307,6 +302,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
{
unsigned long *stack;
stack = kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
+ stack = kasan_reset_tag(stack);
tsk->stack = stack;
return stack;
}
@@ -359,7 +355,13 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (new) {
- *new = *orig;
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_flags);
+ ASSERT_EXCLUSIVE_WRITER(orig->vm_file);
+ /*
+ * orig->shared.rb may be modified concurrently, but the clone
+ * will be reinitialized.
+ */
+ *new = data_race(*orig);
INIT_LIST_HEAD(&new->anon_vma_chain);
new->vm_next = new->vm_prev = NULL;
}
@@ -376,31 +378,14 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
void *stack = task_stack_page(tsk);
struct vm_struct *vm = task_stack_vm_area(tsk);
- BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
-
- if (vm) {
- int i;
-
- BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
- for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
- mod_zone_page_state(page_zone(vm->pages[i]),
- NR_KERNEL_STACK_KB,
- PAGE_SIZE / 1024 * account);
- }
- } else {
- /*
- * All stack pages are in the same zone and belong to the
- * same memcg.
- */
- struct page *first_page = virt_to_page(stack);
-
- mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
- THREAD_SIZE / 1024 * account);
- mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
- }
+ /* All stack pages are in the same node. */
+ if (vm)
+ mod_lruvec_page_state(vm->pages[0], NR_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
+ else
+ mod_lruvec_slab_state(stack, NR_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
}
static int memcg_charge_kernel_stack(struct task_struct *tsk)
@@ -409,24 +394,23 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
struct vm_struct *vm = task_stack_vm_area(tsk);
int ret;
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
+
if (vm) {
int i;
+ BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
/*
* If memcg_kmem_charge_page() fails, page->mem_cgroup
- * pointer is NULL, and both memcg_kmem_uncharge_page()
- * and mod_memcg_page_state() in free_thread_stack()
- * will ignore this page. So it's safe.
+ * pointer is NULL, and memcg_kmem_uncharge_page() in
+ * free_thread_stack() will ignore this page.
*/
ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL,
0);
if (ret)
return ret;
-
- mod_memcg_page_state(vm->pages[i],
- MEMCG_KERNEL_STACK_KB,
- PAGE_SIZE / 1024);
}
}
#endif
@@ -473,7 +457,6 @@ void free_task(struct task_struct *tsk)
#endif
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
- put_seccomp_filter(tsk);
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
@@ -1474,7 +1457,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
goto out;
}
- newf = dup_fd(oldf, &error);
+ newf = dup_fd(oldf, NR_OPEN_MAX, &error);
if (!newf)
goto out;
@@ -1787,22 +1770,18 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
*/
static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
- struct task_struct *task;
struct pid *pid = file->private_data;
__poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
- rcu_read_lock();
- task = pid_task(pid, PIDTYPE_PID);
/*
* Inform pollers only when the whole thread group exits.
* If the thread group leader exits before all other threads in the
* group, then poll(2) should block, similar to the wait(2) family.
*/
- if (!task || (task->exit_state && thread_group_empty(task)))
+ if (thread_group_exited(pid))
poll_flags = EPOLLIN | EPOLLRDNORM;
- rcu_read_unlock();
return poll_flags;
}
@@ -2035,17 +2014,11 @@ static __latent_entropy struct task_struct *copy_process(
seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock);
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- p->irq_events = 0;
- p->hardirq_enable_ip = 0;
- p->hardirq_enable_event = 0;
- p->hardirq_disable_ip = _THIS_IP_;
- p->hardirq_disable_event = 0;
- p->softirqs_enabled = 1;
- p->softirq_enable_ip = _THIS_IP_;
- p->softirq_enable_event = 0;
- p->softirq_disable_ip = 0;
- p->softirq_disable_event = 0;
- p->softirq_context = 0;
+ memset(&p->irqtrace, 0, sizeof(p->irqtrace));
+ p->irqtrace.hardirq_disable_ip = _THIS_IP_;
+ p->irqtrace.softirq_enable_ip = _THIS_IP_;
+ p->softirqs_enabled = 1;
+ p->softirq_context = 0;
#endif
p->pagefault_disabled = 0;
@@ -2102,8 +2075,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
- retval = copy_thread_tls(clone_flags, args->stack, args->stack_size, p,
- args->tls);
+ retval = copy_thread(clone_flags, args->stack, args->stack_size, p, args->tls);
if (retval)
goto bad_fork_cleanup_io;
@@ -2302,6 +2274,7 @@ static __latent_entropy struct task_struct *copy_process(
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
+ sched_post_fork(p);
cgroup_post_fork(p, args);
perf_event_fork(p);
@@ -2421,6 +2394,20 @@ long _do_fork(struct kernel_clone_args *args)
long nr;
/*
+ * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
+ * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
+ * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
+ * field in struct clone_args and it still doesn't make sense to have
+ * them both point at the same memory location. Performing this check
+ * here has the advantage that we don't need to have a separate helper
+ * to check for legacy clone().
+ */
+ if ((args->flags & CLONE_PIDFD) &&
+ (args->flags & CLONE_PARENT_SETTID) &&
+ (args->pidfd == args->parent_tid))
+ return -EINVAL;
+
+ /*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
* requested, no event is reported; otherwise, report if the event
@@ -2477,42 +2464,6 @@ long _do_fork(struct kernel_clone_args *args)
return nr;
}
-bool legacy_clone_args_valid(const struct kernel_clone_args *kargs)
-{
- /* clone(CLONE_PIDFD) uses parent_tidptr to return a pidfd */
- if ((kargs->flags & CLONE_PIDFD) &&
- (kargs->flags & CLONE_PARENT_SETTID))
- return false;
-
- return true;
-}
-
-#ifndef CONFIG_HAVE_COPY_THREAD_TLS
-/* For compatibility with architectures that call do_fork directly rather than
- * using the syscall entry points below. */
-long do_fork(unsigned long clone_flags,
- unsigned long stack_start,
- unsigned long stack_size,
- int __user *parent_tidptr,
- int __user *child_tidptr)
-{
- struct kernel_clone_args args = {
- .flags = (lower_32_bits(clone_flags) & ~CSIGNAL),
- .pidfd = parent_tidptr,
- .child_tid = child_tidptr,
- .parent_tid = parent_tidptr,
- .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL),
- .stack = stack_start,
- .stack_size = stack_size,
- };
-
- if (!legacy_clone_args_valid(&args))
- return -EINVAL;
-
- return _do_fork(&args);
-}
-#endif
-
/*
* Create a kernel thread.
*/
@@ -2591,24 +2542,12 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
.tls = tls,
};
- if (!legacy_clone_args_valid(&args))
- return -EINVAL;
-
return _do_fork(&args);
}
#endif
#ifdef __ARCH_WANT_SYS_CLONE3
-/*
- * copy_thread implementations handle CLONE_SETTLS by reading the TLS value from
- * the registers containing the syscall arguments for clone. This doesn't work
- * with clone3 since the TLS value is passed in clone_args instead.
- */
-#ifndef CONFIG_HAVE_COPY_THREAD_TLS
-#error clone3 requires copy_thread_tls support in arch
-#endif
-
noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
struct clone_args __user *uargs,
size_t usize)
@@ -2905,14 +2844,15 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
/*
* Unshare file descriptor table if it is being shared
*/
-static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
+int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
+ struct files_struct **new_fdp)
{
struct files_struct *fd = current->files;
int error = 0;
if ((unshare_flags & CLONE_FILES) &&
(fd && atomic_read(&fd->count) > 1)) {
- *new_fdp = dup_fd(fd, &error);
+ *new_fdp = dup_fd(fd, max_fds, &error);
if (!*new_fdp)
return error;
}
@@ -2923,7 +2863,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp
/*
* unshare allows a process to 'unshare' part of the process
* context which was originally shared using clone. copy_*
- * functions used by do_fork() cannot be used here directly
+ * functions used by _do_fork() cannot be used here directly
* because they modify an inactive task_struct that is being
* constructed. Here we are modifying the current, active,
* task_struct.
@@ -2972,7 +2912,7 @@ int ksys_unshare(unsigned long unshare_flags)
err = unshare_fs(unshare_flags, &new_fs);
if (err)
goto bad_unshare_out;
- err = unshare_fd(unshare_flags, &new_fd);
+ err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
if (err)
goto bad_unshare_cleanup_fs;
err = unshare_userns(unshare_flags, &new_cred);
@@ -3061,7 +3001,7 @@ int unshare_files(struct files_struct **displaced)
struct files_struct *copy = NULL;
int error;
- error = unshare_fd(CLONE_FILES, &copy);
+ error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
if (error || !copy) {
*displaced = NULL;
return error;