summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/io_uring_types.h5
-rw-r--r--include/uapi/linux/io_uring.h4
-rw-r--r--io_uring/Makefile1
-rw-r--r--io_uring/cancel.c5
-rw-r--r--io_uring/cancel.h4
-rw-r--r--io_uring/futex.c386
-rw-r--r--io_uring/futex.h36
-rw-r--r--io_uring/io_uring.c7
-rw-r--r--io_uring/opdef.c34
-rw-r--r--kernel/futex/futex.h20
-rw-r--r--kernel/futex/requeue.c3
-rw-r--r--kernel/futex/syscalls.c18
-rw-r--r--kernel/futex/waitwake.c49
13 files changed, 545 insertions, 27 deletions
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e4e67899b134..d3009d56af0b 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -321,6 +321,11 @@ struct io_ring_ctx {
struct hlist_head waitid_list;
+#ifdef CONFIG_FUTEX
+ struct hlist_head futex_list;
+ struct io_alloc_cache futex_cache;
+#endif
+
const struct cred *sq_creds; /* cred used for __io_sq_thread() */
struct io_sq_data *sq_data; /* if using sq thread polling */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0f7f6acd0e5a..f1c16f817742 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -70,6 +70,7 @@ struct io_uring_sqe {
__u32 msg_ring_flags;
__u32 uring_cmd_flags;
__u32 waitid_flags;
+ __u32 futex_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
@@ -249,6 +250,9 @@ enum io_uring_op {
IORING_OP_SENDMSG_ZC,
IORING_OP_READ_MULTISHOT,
IORING_OP_WAITID,
+ IORING_OP_FUTEX_WAIT,
+ IORING_OP_FUTEX_WAKE,
+ IORING_OP_FUTEX_WAITV,
/* this goes last, obviously */
IORING_OP_LAST,
diff --git a/io_uring/Makefile b/io_uring/Makefile
index 7bd64e442567..e5be47e4fc3b 100644
--- a/io_uring/Makefile
+++ b/io_uring/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
cancel.o kbuf.o rsrc.o rw.o opdef.o \
notif.o waitid.o
obj-$(CONFIG_IO_WQ) += io-wq.o
+obj-$(CONFIG_FUTEX) += futex.o
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index eb77a51c5a79..3c19cccb1aec 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -16,6 +16,7 @@
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
+#include "futex.h"
#include "cancel.h"
struct io_cancel {
@@ -124,6 +125,10 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
if (ret != -ENOENT)
return ret;
+ ret = io_futex_cancel(ctx, cd, issue_flags);
+ if (ret != -ENOENT)
+ return ret;
+
spin_lock(&ctx->completion_lock);
if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
ret = io_timeout_cancel(ctx, cd);
diff --git a/io_uring/cancel.h b/io_uring/cancel.h
index fc98622e6166..c0a8e7c520b6 100644
--- a/io_uring/cancel.h
+++ b/io_uring/cancel.h
@@ -1,4 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#ifndef IORING_CANCEL_H
+#define IORING_CANCEL_H
#include <linux/io_uring_types.h>
@@ -22,3 +24,5 @@ void init_hash_table(struct io_hash_table *table, unsigned size);
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);
+
+#endif
diff --git a/io_uring/futex.c b/io_uring/futex.c
new file mode 100644
index 000000000000..3c3575303c3d
--- /dev/null
+++ b/io_uring/futex.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "../kernel/futex/futex.h"
+#include "io_uring.h"
+#include "rsrc.h"
+#include "futex.h"
+
+struct io_futex {
+ struct file *file;
+ union {
+ u32 __user *uaddr;
+ struct futex_waitv __user *uwaitv;
+ };
+ unsigned long futex_val;
+ unsigned long futex_mask;
+ unsigned long futexv_owned;
+ u32 futex_flags;
+ unsigned int futex_nr;
+ bool futexv_unqueued;
+};
+
+struct io_futex_data {
+ union {
+ struct futex_q q;
+ struct io_cache_entry cache;
+ };
+ struct io_kiocb *req;
+};
+
+void io_futex_cache_init(struct io_ring_ctx *ctx)
+{
+ io_alloc_cache_init(&ctx->futex_cache, IO_NODE_ALLOC_CACHE_MAX,
+ sizeof(struct io_futex_data));
+}
+
+static void io_futex_cache_entry_free(struct io_cache_entry *entry)
+{
+ kfree(container_of(entry, struct io_futex_data, cache));
+}
+
+void io_futex_cache_free(struct io_ring_ctx *ctx)
+{
+ io_alloc_cache_free(&ctx->futex_cache, io_futex_cache_entry_free);
+}
+
+static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
+{
+ req->async_data = NULL;
+ hlist_del_init(&req->hash_node);
+ io_req_task_complete(req, ts);
+}
+
+static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
+{
+ struct io_futex_data *ifd = req->async_data;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ io_tw_lock(ctx, ts);
+ if (!io_alloc_cache_put(&ctx->futex_cache, &ifd->cache))
+ kfree(ifd);
+ __io_futex_complete(req, ts);
+}
+
+static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ struct futex_vector *futexv = req->async_data;
+
+ io_tw_lock(req->ctx, ts);
+
+ if (!iof->futexv_unqueued) {
+ int res;
+
+ res = futex_unqueue_multiple(futexv, iof->futex_nr);
+ if (res != -1)
+ io_req_set_res(req, res, 0);
+ }
+
+ kfree(req->async_data);
+ req->flags &= ~REQ_F_ASYNC_DATA;
+ __io_futex_complete(req, ts);
+}
+
+static bool io_futexv_claim(struct io_futex *iof)
+{
+ if (test_bit(0, &iof->futexv_owned) ||
+ test_and_set_bit_lock(0, &iof->futexv_owned))
+ return false;
+ return true;
+}
+
+static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+ /* futex wake already done or in progress */
+ if (req->opcode == IORING_OP_FUTEX_WAIT) {
+ struct io_futex_data *ifd = req->async_data;
+
+ if (!futex_unqueue(&ifd->q))
+ return false;
+ req->io_task_work.func = io_futex_complete;
+ } else {
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+
+ if (!io_futexv_claim(iof))
+ return false;
+ req->io_task_work.func = io_futexv_complete;
+ }
+
+ hlist_del_init(&req->hash_node);
+ io_req_set_res(req, -ECANCELED, 0);
+ io_req_task_work_add(req);
+ return true;
+}
+
+int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+ unsigned int issue_flags)
+{
+ struct hlist_node *tmp;
+ struct io_kiocb *req;
+ int nr = 0;
+
+ if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
+ return -ENOENT;
+
+ io_ring_submit_lock(ctx, issue_flags);
+ hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
+ if (req->cqe.user_data != cd->data &&
+ !(cd->flags & IORING_ASYNC_CANCEL_ANY))
+ continue;
+ if (__io_futex_cancel(ctx, req))
+ nr++;
+ if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
+ break;
+ }
+ io_ring_submit_unlock(ctx, issue_flags);
+
+ if (nr)
+ return nr;
+
+ return -ENOENT;
+}
+
+bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+ bool cancel_all)
+{
+ struct hlist_node *tmp;
+ struct io_kiocb *req;
+ bool found = false;
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
+ if (!io_match_task_safe(req, task, cancel_all))
+ continue;
+ __io_futex_cancel(ctx, req);
+ found = true;
+ }
+
+ return found;
+}
+
+int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ u32 flags;
+
+ if (unlikely(sqe->len || sqe->futex_flags || sqe->buf_index ||
+ sqe->file_index))
+ return -EINVAL;
+
+ iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ iof->futex_val = READ_ONCE(sqe->addr2);
+ iof->futex_mask = READ_ONCE(sqe->addr3);
+ flags = READ_ONCE(sqe->fd);
+
+ if (flags & ~FUTEX2_VALID_MASK)
+ return -EINVAL;
+
+ iof->futex_flags = futex2_to_flags(flags);
+ if (!futex_flags_valid(iof->futex_flags))
+ return -EINVAL;
+
+ if (!futex_validate_input(iof->futex_flags, iof->futex_val) ||
+ !futex_validate_input(iof->futex_flags, iof->futex_mask))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void io_futex_wakev_fn(struct wake_q_head *wake_q, struct futex_q *q)
+{
+ struct io_kiocb *req = q->wake_data;
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+
+ if (!io_futexv_claim(iof))
+ return;
+ if (unlikely(!__futex_wake_mark(q)))
+ return;
+
+ io_req_set_res(req, 0, 0);
+ req->io_task_work.func = io_futexv_complete;
+ io_req_task_work_add(req);
+}
+
+int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ struct futex_vector *futexv;
+ int ret;
+
+ /* No flags or mask supported for waitv */
+ if (unlikely(sqe->fd || sqe->buf_index || sqe->file_index ||
+ sqe->addr2 || sqe->futex_flags || sqe->addr3))
+ return -EINVAL;
+
+ iof->uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ iof->futex_nr = READ_ONCE(sqe->len);
+ if (!iof->futex_nr || iof->futex_nr > FUTEX_WAITV_MAX)
+ return -EINVAL;
+
+ futexv = kcalloc(iof->futex_nr, sizeof(*futexv), GFP_KERNEL);
+ if (!futexv)
+ return -ENOMEM;
+
+ ret = futex_parse_waitv(futexv, iof->uwaitv, iof->futex_nr,
+ io_futex_wakev_fn, req);
+ if (ret) {
+ kfree(futexv);
+ return ret;
+ }
+
+ iof->futexv_owned = 0;
+ iof->futexv_unqueued = 0;
+ req->flags |= REQ_F_ASYNC_DATA;
+ req->async_data = futexv;
+ return 0;
+}
+
+static void io_futex_wake_fn(struct wake_q_head *wake_q, struct futex_q *q)
+{
+ struct io_futex_data *ifd = container_of(q, struct io_futex_data, q);
+ struct io_kiocb *req = ifd->req;
+
+ if (unlikely(!__futex_wake_mark(q)))
+ return;
+
+ io_req_set_res(req, 0, 0);
+ req->io_task_work.func = io_futex_complete;
+ io_req_task_work_add(req);
+}
+
+static struct io_futex_data *io_alloc_ifd(struct io_ring_ctx *ctx)
+{
+ struct io_cache_entry *entry;
+
+ entry = io_alloc_cache_get(&ctx->futex_cache);
+ if (entry)
+ return container_of(entry, struct io_futex_data, cache);
+
+ return kmalloc(sizeof(struct io_futex_data), GFP_NOWAIT);
+}
+
+int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ struct futex_vector *futexv = req->async_data;
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret, woken = -1;
+
+ io_ring_submit_lock(ctx, issue_flags);
+
+ ret = futex_wait_multiple_setup(futexv, iof->futex_nr, &woken);
+
+ /*
+ * Error case, ret is < 0. Mark the request as failed.
+ */
+ if (unlikely(ret < 0)) {
+ io_ring_submit_unlock(ctx, issue_flags);
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ kfree(futexv);
+ req->async_data = NULL;
+ req->flags &= ~REQ_F_ASYNC_DATA;
+ return IOU_OK;
+ }
+
+ /*
+ * 0 return means that we successfully setup the waiters, and that
+ * nobody triggered a wakeup while we were doing so. If the wakeup
+ * happened post setup, the task_work will be run post this issue and
+ * under the submission lock. 1 means We got woken while setting up,
+ * let that side do the completion. Note that
+ * futex_wait_multiple_setup() will have unqueued all the futexes in
+ * this case. Mark us as having done that already, since this is
+ * different from normal wakeup.
+ */
+ if (!ret) {
+ /*
+ * If futex_wait_multiple_setup() returns 0 for a
+ * successful setup, then the task state will not be
+ * runnable. This is fine for the sync syscall, as
+ * it'll be blocking unless we already got one of the
+ * futexes woken, but it obviously won't work for an
+ * async invocation. Mark us runnable again.
+ */
+ __set_current_state(TASK_RUNNING);
+ hlist_add_head(&req->hash_node, &ctx->futex_list);
+ } else {
+ iof->futexv_unqueued = 1;
+ if (woken != -1)
+ io_req_set_res(req, woken, 0);
+ }
+
+ io_ring_submit_unlock(ctx, issue_flags);
+ return IOU_ISSUE_SKIP_COMPLETE;
+}
+
+int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_futex_data *ifd = NULL;
+ struct futex_hash_bucket *hb;
+ int ret;
+
+ if (!iof->futex_mask) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ io_ring_submit_lock(ctx, issue_flags);
+ ifd = io_alloc_ifd(ctx);
+ if (!ifd) {
+ ret = -ENOMEM;
+ goto done_unlock;
+ }
+
+ req->async_data = ifd;
+ ifd->q = futex_q_init;
+ ifd->q.bitset = iof->futex_mask;
+ ifd->q.wake = io_futex_wake_fn;
+ ifd->req = req;
+
+ ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
+ &ifd->q, &hb);
+ if (!ret) {
+ hlist_add_head(&req->hash_node, &ctx->futex_list);
+ io_ring_submit_unlock(ctx, issue_flags);
+
+ futex_queue(&ifd->q, hb);
+ return IOU_ISSUE_SKIP_COMPLETE;
+ }
+
+done_unlock:
+ io_ring_submit_unlock(ctx, issue_flags);
+done:
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ kfree(ifd);
+ return IOU_OK;
+}
+
+int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
+{
+ struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
+ int ret;
+
+ /*
+ * Strict flags - ensure that waking 0 futexes yields a 0 result.
+ * See commit 43adf8449510 ("futex: FLAGS_STRICT") for details.
+ */
+ ret = futex_wake(iof->uaddr, FLAGS_STRICT | iof->futex_flags,
+ iof->futex_val, iof->futex_mask);
+ if (ret < 0)
+ req_set_fail(req);
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+}
diff --git a/io_uring/futex.h b/io_uring/futex.h
new file mode 100644
index 000000000000..0847e9e8a127
--- /dev/null
+++ b/io_uring/futex.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "cancel.h"
+
+int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_futexv_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags);
+int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags);
+int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags);
+
+#if defined(CONFIG_FUTEX)
+int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
+ unsigned int issue_flags);
+bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+ bool cancel_all);
+void io_futex_cache_init(struct io_ring_ctx *ctx);
+void io_futex_cache_free(struct io_ring_ctx *ctx);
+#else
+static inline int io_futex_cancel(struct io_ring_ctx *ctx,
+ struct io_cancel_data *cd,
+ unsigned int issue_flags)
+{
+ return 0;
+}
+static inline bool io_futex_remove_all(struct io_ring_ctx *ctx,
+ struct task_struct *task, bool cancel_all)
+{
+ return false;
+}
+static inline void io_futex_cache_init(struct io_ring_ctx *ctx)
+{
+}
+static inline void io_futex_cache_free(struct io_ring_ctx *ctx)
+{
+}
+#endif
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 36ae5ac2b070..ed254076c723 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -93,6 +93,7 @@
#include "net.h"
#include "notif.h"
#include "waitid.h"
+#include "futex.h"
#include "timeout.h"
#include "poll.h"
@@ -330,6 +331,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
sizeof(struct async_poll));
io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_msghdr));
+ io_futex_cache_init(ctx);
init_completion(&ctx->ref_comp);
xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
mutex_init(&ctx->uring_lock);
@@ -349,6 +351,9 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ctx->submit_state.free_list.next = NULL;
INIT_WQ_LIST(&ctx->locked_free_list);
INIT_HLIST_HEAD(&ctx->waitid_list);
+#ifdef CONFIG_FUTEX
+ INIT_HLIST_HEAD(&ctx->futex_list);
+#endif
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
INIT_HLIST_HEAD(&ctx->cancelable_uring_cmd);
@@ -2914,6 +2919,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_eventfd_unregister(ctx);
io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
+ io_futex_cache_free(ctx);
io_destroy_buffers(ctx);
mutex_unlock(&ctx->uring_lock);
if (ctx->sq_creds)
@@ -3357,6 +3363,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
mutex_lock(&ctx->uring_lock);
ret |= io_poll_remove_all(ctx, task, cancel_all);
ret |= io_waitid_remove_all(ctx, task, cancel_all);
+ ret |= io_futex_remove_all(ctx, task, cancel_all);
ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all);
mutex_unlock(&ctx->uring_lock);
ret |= io_kill_timeouts(ctx, task, cancel_all);
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index aadcbf7136b0..25a3515a177c 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -34,6 +34,7 @@
#include "cancel.h"
#include "rw.h"
#include "waitid.h"
+#include "futex.h"
static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
{
@@ -444,6 +445,30 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_waitid_prep,
.issue = io_waitid,
},
+ [IORING_OP_FUTEX_WAIT] = {
+#if defined(CONFIG_FUTEX)
+ .prep = io_futex_prep,
+ .issue = io_futex_wait,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
+ [IORING_OP_FUTEX_WAKE] = {
+#if defined(CONFIG_FUTEX)
+ .prep = io_futex_prep,
+ .issue = io_futex_wake,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
+ [IORING_OP_FUTEX_WAITV] = {
+#if defined(CONFIG_FUTEX)
+ .prep = io_futexv_prep,
+ .issue = io_futexv_wait,
+#else
+ .prep = io_eopnotsupp_prep,
+#endif
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -670,6 +695,15 @@ const struct io_cold_def io_cold_defs[] = {
.name = "WAITID",
.async_size = sizeof(struct io_waitid_async),
},
+ [IORING_OP_FUTEX_WAIT] = {
+ .name = "FUTEX_WAIT",
+ },
+ [IORING_OP_FUTEX_WAKE] = {
+ .name = "FUTEX_WAKE",
+ },
+ [IORING_OP_FUTEX_WAITV] = {
+ .name = "FUTEX_WAITV",
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index a06030a1a27b..8b195d06f4e8 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -52,6 +52,8 @@ static inline unsigned int futex_to_flags(unsigned int op)
return flags;
}
+#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
+
/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags(unsigned int flags2)
{
@@ -137,11 +139,16 @@ struct futex_pi_state {
union futex_key key;
} __randomize_layout;
+struct futex_q;
+typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
+
/**
* struct futex_q - The hashed futex queue entry, one per waiting task
* @list: priority-sorted list of tasks waiting on this futex
* @task: the task waiting on the futex
* @lock_ptr: the hash bucket lock
+ * @wake: the wake handler for this queue
+ * @wake_data: data associated with the wake handler
* @key: the key the futex is hashed on
* @pi_state: optional priority inheritance state
* @rt_waiter: rt_waiter storage for use with requeue_pi
@@ -166,6 +173,8 @@ struct futex_q {
struct task_struct *task;
spinlock_t *lock_ptr;
+ futex_wake_fn *wake;
+ void *wake_data;
union futex_key key;
struct futex_pi_state *pi_state;
struct rt_mutex_waiter *rt_waiter;
@@ -212,6 +221,7 @@ extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, struct futex_hash_bucket **hb);
extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
struct hrtimer_sleeper *timeout);
+extern bool __futex_wake_mark(struct futex_q *q);
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
extern int fault_in_user_writeable(u32 __user *uaddr);
@@ -351,6 +361,16 @@ struct futex_vector {
struct futex_q q;
};
+extern int futex_parse_waitv(struct futex_vector *futexv,
+ struct futex_waitv __user *uwaitv,
+ unsigned int nr_futexes, futex_wake_fn *wake,
+ void *wake_data);
+
+extern int futex_wait_multiple_setup(struct futex_vector *vs, int count,
+ int *woken);
+
+extern int futex_unqueue_multiple(struct futex_vector *v, int count);
+
extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
struct hrtimer_sleeper *to);
diff --git a/kernel/futex/requeue.c b/kernel/futex/requeue.c
index 16a3645bd786..eb21f065816b 100644
--- a/kernel/futex/requeue.c
+++ b/kernel/futex/requeue.c
@@ -58,6 +58,7 @@ enum {
const struct futex_q futex_q_init = {
/* list gets initialized in futex_queue()*/
+ .wake = futex_wake_mark,
.key = FUTEX_KEY_INIT,
.bitset = FUTEX_BITSET_MATCH_ANY,
.requeue_state = ATOMIC_INIT(Q_REQUEUE_PI_NONE),
@@ -593,7 +594,7 @@ retry_private:
/* Plain futexes just wake or requeue and are done */
if (!requeue_pi) {
if (++task_count <= nr_wake)
- futex_wake_mark(&wake_q, this);
+ this->wake(&wake_q, this);
else
requeue_futex(this, hb1, hb2, &key2);
continue;
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 8200d86d30e1..4b6da9116aa6 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -179,19 +179,20 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}
-#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
-
/**
* futex_parse_waitv - Parse a waitv array from userspace
* @futexv: Kernel side list of waiters to be filled
* @uwaitv: Userspace list to be parsed
* @nr_futexes: Length of futexv
+ * @wake: Wake to call when futex is woken
+ * @wake_data: Data for the wake handler
*
* Return: Error code on failure, 0 on success
*/
-static int futex_parse_waitv(struct futex_vector *futexv,
- struct futex_waitv __user *uwaitv,
- unsigned int nr_futexes)
+int futex_parse_waitv(struct futex_vector *futexv,
+ struct futex_waitv __user *uwaitv,
+ unsigned int nr_futexes, futex_wake_fn *wake,
+ void *wake_data)
{
struct futex_waitv aux;
unsigned int i;
@@ -216,6 +217,8 @@ static int futex_parse_waitv(struct futex_vector *futexv,
futexv[i].w.val = aux.val;
futexv[i].w.uaddr = aux.uaddr;
futexv[i].q = futex_q_init;
+ futexv[i].q.wake = wake;
+ futexv[i].q.wake_data = wake_data;
}
return 0;
@@ -308,7 +311,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
goto destroy_timer;
}
- ret = futex_parse_waitv(futexv, waiters, nr_futexes);
+ ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark,
+ NULL);
if (!ret)
ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
@@ -423,7 +427,7 @@ SYSCALL_DEFINE4(futex_requeue,
if (!waiters)
return -EINVAL;
- ret = futex_parse_waitv(futexes, waiters, 2);
+ ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL);
if (ret)
return ret;
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 37860f794bf7..61b112897a84 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -106,20 +106,11 @@
* double_lock_hb() and double_unlock_hb(), respectively.
*/
-/*
- * The hash bucket lock must be held when this is called.
- * Afterwards, the futex_q must not be accessed. Callers
- * must ensure to later call wake_up_q() for the actual
- * wakeups to occur.
- */
-void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
+bool __futex_wake_mark(struct futex_q *q)
{
- struct task_struct *p = q->task;
-
if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
- return;
+ return false;
- get_task_struct(p);
__futex_unqueue(q);
/*
* The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -130,6 +121,26 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
*/
smp_store_release(&q->lock_ptr, NULL);
+ return true;
+}
+
+/*
+ * The hash bucket lock must be held when this is called.
+ * Afterwards, the futex_q must not be accessed. Callers
+ * must ensure to later call wake_up_q() for the actual
+ * wakeups to occur.
+ */
+void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
+{
+ struct task_struct *p = q->task;
+
+ get_task_struct(p);
+
+ if (!__futex_wake_mark(q)) {
+ put_task_struct(p);
+ return;
+ }
+
/*
* Queue the task for later wakeup for after we've released
* the hb->lock.
@@ -177,7 +188,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!(this->bitset & bitset))
continue;
- futex_wake_mark(&wake_q, this);
+ this->wake(&wake_q, this);
if (++ret >= nr_wake)
break;
}
@@ -292,7 +303,7 @@ retry_private:
ret = -EINVAL;
goto out_unlock;
}
- futex_wake_mark(&wake_q, this);
+ this->wake(&wake_q, this);
if (++ret >= nr_wake)
break;
}
@@ -306,7 +317,7 @@ retry_private:
ret = -EINVAL;
goto out_unlock;
}
- futex_wake_mark(&wake_q, this);
+ this->wake(&wake_q, this);
if (++op_ret >= nr_wake2)
break;
}
@@ -361,7 +372,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
}
/**
- * unqueue_multiple - Remove various futexes from their hash bucket
+ * futex_unqueue_multiple - Remove various futexes from their hash bucket
* @v: The list of futexes to unqueue
* @count: Number of futexes in the list
*
@@ -371,7 +382,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
* - >=0 - Index of the last futex that was awoken;
* - -1 - No futex was awoken
*/
-static int unqueue_multiple(struct futex_vector *v, int count)
+int futex_unqueue_multiple(struct futex_vector *v, int count)
{
int ret = -1, i;
@@ -399,7 +410,7 @@ static int unqueue_multiple(struct futex_vector *v, int count)
* - 0 - Success
* - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
*/
-static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
+int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
{
struct futex_hash_bucket *hb;
bool retry = false;
@@ -461,7 +472,7 @@ retry:
* was woken, we don't return error and return this index to
* userspace
*/
- *woken = unqueue_multiple(vs, i);
+ *woken = futex_unqueue_multiple(vs, i);
if (*woken >= 0)
return 1;
@@ -546,7 +557,7 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
__set_current_state(TASK_RUNNING);
- ret = unqueue_multiple(vs, count);
+ ret = futex_unqueue_multiple(vs, count);
if (ret >= 0)
return ret;