summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 13:01:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 13:01:22 -0700
commitcdc8fcb49905c0b67e355e027cb462ee168ffaa3 (patch)
tree51d691db50761bd26e71e9f491c3dc82c2fe147c /include
parent382625d0d4325fb14a29444eb8dce8dcc2eb9b51 (diff)
parentfa15bafb71fd7a4d6018dae87cfaf890fd4ab47f (diff)
Merge tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block
Pull io_uring updates from Jens Axboe: "Lots of cleanups in here, hardening the code and/or making it easier to read and fixing bugs, but a core feature/change too adding support for real async buffered reads. With the latter in place, we just need buffered write async support and we're done relying on kthreads for the fast path. In detail: - Cleanup how memory accounting is done on ring setup/free (Bijan) - sq array offset calculation fixup (Dmitry) - Consistently handle blocking off O_DIRECT submission path (me) - Support proper async buffered reads, instead of relying on kthread offload for that. This uses the page waitqueue to drive retries from task_work, like we handle poll based retry. (me) - IO completion optimizations (me) - Fix race with accounting and ring fd install (me) - Support EPOLLEXCLUSIVE (Jiufei) - Get rid of the io_kiocb unionizing, made possible by shrinking other bits (Pavel) - Completion side cleanups (Pavel) - Cleanup REQ_F_ flags handling, and kill off many of them (Pavel) - Request environment grabbing cleanups (Pavel) - File and socket read/write cleanups (Pavel) - Improve kiocb_set_rw_flags() (Pavel) - Tons of fixes and cleanups (Pavel) - IORING_SQ_NEED_WAKEUP clear fix (Xiaoguang)" * tag 'for-5.9/io_uring-20200802' of git://git.kernel.dk/linux-block: (127 commits) io_uring: flip if handling after io_setup_async_rw fs: optimise kiocb_set_rw_flags() io_uring: don't touch 'ctx' after installing file descriptor io_uring: get rid of atomic FAA for cq_timeouts io_uring: consolidate *_check_overflow accounting io_uring: fix stalled deferred requests io_uring: fix racy overflow count reporting io_uring: deduplicate __io_complete_rw() io_uring: de-unionise io_kiocb io-wq: update hash bits io_uring: fix missing io_queue_linked_timeout() io_uring: mark ->work uninitialised after cleanup io_uring: deduplicate io_grab_files() calls io_uring: don't do opcode prep twice io_uring: clear IORING_SQ_NEED_WAKEUP after executing task works io_uring: batch put_task_struct() tasks: add put_task_struct_many() io_uring: return locked and pinned page accounting io_uring: don't miscount pinned memory io_uring: don't open-code recv kbuf managment ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/fs.h26
-rw-r--r--include/linux/pagemap.h43
-rw-r--r--include/linux/sched/task.h6
-rw-r--r--include/uapi/linux/io_uring.h4
5 files changed, 73 insertions, 7 deletions
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9ab06ea26894..06ecb2c1492f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1180,6 +1180,7 @@ struct blk_plug {
struct list_head cb_list; /* md requires an unplug callback */
unsigned short rq_count;
bool multiple_queues;
+ bool nowait;
};
#define BLK_MAX_REQUEST_COUNT 16
#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdfed8c99750..bd7ec3eaeed0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
@@ -315,6 +318,8 @@ enum rw_hint {
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ (1 << 8)
#define IOCB_NOIO (1 << 9)
struct kiocb {
@@ -329,7 +334,10 @@ struct kiocb {
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- unsigned int ki_cookie; /* for ->iopoll */
+ union {
+ unsigned int ki_cookie; /* for ->iopoll */
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ };
randomized_struct_fields_end
};
@@ -3275,22 +3283,28 @@ static inline int iocb_flags(struct file *file)
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
{
+ int kiocb_flags = 0;
+
+ if (!flags)
+ return 0;
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
return -EOPNOTSUPP;
- ki->ki_flags |= IOCB_NOWAIT;
+ kiocb_flags |= IOCB_NOWAIT;
}
if (flags & RWF_HIPRI)
- ki->ki_flags |= IOCB_HIPRI;
+ kiocb_flags |= IOCB_HIPRI;
if (flags & RWF_DSYNC)
- ki->ki_flags |= IOCB_DSYNC;
+ kiocb_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
- ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
if (flags & RWF_APPEND)
- ki->ki_flags |= IOCB_APPEND;
+ kiocb_flags |= IOCB_APPEND;
+
+ ki->ki_flags |= kiocb_flags;
return 0;
}
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf2468da68e9..d1f4eff605ad 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -496,8 +496,35 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
return pgoff;
}
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
+struct wait_page_key {
+ struct page *page;
+ int bit_nr;
+ int page_match;
+};
+
+struct wait_page_queue {
+ struct page *page;
+ int bit_nr;
+ wait_queue_entry_t wait;
+};
+
+static inline bool wake_page_match(struct wait_page_queue *wait_page,
+ struct wait_page_key *key)
+{
+ if (wait_page->page != key->page)
+ return false;
+ key->page_match = 1;
+
+ if (wait_page->bit_nr != key->bit_nr)
+ return false;
+
+ return true;
+}
+
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
+extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
unsigned int flags);
extern void unlock_page(struct page *page);
@@ -535,6 +562,22 @@ static inline int lock_page_killable(struct page *page)
}
/*
+ * lock_page_async - Lock the page, unless this would block. If the page
+ * is already locked, then queue a callback when the page becomes unlocked.
+ * This callback can then retry the operation.
+ *
+ * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
+ * was already locked and the callback defined in 'wait' was queued.
+ */
+static inline int lock_page_async(struct page *page,
+ struct wait_page_queue *wait)
+{
+ if (!trylock_page(page))
+ return __lock_page_async(page, wait);
+ return 0;
+}
+
+/*
* lock_page_or_retry - Lock the page, unless this would block and the
* caller indicated that it can handle a retry.
*
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 38359071236a..1301077f9c24 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -126,6 +126,12 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
+static inline void put_task_struct_many(struct task_struct *t, int nr)
+{
+ if (refcount_sub_and_test(nr, &t->usage))
+ __put_task_struct(t);
+}
+
void put_task_struct_rcu_user(struct task_struct *task);
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 7843742b8b74..d65fde732518 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -31,7 +31,8 @@ struct io_uring_sqe {
union {
__kernel_rwf_t rw_flags;
__u32 fsync_flags;
- __u16 poll_events;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
@@ -249,6 +250,7 @@ struct io_uring_params {
#define IORING_FEAT_RW_CUR_POS (1U << 3)
#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
#define IORING_FEAT_FAST_POLL (1U << 5)
+#define IORING_FEAT_POLL_32BITS (1U << 6)
/*
* io_uring_register(2) opcodes and arguments