Diffstat (limited to 'block/mq-deadline.c')
-rw-r--r--  block/mq-deadline.c | 224
1 file changed, 145 insertions(+), 79 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 7f3c3932b723..85d919bf60c7 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -9,7 +9,6 @@
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
-#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -20,6 +19,7 @@
#include <trace/events/block.h>
+#include "elevator.h"
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
@@ -31,6 +31,11 @@
*/
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
+/*
+ * Time after which to dispatch lower priority requests even if higher
+ * priority requests are pending.
+ */
+static const int prio_aging_expire = 10 * HZ;
static const int writes_starved = 2; /* max times reads can starve a write */
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
@@ -51,17 +56,16 @@ enum dd_prio {
enum { DD_PRIO_COUNT = 3 };
-/* I/O statistics per I/O priority. */
+/*
+ * I/O statistics per I/O priority. It is fine if these counters overflow.
+ * What matters is that these counters are at least as wide as
+ * log2(max_outstanding_requests).
+ */
struct io_stats_per_prio {
- local_t inserted;
- local_t merged;
- local_t dispatched;
- local_t completed;
-};
-
-/* I/O statistics for all I/O priorities (enum dd_prio). */
-struct io_stats {
- struct io_stats_per_prio stats[DD_PRIO_COUNT];
+ uint32_t inserted;
+ uint32_t merged;
+ uint32_t dispatched;
+ atomic_t completed;
};
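Since dd_queued() and dd_owned_by_driver() further down only ever report differences between these counters, unsigned wraparound is harmless; the difference stays exact as long as fewer than 2^32 requests are outstanding. A minimal sketch of the modular arithmetic, not part of the patch:

	static void stats_wrap_sketch(void)
	{
		u32 inserted  = U32_MAX;	/* about to wrap */
		u32 completed = U32_MAX - 2;	/* two requests still outstanding */

		inserted += 4;			/* wraps around to 3 */

		/* Unsigned subtraction is modulo 2^32, so the number of
		 * outstanding requests is still exact: 3 - (U32_MAX - 2) == 6. */
		pr_info("queued = %u\n", inserted - completed);
	}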
/*
@@ -74,6 +78,7 @@ struct dd_per_prio {
struct list_head fifo_list[DD_DIR_COUNT];
/* Next request in FIFO order. Read, write or both are NULL. */
struct request *next_rq[DD_DIR_COUNT];
+ struct io_stats_per_prio stats;
};
struct deadline_data {
@@ -88,8 +93,6 @@ struct deadline_data {
unsigned int batching; /* number of sequential requests made */
unsigned int starved; /* times reads have starved writes */
- struct io_stats __percpu *stats;
-
/*
* settings that change how the i/o scheduler behaves
*/
@@ -98,38 +101,12 @@ struct deadline_data {
int writes_starved;
int front_merges;
u32 async_depth;
+ int prio_aging_expire;
spinlock_t lock;
spinlock_t zone_lock;
};
-/* Count one event of type 'event_type' and with I/O priority 'prio' */
-#define dd_count(dd, event_type, prio) do { \
- struct io_stats *io_stats = get_cpu_ptr((dd)->stats); \
- \
- BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \
- BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \
- local_inc(&io_stats->stats[(prio)].event_type); \
- put_cpu_ptr(io_stats); \
-} while (0)
-
-/*
- * Returns the total number of dd_count(dd, event_type, prio) calls across all
- * CPUs. No locking or barriers since it is fine if the returned sum is slightly
- * outdated.
- */
-#define dd_sum(dd, event_type, prio) ({ \
- unsigned int cpu; \
- u32 sum = 0; \
- \
- BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \
- BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \
- for_each_present_cpu(cpu) \
- sum += local_read(&per_cpu_ptr((dd)->stats, cpu)-> \
- stats[(prio)].event_type); \
- sum; \
-})
-
/* Maps an I/O priority class to a deadline scheduler priority. */
static const enum dd_prio ioprio_class_to_prio[] = {
[IOPRIO_CLASS_NONE] = DD_BE_PRIO,
@@ -233,7 +210,9 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
const u8 ioprio_class = dd_rq_ioclass(next);
const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
- dd_count(dd, merged, prio);
+ lockdep_assert_held(&dd->lock);
+
+ dd->per_prio[prio].stats.merged++;
/*
* if next expires before rq, assign its expire time to rq
@@ -270,6 +249,16 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
deadline_remove_request(rq->q, per_prio, rq);
}
+/* Number of requests queued for a given priority level. */
+static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
+{
+ const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+ lockdep_assert_held(&dd->lock);
+
+ return stats->inserted - atomic_read(&stats->completed);
+}
+
/*
* deadline_check_fifo returns 0 if there are no expired requests on the fifo,
* 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
@@ -356,11 +345,26 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
}
/*
+ * Returns true if and only if @rq started after @latest_start where
+ * @latest_start is in jiffies.
+ */
+static bool started_after(struct deadline_data *dd, struct request *rq,
+ unsigned long latest_start)
+{
+ unsigned long start_time = (unsigned long)rq->fifo_time;
+
+ start_time -= dd->fifo_expire[rq_data_dir(rq)];
+
+ return time_after(start_time, latest_start);
+}
+
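The subtraction works because dd_insert_request() stores the expiry deadline in rq->fifo_time (insertion time plus the fifo_expire value for the request's direction) when it adds the request to the FIFO list, so taking fifo_expire back off recovers the insertion time. A worked example, not part of the patch, assuming HZ = 1000 and the default write_expire of 5 * HZ:

	unsigned long insert_time  = 100000;			/* jiffies when the write was queued */
	unsigned long fifo_time    = insert_time + 5 * HZ;	/* 105000, set by dd_insert_request() */
	unsigned long start_time   = fifo_time - 5 * HZ;	/* 100000, recovered by started_after() */
	unsigned long latest_start = 99000;			/* e.g. now - prio_aging_expire */

	/* time_after(100000, 99000) is true, so started_after() returns true
	 * and __dd_dispatch_request() returns NULL instead of dispatching
	 * this request. */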
+/*
* deadline_dispatch_requests selects the best request according to
- * read/write expire, fifo_batch, etc
+ * read/write expire, fifo_batch, etc and with a start time <= @latest_start.
*/
static struct request *__dd_dispatch_request(struct deadline_data *dd,
- struct dd_per_prio *per_prio)
+ struct dd_per_prio *per_prio,
+ unsigned long latest_start)
{
struct request *rq, *next_rq;
enum dd_data_dir data_dir;
@@ -372,6 +376,8 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (!list_empty(&per_prio->dispatch)) {
rq = list_first_entry(&per_prio->dispatch, struct request,
queuelist);
+ if (started_after(dd, rq, latest_start))
+ return NULL;
list_del_init(&rq->queuelist);
goto done;
}
@@ -449,6 +455,9 @@ dispatch_find_request:
dd->batching = 0;
dispatch_request:
+ if (started_after(dd, rq, latest_start))
+ return NULL;
+
/*
* rq is the selected appropriate request.
*/
@@ -457,7 +466,7 @@ dispatch_request:
done:
ioprio_class = dd_rq_ioclass(rq);
prio = ioprio_class_to_prio[ioprio_class];
- dd_count(dd, dispatched, prio);
+ dd->per_prio[prio].stats.dispatched++;
/*
* If the request needs its target zone locked, do it.
*/
@@ -467,6 +476,34 @@ done:
}
/*
+ * Check whether there are any requests with priority other than DD_RT_PRIO
+ * that were inserted more than prio_aging_expire jiffies ago.
+ */
+static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
+ unsigned long now)
+{
+ struct request *rq;
+ enum dd_prio prio;
+ int prio_cnt;
+
+ lockdep_assert_held(&dd->lock);
+
+ prio_cnt = !!dd_queued(dd, DD_RT_PRIO) + !!dd_queued(dd, DD_BE_PRIO) +
+ !!dd_queued(dd, DD_IDLE_PRIO);
+ if (prio_cnt < 2)
+ return NULL;
+
+ for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
+ rq = __dd_dispatch_request(dd, &dd->per_prio[prio],
+ now - dd->prio_aging_expire);
+ if (rq)
+ return rq;
+ }
+
+ return NULL;
+}
+
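The aged pass therefore only runs when at least two priority levels have requests queued (a single level cannot starve anything) and, because the loop starts at DD_BE_PRIO, real-time requests never go through it. The latest_start argument turns prio_aging_expire into a cutoff. A small illustration, not part of the patch, assuming HZ = 250:

	unsigned long insert_time  = 200000;			/* an IDLE-class request is queued */
	unsigned long now          = insert_time + 10 * HZ + 1;	/* 202501: just past prio_aging_expire */
	unsigned long latest_start = now - 10 * HZ;		/* 200001 */

	/* started_after() recovers insert_time (200000), which is not after
	 * 200001, so the aged pass dispatches this request even while
	 * dd_queued(dd, DD_RT_PRIO) is non-zero. */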
+/*
* Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
*
* One confusing aspect here is that we get called for a specific
@@ -477,15 +514,26 @@ done:
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+ const unsigned long now = jiffies;
struct request *rq;
enum dd_prio prio;
spin_lock(&dd->lock);
+ rq = dd_dispatch_prio_aged_requests(dd, now);
+ if (rq)
+ goto unlock;
+
+ /*
+ * Next, dispatch requests in priority order. Ignore lower priority
+ * requests if any higher priority requests are pending.
+ */
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
- rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
- if (rq)
+ rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now);
+ if (rq || dd_queued(dd, prio))
break;
}
+
+unlock:
spin_unlock(&dd->lock);
return rq;
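The priority loop just above enforces strict ordering: it stops at the first level that either yielded a request or still has requests queued, so lower levels cannot jump ahead of a higher level whose requests are temporarily undispatchable. A sketch of that situation, not part of the patch:

	/* Hypothetical state: two RT writes queued but their target zone is
	 * write-locked; one BE read queued and ready to go. */
	rq = __dd_dispatch_request(dd, &dd->per_prio[DD_RT_PRIO], now);
	/* rq == NULL, yet dd_queued(dd, DD_RT_PRIO) == 2, so the loop breaks
	 * and the BE read waits. Once BE/IDLE requests age past
	 * prio_aging_expire, dd_dispatch_prio_aged_requests() above still
	 * picks them up. */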
@@ -519,7 +567,7 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
- sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth);
+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
}
/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
@@ -536,12 +584,21 @@ static void dd_exit_sched(struct elevator_queue *e)
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
+ const struct io_stats_per_prio *stats = &per_prio->stats;
+ uint32_t queued;
WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
- }
- free_percpu(dd->stats);
+ spin_lock(&dd->lock);
+ queued = dd_queued(dd, prio);
+ spin_unlock(&dd->lock);
+
+ WARN_ONCE(queued != 0,
+ "statistics for priority %d: i %u m %u d %u c %u\n",
+ prio, stats->inserted, stats->merged,
+ stats->dispatched, atomic_read(&stats->completed));
+ }
kfree(dd);
}
@@ -566,11 +623,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
eq->elevator_data = dd;
- dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
- GFP_KERNEL | __GFP_ZERO);
- if (!dd->stats)
- goto free_dd;
-
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -586,15 +638,13 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
dd->front_merges = 1;
dd->last_dir = DD_WRITE;
dd->fifo_batch = fifo_batch;
+ dd->prio_aging_expire = prio_aging_expire;
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);
q->elevator = eq;
return 0;
-free_dd:
- kfree(dd);
-
put_eq:
kobject_put(&eq->kobj);
return ret;
@@ -677,8 +727,11 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
blk_req_zone_write_unlock(rq);
prio = ioprio_class_to_prio[ioprio_class];
- dd_count(dd, inserted, prio);
- rq->elv.priv[0] = (void *)(uintptr_t)1;
+ per_prio = &dd->per_prio[prio];
+ if (!rq->elv.priv[0]) {
+ per_prio->stats.inserted++;
+ rq->elv.priv[0] = (void *)(uintptr_t)1;
+ }
if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
blk_mq_free_requests(&free);
@@ -687,7 +740,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
trace_block_rq_insert(rq);
- per_prio = &dd->per_prio[prio];
if (at_head) {
list_add(&rq->queuelist, &per_prio->dispatch);
} else {
@@ -759,12 +811,13 @@ static void dd_finish_request(struct request *rq)
/*
* The block layer core may call dd_finish_request() without having
- * called dd_insert_requests(). Hence only update statistics for
- * requests for which dd_insert_requests() has been called. See also
- * blk_mq_request_bypass_insert().
+ * called dd_insert_requests(). Skip requests that bypassed I/O
+ * scheduling. See also blk_mq_request_bypass_insert().
*/
- if (rq->elv.priv[0])
- dd_count(dd, completed, prio);
+ if (!rq->elv.priv[0])
+ return;
+
+ atomic_inc(&per_prio->stats.completed);
if (blk_queue_is_zoned(q)) {
unsigned long flags;
@@ -809,6 +862,7 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
+SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
SHOW_INT(deadline_async_depth_show, dd->front_merges);
@@ -838,6 +892,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
+STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX);
@@ -856,6 +911,7 @@ static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(front_merges),
DD_ATTR(async_depth),
DD_ATTR(fifo_batch),
+ DD_ATTR(prio_aging_expire),
__ATTR_NULL
};
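Like the other expiry knobs, prio_aging_expire is stored in jiffies but exposed in milliseconds: SHOW_JIFFIES wraps the value in jiffies_to_msecs() and STORE_JIFFIES feeds writes through msecs_to_jiffies() (typically under /sys/block/<disk>/queue/iosched/prio_aging_expire). A conversion sketch, not part of the patch, assuming HZ = 250:

	unsigned int  shown  = jiffies_to_msecs(10 * 250);	/* default 10 * HZ reads back as "10000" */
	unsigned long stored = msecs_to_jiffies(5000);		/* writing "5000" stores 1250 jiffies */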
@@ -947,38 +1003,48 @@ static int dd_async_depth_show(void *data, struct seq_file *m)
return 0;
}
-/* Number of requests queued for a given priority level. */
-static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
-{
- return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
-}
-
static int dd_queued_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
struct deadline_data *dd = q->elevator->elevator_data;
+ u32 rt, be, idle;
+
+ spin_lock(&dd->lock);
+ rt = dd_queued(dd, DD_RT_PRIO);
+ be = dd_queued(dd, DD_BE_PRIO);
+ idle = dd_queued(dd, DD_IDLE_PRIO);
+ spin_unlock(&dd->lock);
+
+ seq_printf(m, "%u %u %u\n", rt, be, idle);
- seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
- dd_queued(dd, DD_BE_PRIO),
- dd_queued(dd, DD_IDLE_PRIO));
return 0;
}
/* Number of requests owned by the block driver for a given priority. */
static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
{
- return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
- - dd_sum(dd, completed, prio);
+ const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+ lockdep_assert_held(&dd->lock);
+
+ return stats->dispatched + stats->merged -
+ atomic_read(&stats->completed);
}
static int dd_owned_by_driver_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
struct deadline_data *dd = q->elevator->elevator_data;
+ u32 rt, be, idle;
+
+ spin_lock(&dd->lock);
+ rt = dd_owned_by_driver(dd, DD_RT_PRIO);
+ be = dd_owned_by_driver(dd, DD_BE_PRIO);
+ idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
+ spin_unlock(&dd->lock);
+
+ seq_printf(m, "%u %u %u\n", rt, be, idle);
- seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
- dd_owned_by_driver(dd, DD_BE_PRIO),
- dd_owned_by_driver(dd, DD_IDLE_PRIO));
return 0;
}