summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCon Kolivas <kernel@kolivas.org>2016-10-20 15:36:26 +1100
committerCon Kolivas <kernel@kolivas.org>2016-10-20 15:36:26 +1100
commit86f11edf0eb94f4ca3219cce6fc3ea046c10120d (patch)
tree55b9d11ccb1e827dc515acc20a8ccfcba1539e3c
parentf6a0595a5c51590598d801e686d840f27970ace1 (diff)
wb-buf-throttle-v7.patch
-rw-r--r--Documentation/block/queue-sysfs.txt13
-rw-r--r--block/Kconfig1
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-core.c22
-rw-r--r--block/blk-mq-sysfs.c47
-rw-r--r--block/blk-mq.c42
-rw-r--r--block/blk-mq.h3
-rw-r--r--block/blk-settings.c15
-rw-r--r--block/blk-sysfs.c151
-rw-r--r--block/cfq-iosched.c12
-rw-r--r--drivers/scsi/scsi.c3
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/f2fs/node.c2
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/xfs/xfs_aops.c7
-rw-r--r--include/linux/backing-dev-defs.h2
-rw-r--r--include/linux/blk_types.h16
-rw-r--r--include/linux/blkdev.h19
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/writeback.h10
-rw-r--r--lib/Kconfig3
-rw-r--r--lib/Makefile1
-rw-r--r--mm/backing-dev.c1
-rw-r--r--mm/page-writeback.c1
26 files changed, 369 insertions, 16 deletions
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index 2a3904030dea..2847219ebd8c 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -169,5 +169,18 @@ This is the number of bytes the device can write in a single write-same
command. A value of '0' means write-same is not supported by this
device.
+wb_lat_usec (RW)
+----------------
+If the device is registered for writeback throttling, then this file shows
+the target minimum read latency. If this latency is exceeded in a given
+window of time (see wb_window_usec), then the writeback throttling will start
+scaling back writes.
+
+wb_window_usec (RW)
+-------------------
+If the device is registered for writeback throttling, then this file shows
+the value of the monitoring window in which we'll look at the target
+latency. See wb_lat_usec.
+
Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/block/Kconfig b/block/Kconfig
index 161491d0a879..6da79e670709 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -4,6 +4,7 @@
menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
+ select WBT
help
Provide block layer support for the kernel.
diff --git a/block/Makefile b/block/Makefile
index 9eda2322b2d4..3446e0472df0 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-lib.o blk-mq.o blk-mq-tag.o \
+ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/blk-core.c b/block/blk-core.c
index 36c7ac328d8c..4f4ce050290c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -33,6 +33,7 @@
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
+#include <linux/wbt.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -882,6 +883,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
fail:
blk_free_flush_queue(q->fq);
+ wbt_exit(q->rq_wb);
+ q->rq_wb = NULL;
return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1346,6 +1349,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
+ wbt_requeue(q->rq_wb, &rq->wb_stat);
if (rq->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1436,6 +1440,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);
+ wbt_done(q->rq_wb, &req->wb_stat);
+
/*
* Request may not have originated from ll_rw_blk. if not,
* it didn't come out of our reserved rq pools
@@ -1667,6 +1673,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
+ unsigned int wb_acct;
/*
* low level driver can indicate that it wants pages above a
@@ -1719,6 +1726,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}
get_rq:
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, q->queue_lock);
+
/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
@@ -1738,11 +1747,15 @@ get_rq:
*/
req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
if (IS_ERR(req)) {
+ if (wb_acct & WBT_TRACKED)
+ __wbt_done(q->rq_wb);
bio->bi_error = PTR_ERR(req);
bio_endio(bio);
goto out_unlock;
}
+ wbt_track(&req->wb_stat, wb_acct);
+
/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
@@ -2475,6 +2488,8 @@ void blk_start_request(struct request *req)
{
blk_dequeue_request(req);
+ wbt_issue(req->q->rq_wb, &req->wb_stat);
+
/*
* We are now handing the request to the hardware, initialize
* resid_len to full count and add the timeout handler.
@@ -2542,6 +2557,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
trace_block_rq_complete(req->q, req, nr_bytes);
+ blk_stat_add(&req->q->rq_stats[rq_data_dir(req)], req);
+
if (!req->bio)
return false;
@@ -2709,9 +2726,10 @@ void blk_finish_request(struct request *req, int error)
blk_account_io_done(req);
- if (req->end_io)
+ if (req->end_io) {
+ wbt_done(req->q->rq_wb, &req->wb_stat);
req->end_io(req, error);
- else {
+ } else {
if (blk_bidi_rq(req))
__blk_put_request(req->next_rq->q, req->next_rq);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index fe822aa5b8e4..b66bbf13cc12 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret;
}
+static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
+{
+ struct blk_mq_ctx *ctx;
+ unsigned int i;
+
+ hctx_for_each_ctx(hctx, ctx, i) {
+ blk_stat_init(&ctx->stat[0]);
+ blk_stat_init(&ctx->stat[1]);
+ }
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
+ const char *page, size_t count)
+{
+ blk_mq_stat_clear(hctx);
+ return count;
+}
+
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+ return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+ pre, (long long) stat->nr_samples,
+ (long long) stat->mean, (long long) stat->min,
+ (long long) stat->max);
+}
+
+static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+ struct blk_rq_stat stat[2];
+ ssize_t ret;
+
+ blk_stat_init(&stat[0]);
+ blk_stat_init(&stat[1]);
+
+ blk_hctx_stat_get(hctx, stat);
+
+ ret = print_stat(page, &stat[0], "read :");
+ ret += print_stat(page + ret, &stat[1], "write:");
+ return ret;
+}
+
static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
@@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
};
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
+ .attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
+ .show = blk_mq_hw_sysfs_stat_show,
+ .store = blk_mq_hw_sysfs_stat_store,
+};
static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
@@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
+ &blk_mq_hw_sysfs_stat.attr,
NULL,
};
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c207fa9870eb..44d55195476d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -22,6 +22,7 @@
#include <linux/sched/sysctl.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
+#include <linux/wbt.h>
#include <trace/events/block.h>
@@ -29,6 +30,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
+#include "blk-stat.h"
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
@@ -330,6 +332,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
if (rq->cmd_flags & REQ_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);
+
+ wbt_done(q->rq_wb, &rq->wb_stat);
rq->cmd_flags = 0;
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
@@ -362,6 +366,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
blk_account_io_done(rq);
if (rq->end_io) {
+ wbt_done(rq->q->rq_wb, &rq->wb_stat);
rq->end_io(rq, error);
} else {
if (unlikely(blk_bidi_rq(rq)))
@@ -412,10 +417,19 @@ static void blk_mq_ipi_complete_request(struct request *rq)
put_cpu();
}
+static void blk_mq_stat_add(struct request *rq)
+{
+ struct blk_rq_stat *stat = &rq->mq_ctx->stat[rq_data_dir(rq)];
+
+ blk_stat_add(stat, rq);
+}
+
static void __blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;
+ blk_mq_stat_add(rq);
+
if (!q->softirq_done_fn)
blk_mq_end_request(rq, rq->errors);
else
@@ -459,6 +473,8 @@ void blk_mq_start_request(struct request *rq)
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
+ wbt_issue(q->rq_wb, &rq->wb_stat);
+
blk_add_timer(rq);
/*
@@ -494,6 +510,7 @@ static void __blk_mq_requeue_request(struct request *rq)
struct request_queue *q = rq->q;
trace_block_rq_requeue(q, rq);
+ wbt_requeue(q->rq_wb, &rq->wb_stat);
if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -1312,6 +1329,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
blk_qc_t cookie;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
@@ -1326,9 +1344,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
return BLK_QC_T_NONE;
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
+
rq = blk_mq_map_request(q, bio, &data);
- if (unlikely(!rq))
+ if (unlikely(!rq)) {
+ if (wb_acct & WBT_TRACKED)
+ __wbt_done(q->rq_wb);
return BLK_QC_T_NONE;
+ }
+
+ wbt_track(&rq->wb_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
@@ -1405,6 +1430,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
struct blk_map_ctx data;
struct request *rq;
blk_qc_t cookie;
+ unsigned int wb_acct;
blk_queue_bounce(q, &bio);
@@ -1421,9 +1447,16 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
} else
request_count = blk_plug_queued_count(q);
+ wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);
+
rq = blk_mq_map_request(q, bio, &data);
- if (unlikely(!rq))
+ if (unlikely(!rq)) {
+ if (wb_acct & WBT_TRACKED)
+ __wbt_done(q->rq_wb);
return BLK_QC_T_NONE;
+ }
+
+ wbt_track(&rq->wb_stat, wb_acct);
cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);
@@ -1807,6 +1840,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
+ blk_stat_init(&__ctx->stat[0]);
+ blk_stat_init(&__ctx->stat[1]);
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
@@ -2145,6 +2180,9 @@ void blk_mq_free_queue(struct request_queue *q)
list_del_init(&q->all_q_node);
mutex_unlock(&all_q_mutex);
+ wbt_exit(q->rq_wb);
+ q->rq_wb = NULL;
+
blk_mq_del_queue_tag_set(q);
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9087b11037b7..e107f700ff17 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -1,6 +1,8 @@
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H
+#include "blk-stat.h"
+
struct blk_mq_tag_set;
struct blk_mq_ctx {
@@ -20,6 +22,7 @@ struct blk_mq_ctx {
/* incremented at completion time */
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
+ struct blk_rq_stat stat[2];
struct request_queue *queue;
struct kobject kobj;
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f679ae122843..746dc9fee1ac 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -832,6 +832,19 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
/**
+ * blk_set_queue_depth - tell the block layer about the device queue depth
+ * @q: the request queue for the device
+ * @depth: queue depth
+ *
+ */
+void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
+{
+ q->queue_depth = depth;
+ wbt_set_queue_depth(q->rq_wb, depth);
+}
+EXPORT_SYMBOL(blk_set_queue_depth);
+
+/**
* blk_queue_write_cache - configure queue's write cache
* @q: the request queue for the device
* @wc: write back cache on or off
@@ -851,6 +864,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
else
queue_flag_clear(QUEUE_FLAG_FUA, q);
spin_unlock_irq(q->queue_lock);
+
+ wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
}
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f87a7e747d36..85c3dc22307b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -10,6 +10,7 @@
#include <linux/blktrace_api.h>
#include <linux/blk-mq.h>
#include <linux/blk-cgroup.h>
+#include <linux/wbt.h>
#include "blk.h"
#include "blk-mq.h"
@@ -41,6 +42,19 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
+static ssize_t queue_var_store64(u64 *var, const char *page)
+{
+ int err;
+ u64 v;
+
+ err = kstrtou64(page, 10, &v);
+ if (err < 0)
+ return err;
+
+ *var = v;
+ return 0;
+}
+
static ssize_t queue_requests_show(struct request_queue *q, char *page)
{
return queue_var_show(q->nr_requests, (page));
@@ -347,6 +361,58 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
return ret;
}
+static ssize_t queue_wb_win_show(struct request_queue *q, char *page)
+{
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->win_nsec, 1000));
+}
+
+static ssize_t queue_wb_win_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ ssize_t ret;
+ u64 val;
+
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ ret = queue_var_store64(&val, page);
+ if (ret < 0)
+ return ret;
+
+ q->rq_wb->win_nsec = val * 1000ULL;
+ wbt_update_limits(q->rq_wb);
+ return count;
+}
+
+static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+{
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ return sprintf(page, "%llu\n", div_u64(q->rq_wb->min_lat_nsec, 1000));
+}
+
+static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ ssize_t ret;
+ u64 val;
+
+ if (!q->rq_wb)
+ return -EINVAL;
+
+ ret = queue_var_store64(&val, page);
+ if (ret < 0)
+ return ret;
+
+ q->rq_wb->min_lat_nsec = val * 1000ULL;
+ wbt_update_limits(q->rq_wb);
+ return count;
+}
+
static ssize_t queue_wc_show(struct request_queue *q, char *page)
{
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
@@ -384,6 +450,26 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
return queue_var_show(blk_queue_dax(q), page);
}
+static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
+{
+ return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
+ pre, (long long) stat->nr_samples,
+ (long long) stat->mean, (long long) stat->min,
+ (long long) stat->max);
+}
+
+static ssize_t queue_stats_show(struct request_queue *q, char *page)
+{
+ struct blk_rq_stat stat[2];
+ ssize_t ret;
+
+ blk_queue_stat_get(q, stat);
+
+ ret = print_stat(page, &stat[0], "read :");
+ ret += print_stat(page + ret, &stat[1], "write:");
+ return ret;
+}
+
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show,
@@ -526,6 +612,23 @@ static struct queue_sysfs_entry queue_dax_entry = {
.show = queue_dax_show,
};
+static struct queue_sysfs_entry queue_stats_entry = {
+ .attr = {.name = "stats", .mode = S_IRUGO },
+ .show = queue_stats_show,
+};
+
+static struct queue_sysfs_entry queue_wb_lat_entry = {
+ .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_wb_lat_show,
+ .store = queue_wb_lat_store,
+};
+
+static struct queue_sysfs_entry queue_wb_win_entry = {
+ .attr = {.name = "wbt_window_usec", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_wb_win_show,
+ .store = queue_wb_win_store,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -553,6 +656,9 @@ static struct attribute *default_attrs[] = {
&queue_poll_entry.attr,
&queue_wc_entry.attr,
&queue_dax_entry.attr,
+ &queue_stats_entry.attr,
+ &queue_wb_lat_entry.attr,
+ &queue_wb_win_entry.attr,
NULL,
};
@@ -667,6 +773,49 @@ struct kobj_type blk_queue_ktype = {
.release = blk_release_queue,
};
+static void blk_wb_stat_get(void *data, struct blk_rq_stat *stat)
+{
+ blk_queue_stat_get(data, stat);
+}
+
+static void blk_wb_stat_clear(void *data)
+{
+ blk_stat_clear(data);
+}
+
+static bool blk_wb_stat_is_current(struct blk_rq_stat *stat)
+{
+ return blk_stat_is_current(stat);
+}
+
+static struct wb_stat_ops wb_stat_ops = {
+ .get = blk_wb_stat_get,
+ .is_current = blk_wb_stat_is_current,
+ .clear = blk_wb_stat_clear,
+};
+
+static void blk_wb_init(struct request_queue *q)
+{
+ struct rq_wb *rwb;
+
+ rwb = wbt_init(&q->backing_dev_info, &wb_stat_ops, q);
+
+ /*
+ * If this fails, we don't get throttling
+ */
+ if (IS_ERR(rwb))
+ return;
+
+ if (blk_queue_nonrot(q))
+ rwb->min_lat_nsec = 2000000ULL;
+ else
+ rwb->min_lat_nsec = 75000000ULL;
+
+ wbt_set_queue_depth(rwb, blk_queue_depth(q));
+ wbt_set_write_cache(rwb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+ q->rq_wb = rwb;
+}
+
int blk_register_queue(struct gendisk *disk)
{
int ret;
@@ -706,6 +855,8 @@ int blk_register_queue(struct gendisk *disk)
if (q->mq_ops)
blk_mq_register_disk(disk);
+ blk_wb_init(q);
+
if (!q->request_fn)
return 0;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index cc2f6dbd4303..ef61bda76317 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3777,6 +3777,18 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
return;
/*
+ * If we have a non-root cgroup, we can depend on that to
+ * do proper throttling of writes. Turn off wbt for that
+ * case.
+ */
+ if (bio_blkcg(bio) != &blkcg_root) {
+ struct request_queue *q = cfqd->queue;
+
+ if (q->rq_wb)
+ wbt_disable(q->rq_wb);
+ }
+
+ /*
* Drop reference to queues. New queues will be assigned in new
* group upon arrival of fresh requests.
*/
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1deb6adc411f..75455d4dab68 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -621,6 +621,9 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
wmb();
}
+ if (sdev->request_queue)
+ blk_set_queue_depth(sdev->request_queue, depth);
+
return sdev->queue_depth;
}
EXPORT_SYMBOL(scsi_change_queue_depth);
diff --git a/fs/buffer.c b/fs/buffer.c
index 9c8eb9b6db6a..6a5f1a01102e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1698,7 +1698,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
struct buffer_head *bh, *head;
unsigned int blocksize, bbits;
int nr_underway = 0;
- int write_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int write_flags = wbc_to_write_flags(wbc);
head = create_page_buffers(page, inode,
(1 << BH_Dirty)|(1 << BH_Uptodate));
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ccb401eebc11..cb0528b31eb0 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1240,7 +1240,7 @@ static int f2fs_write_data_page(struct page *page,
.sbi = sbi,
.type = DATA,
.op = REQ_OP_WRITE,
- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+ .op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index f75d197d5beb..c1713da2542f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1561,7 +1561,7 @@ static int f2fs_write_node_page(struct page *page,
.sbi = sbi,
.type = NODE,
.op = REQ_OP_WRITE,
- .op_flags = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0,
+ .op_flags = wbc_to_write_flags(wbc),
.page = page,
.encrypted_page = NULL,
};
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 950b8be68e41..7991c62e9d6f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,8 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
{
struct buffer_head *bh, *head;
int nr_underway = 0;
- int write_flags = REQ_META | REQ_PRIO |
- (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc);
BUG_ON(!PageLocked(page));
BUG_ON(!page_has_buffers(page));
diff --git a/fs/mpage.c b/fs/mpage.c
index d2413af0823a..d6f1afe3397a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -489,7 +489,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
- int op_flags = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : 0);
+ int op_flags = wbc_to_write_flags(wbc);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc3ad15..a68645abde56 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -447,8 +447,8 @@ xfs_submit_ioend(
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
+
/*
* If we are failing the IO now, just mark the ioend with an
* error and finish it. This will run IO completion immediately
@@ -519,8 +519,7 @@ xfs_chain_bio(
bio_chain(ioend->io_bio, new);
bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, wbc_to_write_flags(wbc));
submit_bio(ioend->io_bio);
ioend->io_bio = new;
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index c357f27d5483..dc5f76d7f648 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -116,6 +116,8 @@ struct bdi_writeback {
struct list_head work_list;
struct delayed_work dwork; /* work item used for writeback */
+ unsigned long dirty_sleep; /* last wait */
+
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 436f43f87da9..95fbfa1fe010 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -155,6 +155,7 @@ enum rq_flag_bits {
__REQ_INTEGRITY, /* I/O includes block integrity payload */
__REQ_FUA, /* forced unit access */
__REQ_PREFLUSH, /* request for cache flush */
+ __REQ_BG, /* background activity */
/* bio only flags */
__REQ_RAHEAD, /* read ahead, can fail anytime */
@@ -198,7 +199,7 @@ enum rq_flag_bits {
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
- REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
+ REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_BG)
#define REQ_CLONE_MASK REQ_COMMON_MASK
/* This mask is used for both bio and request merge checking */
@@ -223,6 +224,7 @@ enum rq_flag_bits {
#define REQ_COPY_USER (1ULL << __REQ_COPY_USER)
#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH)
#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ)
+#define REQ_BG (1ULL << __REQ_BG)
#define REQ_IO_STAT (1ULL << __REQ_IO_STAT)
#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE)
#define REQ_PM (1ULL << __REQ_PM)
@@ -264,4 +266,16 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}
+#define BLK_RQ_STAT_BATCH 64
+
+struct blk_rq_stat {
+ s64 mean;
+ u64 min;
+ u64 max;
+ s32 nr_samples;
+ s32 nr_batch;
+ u64 batch;
+ s64 time;
+};
+
#endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e79055c8b577..45256d75c4b7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -24,6 +24,7 @@
#include <linux/rcupdate.h>
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
+#include <linux/wbt.h>
struct module;
struct scsi_ioctl_command;
@@ -37,6 +38,7 @@ struct bsg_job;
struct blkcg_gq;
struct blk_flush_queue;
struct pr_ops;
+struct rq_wb;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -151,6 +153,7 @@ struct request {
struct gendisk *rq_disk;
struct hd_struct *part;
unsigned long start_time;
+ struct wb_issue_stat wb_stat;
#ifdef CONFIG_BLK_CGROUP
struct request_list *rl; /* rl this rq is alloced from */
unsigned long long start_time_ns;
@@ -302,6 +305,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
+ struct rq_wb *rq_wb;
+
/*
* If blkcg is not used, @q->root_rl serves all requests. If blkcg
* is used, root blkg allocates from @q->root_rl and all other
@@ -327,6 +332,8 @@ struct request_queue {
struct blk_mq_ctx __percpu *queue_ctx;
unsigned int nr_queues;
+ unsigned int queue_depth;
+
/* hw dispatch queues */
struct blk_mq_hw_ctx **queue_hw_ctx;
unsigned int nr_hw_queues;
@@ -412,6 +419,9 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
+
+ struct blk_rq_stat rq_stats[2];
+
/*
* Number of active block driver functions for which blk_drain_queue()
* must wait. Must be incremented around functions that unlock the
@@ -683,6 +693,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
return false;
}
+static inline unsigned int blk_queue_depth(struct request_queue *q)
+{
+ if (q->queue_depth)
+ return q->queue_depth;
+
+ return q->nr_requests;
+}
+
/*
* q->prep_rq_fn return values
*/
@@ -999,6 +1017,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
+extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_default_limits(struct queue_limits *lim);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 901e25d495cc..7c7951f1166a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -189,6 +189,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
* WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
* by a cache flush and data is guaranteed to be on
* non-volatile media on completion.
+ * WRITE_BG Background write. This is for background activity like
+ * the periodic flush and background threshold writeback
*
*/
#define RW_MASK REQ_OP_WRITE
@@ -202,6 +204,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH)
#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA)
#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA)
+#define WRITE_BG (REQ_NOIDLE | REQ_BG)
/*
* Attribute flags. These should be or-ed together to figure out what
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fc1e16c25a29..e53abf2bf2d8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -100,6 +100,16 @@ struct writeback_control {
#endif
};
+static inline int wbc_to_write_flags(struct writeback_control *wbc)
+{
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ return WRITE_SYNC;
+ else if (wbc->for_kupdate || wbc->for_background)
+ return WRITE_BG;
+
+ return 0;
+}
+
/*
* A wb_domain represents a domain that wb's (bdi_writeback's) belong to
* and are measured against each other in. There always is one global
diff --git a/lib/Kconfig b/lib/Kconfig
index d79909dc01ec..c585e4c40143 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -550,4 +550,7 @@ config STACKDEPOT
bool
select STACKTRACE
+config WBT
+ bool
+
endmenu
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..23afd6329c33 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -177,6 +177,7 @@ obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_SG_POOL) += sg_pool.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
obj-$(CONFIG_IRQ_POLL) += irq_poll.o
+obj-$(CONFIG_WBT) += wbt.o
obj-$(CONFIG_STACKDEPOT) += stackdepot.o
KASAN_SANITIZE_stackdepot.o := n
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8fde443f36d7..3bfed5ab2475 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -310,6 +310,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
spin_lock_init(&wb->work_lock);
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+ wb->dirty_sleep = jiffies;
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
if (!wb->congested)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f4cd7d8005c9..98bc3fc301a1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1778,6 +1778,7 @@ pause:
pause,
start_time);
__set_current_state(TASK_KILLABLE);
+ wb->dirty_sleep = now;
io_schedule_timeout(pause);
current->dirty_paused_when = now + pause;