author     Kent Overstreet <kmo@daterainc.com>   2013-09-25 13:38:40 -0700
committer  Kent Overstreet <kmo@daterainc.com>   2014-06-17 18:26:01 -0700
commit     c013b1438060c77c103358041a775f3fe64b9daf (patch)
tree       a0d2734f6ff36bf57b4fbd6cc22c229cce31ff9c
parent     72e9f7ff1fd38ada4eebc6aecb5e45ebd629940f (diff)
plugging: make struct blk_plug hold a bio_list and have generic_make_request() queue bios on the plug (sorted by device and sector) instead of on current->bio_list
Signed-off-by: Kent Overstreet <kmo@daterainc.com>
-rw-r--r--   block/bio.c              |  39
-rw-r--r--   block/blk-core.c         | 256
-rw-r--r--   drivers/md/raid10.c      |   6
-rw-r--r--   include/linux/blkdev.h   |   5
-rw-r--r--   include/linux/sched.h    |   3
5 files changed, 57 insertions(+), 252 deletions(-)
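
Editor's note (not part of the patch): the heart of the blk-core.c changes below is that struct blk_plug now carries a bio_list, and the new generic_make_request() splices each submitted bio into that list sorted by (block device, start sector). The following standalone sketch restates that insertion with simplified stand-in types; sketch_bio, sketch_bio_list and plug_insert_sorted() are hypothetical names used only for illustration, not kernel code.

/*
 * Sketch of the sorted insertion done by the new generic_make_request()
 * (see the block/blk-core.c hunk below).  Stand-in types only; the real
 * code operates on struct bio / struct bio_list inside struct blk_plug.
 */
struct sketch_bio {
	void			*bi_bdev;	/* stand-in for struct block_device * */
	unsigned long long	 bi_sector;	/* stand-in for bio->bi_iter.bi_sector */
	struct sketch_bio	*bi_next;
};

struct sketch_bio_list {
	struct sketch_bio	*head;
	struct sketch_bio	*tail;
};

/*
 * Splice @splits (a non-empty run of bios for one device) into @plug_list,
 * keeping the plug list ordered by (device, start sector) so the flush
 * path can hand each device a contiguous, roughly sequential batch.
 */
static void plug_insert_sorted(struct sketch_bio_list *plug_list,
			       struct sketch_bio_list *splits)
{
	struct sketch_bio *bio = splits->head;
	struct sketch_bio **p = &plug_list->head;

	/* Walk forward until *p sorts at or after the bios being inserted. */
	while (*p &&
	       ((bio->bi_bdev > (*p)->bi_bdev) ||
		(bio->bi_bdev == (*p)->bi_bdev &&
		 bio->bi_sector > (*p)->bi_sector)))
		p = &(*p)->bi_next;

	/* Link the inserted run to the remainder, fixing up the list tail. */
	splits->tail->bi_next = *p;
	if (!splits->tail->bi_next)
		plug_list->tail = splits->tail;

	*p = splits->head;
}
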
diff --git a/block/bio.c b/block/bio.c
index 103a75b92100..7ee2c3c1f5d0 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -341,6 +341,7 @@ static void bio_alloc_rescue(struct work_struct *work)
}
}
+#if 0
static void punt_bios_to_rescuer(struct bio_set *bs)
{
struct bio_list punt, nopunt;
@@ -371,6 +372,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
queue_work(bs->rescue_workqueue, &bs->rescue_work);
}
+#endif
/**
* bio_alloc_bioset - allocate a bio for I/O
@@ -409,7 +411,6 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
*/
struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
{
- gfp_t saved_gfp = gfp_mask;
unsigned front_pad;
unsigned inline_vecs;
unsigned long idx = BIO_POOL_NONE;
@@ -427,37 +428,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
front_pad = 0;
inline_vecs = nr_iovecs;
} else {
- /*
- * generic_make_request() converts recursion to iteration; this
- * means if we're running beneath it, any bios we allocate and
- * submit will not be submitted (and thus freed) until after we
- * return.
- *
- * This exposes us to a potential deadlock if we allocate
- * multiple bios from the same bio_set() while running
- * underneath generic_make_request(). If we were to allocate
- * multiple bios (say a stacking block driver that was splitting
- * bios), we would deadlock if we exhausted the mempool's
- * reserve.
- *
- * We solve this, and guarantee forward progress, with a rescuer
- * workqueue per bio_set. If we go to allocate and there are
- * bios on current->bio_list, we first try the allocation
- * without __GFP_WAIT; if that fails, we punt those bios we
- * would be blocking to the rescuer workqueue before we retry
- * with the original gfp_flags.
- */
-
- if (current->bio_list && !bio_list_empty(current->bio_list))
- gfp_mask &= ~__GFP_WAIT;
-
p = mempool_alloc(bs->bio_pool, gfp_mask);
- if (!p && gfp_mask != saved_gfp) {
- punt_bios_to_rescuer(bs);
- gfp_mask = saved_gfp;
- p = mempool_alloc(bs->bio_pool, gfp_mask);
- }
-
front_pad = bs->front_pad;
inline_vecs = BIO_INLINE_VECS;
}
@@ -470,12 +441,6 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
if (nr_iovecs > inline_vecs) {
bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
- if (!bvl && gfp_mask != saved_gfp) {
- punt_bios_to_rescuer(bs);
- gfp_mask = saved_gfp;
- bvl = bvec_alloc(gfp_mask, nr_iovecs, &idx, bs->bvec_pool);
- }
-
if (unlikely(!bvl))
goto err_free;
diff --git a/block/blk-core.c b/block/blk-core.c
index 6ed619cae4b3..1b47c9656849 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1474,67 +1474,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
return true;
}
-/**
- * blk_attempt_plug_merge - try to merge with %current's plugged list
- * @q: request_queue new bio is being queued at
- * @bio: new bio being queued
- * @request_count: out parameter for number of traversed plugged requests
- *
- * Determine whether @bio being queued on @q can be merged with a request
- * on %current's plugged list. Returns %true if merge was successful,
- * otherwise %false.
- *
- * Plugging coalesces IOs from the same issuer for the same purpose without
- * going through @q->queue_lock. As such it's more of an issuing mechanism
- * than scheduling, and the request, while may have elvpriv data, is not
- * added on the elevator at this point. In addition, we don't have
- * reliable access to the elevator outside queue lock. Only check basic
- * merging parameters without querying the elevator.
- *
- * Caller must ensure !blk_queue_nomerges(q) beforehand.
- */
-bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
- unsigned int *request_count)
-{
- struct blk_plug *plug;
- struct request *rq;
- bool ret = false;
- struct list_head *plug_list;
-
- plug = current->plug;
- if (!plug)
- goto out;
- *request_count = 0;
-
- if (q->mq_ops)
- plug_list = &plug->mq_list;
- else
- plug_list = &plug->list;
-
- list_for_each_entry_reverse(rq, plug_list, queuelist) {
- int el_ret;
-
- if (rq->q == q)
- (*request_count)++;
-
- if (rq->q != q || !blk_rq_merge_ok(rq, bio))
- continue;
-
- el_ret = blk_try_merge(rq, bio);
- if (el_ret == ELEVATOR_BACK_MERGE) {
- ret = bio_attempt_back_merge(q, rq, bio);
- if (ret)
- break;
- } else if (el_ret == ELEVATOR_FRONT_MERGE) {
- ret = bio_attempt_front_merge(q, rq, bio);
- if (ret)
- break;
- }
- }
-out:
- return ret;
-}
-
void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cmd_type = REQ_TYPE_FS;
@@ -1552,10 +1491,10 @@ void init_request_from_bio(struct request *req, struct bio *bio)
void blk_queue_bio(struct request_queue *q, struct bio *bio)
{
const bool sync = !!(bio->bi_rw & REQ_SYNC);
- struct blk_plug *plug;
int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
struct request *req;
- unsigned int request_count = 0;
+
+ spin_lock_irq(q->queue_lock);
blk_queue_split(q, &bio, q->bio_split);
@@ -1572,21 +1511,10 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
}
if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
- spin_lock_irq(q->queue_lock);
where = ELEVATOR_INSERT_FLUSH;
goto get_rq;
}
- /*
- * Check if we can merge with the plugged list before grabbing
- * any locks.
- */
- if (!blk_queue_nomerges(q) &&
- blk_attempt_plug_merge(q, bio, &request_count))
- return;
-
- spin_lock_irq(q->queue_lock);
-
el_ret = elv_merge(q, &req, bio);
if (el_ret == ELEVATOR_BACK_MERGE) {
if (bio_attempt_back_merge(q, req, bio)) {
@@ -1635,29 +1563,11 @@ get_rq:
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags))
req->cpu = raw_smp_processor_id();
- plug = current->plug;
- if (plug) {
- /*
- * If this is the first request added after a plug, fire
- * of a plug trace.
- */
- if (!request_count)
- trace_block_plug(q);
- else {
- if (request_count >= BLK_MAX_REQUEST_COUNT) {
- blk_flush_plug_list(plug, false);
- trace_block_plug(q);
- }
- }
- list_add_tail(&req->queuelist, &plug->list);
- blk_account_io_start(req, true);
- } else {
- spin_lock_irq(q->queue_lock);
- add_acct_request(q, req, where);
- __blk_run_queue(q);
+ spin_lock_irq(q->queue_lock);
+ add_acct_request(q, req, where);
+ __blk_run_queue(q);
out_unlock:
- spin_unlock_irq(q->queue_lock);
- }
+ spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */
@@ -1769,6 +1679,8 @@ generic_make_request_checks(struct bio *bio)
might_sleep();
+ BUG_ON(bio->bi_next);
+
if (bio_check_eod(bio, nr_sectors))
goto end_io;
@@ -1867,11 +1779,19 @@ end_io:
*/
void generic_make_request(struct bio *bio)
{
- struct bio_list bio_list_on_stack;
+ struct task_struct *tsk = current;
+ struct request_queue *q;
+ struct bio **p;
+ struct bio_list splits;
+ struct blk_plug plug;
+
+ bio_list_init(&splits);
if (!generic_make_request_checks(bio))
return;
+ q = bdev_get_queue(bio->bi_bdev);
+
/*
* We only want one ->make_request_fn to be active at a time, else
* stack usage with stacked devices could be a problem. So use
@@ -1882,36 +1802,27 @@ void generic_make_request(struct bio *bio)
* it is non-NULL, then a make_request is active, and new requests
* should be added at the tail
*/
- if (current->bio_list) {
- bio_list_add(current->bio_list, bio);
- return;
- }
+ blk_start_plug(&plug);
- /* following loop may be a bit non-obvious, and so deserves some
- * explanation.
- * Before entering the loop, bio->bi_next is NULL (as all callers
- * ensure that) so we have a list with a single bio.
- * We pretend that we have just taken it off a longer list, so
- * we assign bio_list to a pointer to the bio_list_on_stack,
- * thus initialising the bio_list of new bios to be
- * added. ->make_request() may indeed add some more bios
- * through a recursive call to generic_make_request. If it
- * did, we find a non-NULL value in bio_list and re-enter the loop
- * from the top. In this case we really did just take the bio
- * of the top of the list (no pretending) and so remove it from
- * bio_list, and call into ->make_request() again.
- */
- BUG_ON(bio->bi_next);
- bio_list_init(&bio_list_on_stack);
- current->bio_list = &bio_list_on_stack;
- do {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ bio = splits.head;
+ p = &tsk->plug->list.head;
+
+ while (*p &&
+ ((bio->bi_bdev > (*p)->bi_bdev) ||
+ (bio->bi_bdev == (*p)->bi_bdev &&
+ bio->bi_iter.bi_sector > (*p)->bi_iter.bi_sector)))
+ p = &(*p)->bi_next;
- q->make_request_fn(q, bio);
+ splits.tail->bi_next = *p;
+ if (!splits.tail->bi_next)
+ tsk->plug->list.tail = splits.tail;
- bio = bio_list_pop(current->bio_list);
- } while (bio);
- current->bio_list = NULL; /* deactivate */
+ *p = splits.head;
+
+ if (tsk->plug != &plug)
+ return;
+
+ blk_finish_plug(&plug);
}
EXPORT_SYMBOL(generic_make_request);
@@ -2992,8 +2903,8 @@ void blk_start_plug(struct blk_plug *plug)
{
struct task_struct *tsk = current;
- INIT_LIST_HEAD(&plug->list);
INIT_LIST_HEAD(&plug->mq_list);
+ bio_list_init(&plug->list);
INIT_LIST_HEAD(&plug->cb_list);
/*
@@ -3010,34 +2921,6 @@ void blk_start_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_start_plug);
-static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b)
-{
- struct request *rqa = container_of(a, struct request, queuelist);
- struct request *rqb = container_of(b, struct request, queuelist);
-
- return !(rqa->q < rqb->q ||
- (rqa->q == rqb->q && blk_rq_pos(rqa) < blk_rq_pos(rqb)));
-}
-
-/*
- * If 'from_schedule' is true, then postpone the dispatch of requests
- * until a safe kblockd context. We due this to avoid accidental big
- * additional stack usage in driver dispatch, in places where the originally
- * plugger did not intend it.
- */
-static void queue_unplugged(struct request_queue *q, unsigned int depth,
- bool from_schedule)
- __releases(q->queue_lock)
-{
- trace_block_unplug(q, depth, !from_schedule);
-
- if (from_schedule)
- blk_run_queue_async(q);
- else
- __blk_run_queue(q);
- spin_unlock(q->queue_lock);
-}
-
static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
{
LIST_HEAD(callbacks);
@@ -3083,72 +2966,35 @@ EXPORT_SYMBOL(blk_check_plugged);
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
- unsigned long flags;
- struct request *rq;
- LIST_HEAD(list);
- unsigned int depth;
+ struct bio *bio, *end;
flush_plug_callbacks(plug, from_schedule);
if (!list_empty(&plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);
- if (list_empty(&plug->list))
- return;
- list_splice_init(&plug->list, &list);
+ while (plug->list.head) {
+ end = bio = plug->list.head;
+ while (end->bi_next &&
+ end->bi_next->bi_bdev == bio->bi_bdev)
+ end = end->bi_next;
- list_sort(NULL, &list, plug_rq_cmp);
+ plug->list.head = end->bi_next;
+ end->bi_next = NULL;
- q = NULL;
- depth = 0;
+ q = bdev_get_queue(bio->bi_bdev);
- /*
- * Save and disable interrupts here, to avoid doing it for every
- * queue lock we have to take.
- */
- local_irq_save(flags);
- while (!list_empty(&list)) {
- rq = list_entry_rq(list.next);
- list_del_init(&rq->queuelist);
- BUG_ON(!rq->q);
- if (rq->q != q) {
- /*
- * This drops the queue lock
- */
- if (q)
- queue_unplugged(q, depth, from_schedule);
- q = rq->q;
- depth = 0;
- spin_lock(q->queue_lock);
- }
+ while (bio) {
+ struct bio *p = bio;
- /*
- * Short-circuit if @q is dead
- */
- if (unlikely(blk_queue_dying(q))) {
- __blk_end_request_all(rq, -ENODEV);
- continue;
+ bio = bio->bi_next;
+ p->bi_next = NULL;
+ q->make_request_fn(q, p);
}
-
- /*
- * rq is already accounted, so use raw insert
- */
- if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
- __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
- else
- __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
-
- depth++;
}
- /*
- * This drops the queue lock
- */
- if (q)
- queue_unplugged(q, depth, from_schedule);
-
- local_irq_restore(flags);
+ bio_list_init(&plug->list);
}
void blk_finish_plug(struct blk_plug *plug)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f6a63fb74478..c7e32d7cf0ee 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -954,9 +954,7 @@ static void wait_barrier(struct r10conf *conf)
*/
wait_event_lock_irq(conf->wait_barrier,
!conf->barrier ||
- (conf->nr_pending &&
- current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ conf->nr_pending,
conf->resync_lock);
conf->nr_waiting--;
}
@@ -1032,7 +1030,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
struct r10conf *conf = mddev->private;
struct bio *bio;
- if (from_schedule || current->bio_list) {
+ if (from_schedule) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
conf->pending_count += plug->pending_cnt;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fd348a70f304..93976824be8f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1085,11 +1085,10 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
* schedule() where blk_schedule_flush_plug() is called.
*/
struct blk_plug {
- struct list_head list; /* requests */
+ struct bio_list list;
struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
};
-#define BLK_MAX_REQUEST_COUNT 16
struct blk_plug_cb;
typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
@@ -1125,7 +1124,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
struct blk_plug *plug = tsk->plug;
return plug &&
- (!list_empty(&plug->list) ||
+ (!bio_list_empty(&plug->list) ||
!list_empty(&plug->mq_list) ||
!list_empty(&plug->cb_list));
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ea74596014a2..d33e4cde4d8c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1475,9 +1475,6 @@ struct task_struct {
/* journalling filesystem info */
void *journal_info;
-/* stacked block device info */
- struct bio_list *bio_list;
-
#ifdef CONFIG_BLOCK
/* stack plugging */
struct blk_plug *plug;
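
Editor's usage sketch (not from the patch): how the reworked plugging is driven end to end, assuming kernel context with this patch applied. submit_batch() and its bio array are illustrative; blk_start_plug(), generic_make_request() and blk_finish_plug() are the existing interfaces touched above.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void submit_batch(struct bio *bios[], unsigned int nr)
{
	struct blk_plug plug;
	unsigned int i;

	/* Installs the plug on current->plug; plug->list is now a bio_list. */
	blk_start_plug(&plug);

	/*
	 * With this patch, each call queues the bio on the active plug's
	 * bio list, sorted by (device, sector), rather than recursing into
	 * the driver via current->bio_list.
	 */
	for (i = 0; i < nr; i++)
		generic_make_request(bios[i]);

	/*
	 * blk_finish_plug() -> blk_flush_plug_list() walks the sorted list,
	 * peels off consecutive bios that share a device, and passes each
	 * one to that device's ->make_request_fn().
	 */
	blk_finish_plug(&plug);
}

Keeping the plug list sorted by (device, sector) lets the flush path issue per-device, roughly sequential runs of bios without the request-level list_sort()/plug_rq_cmp() pass that this patch removes.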