summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-core.c156
-rw-r--r--block/blk-iopoll.c227
-rw-r--r--block/blk-merge.c49
-rw-r--r--block/blk.h1
-rw-r--r--block/cfq-iosched.c71
-rw-r--r--block/elevator.c16
-rw-r--r--block/genhd.c18
-rw-r--r--drivers/block/cciss.c4
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/osdblk.c2
-rw-r--r--drivers/block/paride/pcd.c12
-rw-r--r--drivers/block/sx8.c4
-rw-r--r--drivers/block/viodasd.c12
-rw-r--r--drivers/md/dm-raid1.c2
-rw-r--r--drivers/md/dm-stripe.c2
-rw-r--r--drivers/md/dm.c12
-rw-r--r--drivers/md/linear.c2
-rw-r--r--drivers/md/multipath.c4
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1.c14
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c2
-rw-r--r--drivers/scsi/scsi_lib.c6
-rw-r--r--drivers/staging/dst/dcore.c5
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/splice.c8
-rw-r--r--include/linux/bio.h69
-rw-r--r--include/linux/blk-iopoll.h48
-rw-r--r--include/linux/blkdev.h32
-rw-r--r--include/linux/interrupt.h1
-rw-r--r--kernel/sysctl.c10
32 files changed, 617 insertions, 188 deletions
diff --git a/block/Makefile b/block/Makefile
index 6c54ed0ff755..ba74ca6bfa14 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- ioctl.o genhd.o scsi_ioctl.o
+ blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index e3299a77a0d8..93051d151635 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1111,31 +1111,27 @@ void init_request_from_bio(struct request *req, struct bio *bio)
req->cmd_type = REQ_TYPE_FS;
/*
- * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
+ * Inherit FAILFAST from bio (for read-ahead, and explicit
+ * FAILFAST). FAILFAST flags are identical for req and bio.
*/
- if (bio_rw_ahead(bio))
- req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
- REQ_FAILFAST_DRIVER);
- if (bio_failfast_dev(bio))
- req->cmd_flags |= REQ_FAILFAST_DEV;
- if (bio_failfast_transport(bio))
- req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
- if (bio_failfast_driver(bio))
- req->cmd_flags |= REQ_FAILFAST_DRIVER;
-
- if (unlikely(bio_discard(bio))) {
+ if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+ req->cmd_flags |= REQ_FAILFAST_MASK;
+ else
+ req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
+
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
req->cmd_flags |= REQ_DISCARD;
- if (bio_barrier(bio))
+ if (bio_rw_flagged(bio, BIO_RW_BARRIER))
req->cmd_flags |= REQ_SOFTBARRIER;
req->q->prepare_discard_fn(req->q, req);
- } else if (unlikely(bio_barrier(bio)))
+ } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
req->cmd_flags |= REQ_HARDBARRIER;
- if (bio_sync(bio))
+ if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
req->cmd_flags |= REQ_RW_SYNC;
- if (bio_rw_meta(bio))
+ if (bio_rw_flagged(bio, BIO_RW_META))
req->cmd_flags |= REQ_RW_META;
- if (bio_noidle(bio))
+ if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
req->cmd_flags |= REQ_NOIDLE;
req->errors = 0;
@@ -1150,7 +1146,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
*/
static inline bool queue_should_plug(struct request_queue *q)
{
- return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
+ return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
}
static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1159,11 +1155,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
int el_ret;
unsigned int bytes = bio->bi_size;
const unsigned short prio = bio_prio(bio);
- const int sync = bio_sync(bio);
- const int unplug = bio_unplug(bio);
+ const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+ const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
+ const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
int rw_flags;
- if (bio_barrier(bio) && bio_has_data(bio) &&
+ if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) &&
(q->next_ordered == QUEUE_ORDERED_NONE)) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
@@ -1177,7 +1174,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
spin_lock_irq(q->queue_lock);
- if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
goto get_rq;
el_ret = elv_merge(q, &req, bio);
@@ -1190,6 +1187,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
trace_block_bio_backmerge(q, bio);
+ if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+ blk_rq_set_mixed_merge(req);
+
req->biotail->bi_next = bio;
req->biotail = bio;
req->__data_len += bytes;
@@ -1209,6 +1209,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
trace_block_bio_frontmerge(q, bio);
+ if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
+ blk_rq_set_mixed_merge(req);
+ req->cmd_flags &= ~REQ_FAILFAST_MASK;
+ req->cmd_flags |= ff;
+ }
+
bio->bi_next = req->bio;
req->bio = bio;
@@ -1464,7 +1470,8 @@ static inline void __generic_make_request(struct bio *bio)
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- if (bio_discard(bio) && !q->prepare_discard_fn) {
+ if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+ !q->prepare_discard_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
@@ -1653,6 +1660,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+/**
+ * blk_rq_err_bytes - determine number of bytes till the next failure boundary
+ * @rq: request to examine
+ *
+ * Description:
+ * A request may be a merge of IOs which require different failure
+ * handling. This function determines the number of bytes which
+ * can be failed from the beginning of the request without
+ * crossing into an area which needs to be retried further.
+ * For a request that is not marked REQ_MIXED_MERGE this is the
+ * whole request, i.e. blk_rq_bytes(@rq).
+ *
+ * Return:
+ * The number of bytes to fail.
+ *
+ * Context:
+ * queue_lock must be held.
+ */
+unsigned int blk_rq_err_bytes(const struct request *rq)
+{
+ unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+ unsigned int bytes = 0;
+ struct bio *bio;
+
+ if (!(rq->cmd_flags & REQ_MIXED_MERGE))
+ return blk_rq_bytes(rq);
+
+ /*
+ * Currently the only 'mixing' which can happen is between
+ * different failfast types. We can safely fail portions
+ * which have all the failfast bits that the first one has -
+ * the ones which are at least as eager to fail as the first
+ * one. @ff is taken from rq->cmd_flags; the walk stops at the
+ * first bio whose bi_rw lacks any of those bits.
+ */
+ for (bio = rq->bio; bio; bio = bio->bi_next) {
+ if ((bio->bi_rw & ff) != ff)
+ break;
+ bytes += bio->bi_size;
+ }
+
+ /*
+ * A request that still has bytes but of which nothing can be
+ * failed could lead to an infinite loop - treat it as a bug.
+ */
+ BUG_ON(blk_rq_bytes(rq) && !bytes);
+ return bytes;
+}
+EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
+
static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (blk_do_io_stat(req)) {
@@ -1806,8 +1857,15 @@ void blk_dequeue_request(struct request *rq)
* and to it is freed is accounted as io that is in progress at
* the driver side.
*/
- if (blk_account_rq(rq))
+ if (blk_account_rq(rq)) {
q->in_flight[rq_is_sync(rq)]++;
+ /*
+ * Mark this device as supporting hardware queuing, if
+ * we have more IOs in flight than 4.
+ */
+ if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
+ set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
+ }
}
/**
@@ -1999,6 +2057,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
if (blk_fs_request(req) || blk_discard_rq(req))
req->__sector += total_bytes >> 9;
+ /* mixed attributes always follow the first bio */
+ if (req->cmd_flags & REQ_MIXED_MERGE) {
+ req->cmd_flags &= ~REQ_FAILFAST_MASK;
+ req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
+ }
+
/*
* If total number of sectors is less than the first segment
* size, something has gone terribly wrong.
@@ -2178,6 +2242,25 @@ bool blk_end_request_cur(struct request *rq, int error)
EXPORT_SYMBOL(blk_end_request_cur);
/**
+ * blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to complete up to its next failure boundary
+ * @error: must be a negative errno (a value >= 0 triggers WARN_ON)
+ *
+ * Description:
+ * Complete @rq till the next failure boundary, i.e. pass
+ * blk_rq_err_bytes(@rq) bytes and @error to blk_end_request().
+ *
+ * Return:
+ * %false - we are done with this request
+ * %true - still buffers pending for this request
+ */
+bool blk_end_request_err(struct request *rq, int error)
+{
+ WARN_ON(error >= 0);
+ return blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_err);
+
+/**
* __blk_end_request - Helper function for drivers to complete the request.
* @rq: the request being processed
* @error: %0 for success, < %0 for error
@@ -2236,12 +2319,31 @@ bool __blk_end_request_cur(struct request *rq, int error)
}
EXPORT_SYMBOL(__blk_end_request_cur);
+/**
+ * __blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to complete up to its next failure boundary
+ * @error: must be a negative errno (a value >= 0 triggers WARN_ON)
+ *
+ * Description:
+ * Complete @rq till the next failure boundary, i.e. pass
+ * blk_rq_err_bytes(@rq) bytes and @error to __blk_end_request().
+ * Must be called with queue lock held.
+ *
+ * Return:
+ * %false - we are done with this request
+ * %true - still buffers pending for this request
+ */
+bool __blk_end_request_err(struct request *rq, int error)
+{
+ WARN_ON(error >= 0);
+ return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_err);
+
void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
- we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
- rq->cmd_flags |= (bio->bi_rw & 3);
+ /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
+ rq->cmd_flags |= bio->bi_rw & REQ_RW;
if (bio_has_data(bio)) {
rq->nr_phys_segments = bio_phys_segments(q, bio);
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644
index 000000000000..ca564202ed7a
--- /dev/null
+++ b/block/blk-iopoll.c
@@ -0,0 +1,227 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/blk-iopoll.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+
+int blk_iopoll_enabled = 1;
+EXPORT_SYMBOL(blk_iopoll_enabled);
+
+static unsigned int blk_iopoll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * blk_iopoll_sched - Schedule a run of the iopoll handler
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Add this blk_iopoll structure to the pending poll list and trigger the
+ * raise of the blk iopoll softirq. The driver must already have gotten a
+ * successful return from blk_iopoll_sched_prep() before calling this.
+ * @iop is appended to the tail of the calling CPU's list; the list
+ * update and the softirq raise are done with local interrupts disabled.
+ **/
+void blk_iopoll_sched(struct blk_iopoll *iop)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_sched);
+
+/**
+ * __blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * See blk_iopoll_complete(). This function must be called with interrupts
+ * disabled. It unlinks @iop from the list it is queued on and then
+ * clears IOPOLL_F_SCHED in @iop->state.
+ **/
+void __blk_iopoll_complete(struct blk_iopoll *iop)
+{
+ list_del(&iop->list);
+ smp_mb__before_clear_bit();
+ clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(__blk_iopoll_complete);
+
+/**
+ * blk_iopoll_complete - Mark this @iopoll as un-polled again
+ * @iopoll: The parent iopoll structure
+ *
+ * Description:
+ * If a driver consumes less than the assigned budget in its run of the
+ * iopoll handler, it'll end the polled mode by calling this function. The
+ * iopoll handler will not be invoked again before blk_iopoll_sched_prep()
+ * is called. This is __blk_iopoll_complete() wrapped in
+ * local_irq_save()/local_irq_restore().
+ **/
+void blk_iopoll_complete(struct blk_iopoll *iopoll)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __blk_iopoll_complete(iopoll);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_complete);
+
+static void blk_iopoll_softirq(struct softirq_action *h)
+{
+ struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
+ int rearm = 0, budget = blk_iopoll_budget;
+ unsigned long start_time = jiffies;
+
+ local_irq_disable();
+
+ while (!list_empty(list)) {
+ struct blk_iopoll *iop;
+ int work, weight;
+
+ /*
+ * If softirq window is exhausted then punt. The window ends
+ * when the budget (initially blk_iopoll_budget) has been
+ * used up by the work reported from ->poll(), or when jiffies
+ * has advanced past the value sampled on entry. In that case
+ * the softirq is raised again after the loop so the entries
+ * still on the list are handled by a later run.
+ */
+ if (budget <= 0 || time_after(jiffies, start_time)) {
+ rearm = 1;
+ break;
+ }
+
+ local_irq_enable();
+
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ iop = list_entry(list->next, struct blk_iopoll, list);
+
+ weight = iop->weight;
+ work = 0;
+ if (test_bit(IOPOLL_F_SCHED, &iop->state))
+ work = iop->poll(iop, weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /*
+ * Drivers must not modify the iopoll state, if they
+ * consume their assigned weight (or more, some drivers can't
+ * easily just stop processing, they have to complete an
+ * entire mask of commands). In such cases this code
+ * still "owns" the iopoll instance and therefore can
+ * move the instance around on the list at-will: it is
+ * completed here if blk_iopoll_disable_pending() is true,
+ * otherwise it is moved to the tail of the list so that
+ * other instances get their turn first.
+ */
+ if (work >= weight) {
+ if (blk_iopoll_disable_pending(iop))
+ __blk_iopoll_complete(iop);
+ else
+ list_move_tail(&iop->list, list);
+ }
+ }
+
+ if (rearm)
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+
+ local_irq_enable();
+}
+
+/**
+ * blk_iopoll_disable - Disable iopoll on this @iop
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Disable io polling and wait for any pending callbacks to have completed.
+ * IOPOLL_F_DISABLE is set while waiting; the wait retries
+ * test_and_set_bit() on IOPOLL_F_SCHED with a 1 ms msleep() between
+ * attempts, so IOPOLL_F_SCHED is left set when this function returns.
+ * NOTE(review): because of msleep() this presumably must only be
+ * called from a context that may sleep - confirm against callers.
+ **/
+void blk_iopoll_disable(struct blk_iopoll *iop)
+{
+ set_bit(IOPOLL_F_DISABLE, &iop->state);
+ while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
+ msleep(1);
+ clear_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_disable);
+
+/**
+ * blk_iopoll_enable - Enable iopoll on this @iop
+ * @iop: The parent iopoll structure
+ *
+ * Description:
+ * Enable iopoll on this @iop. Note that the handler run will not be
+ * scheduled, it will only mark it as active. This clears
+ * IOPOLL_F_SCHED, which must be set on entry (BUG otherwise);
+ * blk_iopoll_init() and blk_iopoll_disable() both leave it set.
+ **/
+void blk_iopoll_enable(struct blk_iopoll *iop)
+{
+ BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
+ smp_mb__before_clear_bit();
+ clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_enable);
+
+/**
+ * blk_iopoll_init - Initialize this @iop
+ * @iop: The parent iopoll structure
+ * @weight: The default weight (or command completion budget)
+ * @poll_fn: The handler to invoke
+ *
+ * Description:
+ * Initialize this blk_iopoll structure. Before being actively used, the
+ * driver must call blk_iopoll_enable(). The structure is zeroed first,
+ * so any previous contents are lost, and it is left with
+ * IOPOLL_F_SCHED set.
+ **/
+void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
+{
+ memset(iop, 0, sizeof(*iop));
+ INIT_LIST_HEAD(&iop->list);
+ iop->weight = weight;
+ iop->poll = poll_fn;
+ set_bit(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_init);
+
+static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ /*
+ * If a CPU goes away, splice its entries to the current CPU
+ * and trigger a run of the softirq. @hcpu carries the number
+ * of the CPU that went away; its list is left empty. The
+ * splice and the softirq raise are done with local interrupts
+ * disabled. Every other action is ignored, and NOTIFY_OK is
+ * returned in all cases.
+ */
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+ int cpu = (unsigned long) hcpu;
+
+ local_irq_disable();
+ list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+ &__get_cpu_var(blk_cpu_iopoll));
+ __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ local_irq_enable();
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
+ .notifier_call = blk_iopoll_cpu_notify, /* moves pending polls off a dead CPU */
+};
+
+static __init int blk_iopoll_setup(void)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); /* start every possible CPU with an empty pending-poll list */
+
+ open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); /* blk_iopoll_softirq() services BLOCK_IOPOLL_SOFTIRQ */
+ register_hotcpu_notifier(&blk_iopoll_cpu_notifier); /* hook up blk_iopoll_cpu_notify() for CPU hotplug events */
+ return 0;
+}
+subsys_initcall(blk_iopoll_setup);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e1999679a4d5..b0de8574fdc8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
return 1;
}
+/**
+ * blk_rq_set_mixed_merge - mark a request as mixed merge
+ * @rq: request to mark as mixed merge
+ *
+ * Description:
+ * @rq is about to be mixed merged. Make sure the attributes
+ * which can be mixed are set in each bio and mark @rq as mixed
+ * merged. Does nothing if @rq already has REQ_MIXED_MERGE set.
+ */
+void blk_rq_set_mixed_merge(struct request *rq)
+{
+ unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+ struct bio *bio;
+
+ if (rq->cmd_flags & REQ_MIXED_MERGE)
+ return;
+
+ /*
+ * @rq will no longer represent mixable attributes for all the
+ * contained bios. It will just track those of the first one.
+ * Distribute the attributes to each bio. A bio that already
+ * carries failfast bits differing from @rq's triggers a
+ * one-time warning; @rq's bits are OR-ed in regardless.
+ */
+ for (bio = rq->bio; bio; bio = bio->bi_next) {
+ WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
+ (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
+ bio->bi_rw |= ff;
+ }
+ rq->cmd_flags |= REQ_MIXED_MERGE;
+}
+
static void blk_account_io_merge(struct request *req)
{
if (blk_do_io_stat(req)) {
@@ -350,12 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
if (blk_integrity_rq(req) != blk_integrity_rq(next))
return 0;
- /* don't merge requests of different failfast settings */
- if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
- blk_failfast_transport(req) != blk_failfast_transport(next) ||
- blk_failfast_driver(req) != blk_failfast_driver(next))
- return 0;
-
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -366,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
return 0;
/*
+ * If failfast settings disagree or any of the two is already
+ * a mixed merge, mark both as mixed before proceeding. This
+ * makes sure that all involved bios have mixable attributes
+ * set properly.
+ */
+ if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
+ (req->cmd_flags & REQ_FAILFAST_MASK) !=
+ (next->cmd_flags & REQ_FAILFAST_MASK)) {
+ blk_rq_set_mixed_merge(req);
+ blk_rq_set_mixed_merge(next);
+ }
+
+ /*
* At this point we have either done a back merge
* or front merge. We need the smaller start_time of
* the merged requests to be the current request
diff --git a/block/blk.h b/block/blk.h
index 3fae6add5430..5ee3d7e72feb 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -104,6 +104,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
int attempt_back_merge(struct request_queue *q, struct request *rq);
int attempt_front_merge(struct request_queue *q, struct request *rq);
void blk_recalc_rq_segments(struct request *rq);
+void blk_rq_set_mixed_merge(struct request *rq);
void blk_queue_congestion_threshold(struct request_queue *q);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fd7080ed7935..a34686f091db 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,13 +134,8 @@ struct cfq_data {
struct rb_root prio_trees[CFQ_PRIO_LISTS];
unsigned int busy_queues;
- /*
- * Used to track any pending rt requests so we can pre-empt current
- * non-RT cfqq in service when this value is non-zero.
- */
- unsigned int busy_rt_queues;
- int rq_in_driver;
+ int rq_in_driver[2];
int sync_flight;
/*
@@ -191,7 +186,6 @@ enum cfqq_state_flags {
CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
- CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
@@ -218,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
CFQ_CFQQ_FNS(on_rr);
CFQ_CFQQ_FNS(wait_request);
CFQ_CFQQ_FNS(must_dispatch);
-CFQ_CFQQ_FNS(must_alloc);
CFQ_CFQQ_FNS(must_alloc_slice);
CFQ_CFQQ_FNS(fifo_expire);
CFQ_CFQQ_FNS(idle_window);
@@ -239,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
struct io_context *);
+static inline int rq_in_driver(struct cfq_data *cfqd)
+{
+ return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1]; /* total of both counters; callers index them by rq_is_sync(rq) */
+}
+
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
int is_sync)
{
@@ -257,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
*/
static inline int cfq_bio_sync(struct bio *bio)
{
- if (bio_data_dir(bio) == READ || bio_sync(bio))
+ if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
return 1;
return 0;
@@ -648,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
BUG_ON(cfq_cfqq_on_rr(cfqq));
cfq_mark_cfqq_on_rr(cfqq);
cfqd->busy_queues++;
- if (cfq_class_rt(cfqq))
- cfqd->busy_rt_queues++;
cfq_resort_rr_list(cfqd, cfqq);
}
@@ -673,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
BUG_ON(!cfqd->busy_queues);
cfqd->busy_queues--;
- if (cfq_class_rt(cfqq))
- cfqd->busy_rt_queues--;
}
/*
@@ -760,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- cfqd->rq_in_driver++;
+ cfqd->rq_in_driver[rq_is_sync(rq)]++;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
- cfqd->rq_in_driver);
+ rq_in_driver(cfqd));
cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
}
@@ -770,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
+ const int sync = rq_is_sync(rq);
- WARN_ON(!cfqd->rq_in_driver);
- cfqd->rq_in_driver--;
+ WARN_ON(!cfqd->rq_in_driver[sync]);
+ cfqd->rq_in_driver[sync]--;
cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
- cfqd->rq_in_driver);
+ rq_in_driver(cfqd));
}
static void cfq_remove_request(struct request *rq)
@@ -1080,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
/*
* still requests with the driver, don't idle
*/
- if (cfqd->rq_in_driver)
+ if (rq_in_driver(cfqd))
return;
/*
@@ -1179,20 +1174,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
goto expire;
/*
- * If we have a RT cfqq waiting, then we pre-empt the current non-rt
- * cfqq.
- */
- if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
- /*
- * We simulate this as cfqq timed out so that it gets to bank
- * the remaining of its time slice.
- */
- cfq_log_cfqq(cfqd, cfqq, "preempt");
- cfq_slice_expired(cfqd, 1);
- goto new_queue;
- }
-
- /*
* The active queue has requests and isn't expired, allow it to
* dispatch.
*/
@@ -1312,6 +1293,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
return 0;
/*
+ * Drain async requests before we start sync IO
+ */
+ if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ return 0;
+
+ /*
* If this is an async queue and we have sync IO in flight, let it wait
*/
if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
@@ -1362,7 +1349,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
cfq_slice_expired(cfqd, 0);
}
- cfq_log(cfqd, "dispatched a request");
+ cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
return 1;
}
@@ -2130,11 +2117,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
*/
static void cfq_update_hw_tag(struct cfq_data *cfqd)
{
- if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
- cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+ if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
+ cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
- cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+ rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
return;
if (cfqd->hw_tag_samples++ < 50)
@@ -2161,9 +2148,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_update_hw_tag(cfqd);
- WARN_ON(!cfqd->rq_in_driver);
+ WARN_ON(!cfqd->rq_in_driver[sync]);
WARN_ON(!cfqq->dispatched);
- cfqd->rq_in_driver--;
+ cfqd->rq_in_driver[sync]--;
cfqq->dispatched--;
if (cfq_cfqq_sync(cfqq))
@@ -2197,7 +2184,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfq_arm_slice_timer(cfqd);
}
- if (!cfqd->rq_in_driver)
+ if (!rq_in_driver(cfqd))
cfq_schedule_dispatch(cfqd);
}
@@ -2229,8 +2216,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
static inline int __cfq_may_queue(struct cfq_queue *cfqq)
{
- if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
- !cfq_cfqq_must_alloc_slice(cfqq)) {
+ if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
cfq_mark_cfqq_must_alloc_slice(cfqq);
return ELV_MQUEUE_MUST;
}
@@ -2317,7 +2303,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
}
cfqq->allocated[rw]++;
- cfq_clear_cfqq_must_alloc(cfqq);
atomic_inc(&cfqq->ref);
spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/elevator.c b/block/elevator.c
index 2d511f9105e1..51bb66236ebb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -79,7 +79,8 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
/*
* Don't merge file system requests and discard requests
*/
- if (bio_discard(bio) != bio_discard(rq->bio))
+ if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
+ bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
return 0;
/*
@@ -101,16 +102,11 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
return 0;
/*
- * Don't merge if failfast settings don't match.
- *
- * FIXME: The negation in front of each condition is necessary
- * because bio and request flags use different bit positions
- * and the accessors return those bits directly. This
- * ugliness will soon go away.
+ * Don't merge if failfast settings don't match. Just check the
+ * first four bits, they have identical mappings in the bio->bi_rw
+ * and rq->cmd_flags bits.
*/
- if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) ||
- !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
- !bio_failfast_driver(bio) != !blk_failfast_driver(rq))
+ if ((bio->bi_rw & BIO_RW_RQ_MASK) != (rq->cmd_flags & BIO_RW_RQ_MASK))
return 0;
if (!elv_iosched_allow_merge(rq, bio))
diff --git a/block/genhd.c b/block/genhd.c
index f4c64c2b303a..b89328eceee2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1215,6 +1215,16 @@ void put_disk(struct gendisk *disk)
EXPORT_SYMBOL(put_disk);
+static void set_disk_ro_uevent(struct gendisk *gd, int ro)
+{
+ char event[] = "DISK_RO=1"; /* writable array so the digit can be patched below */
+ char *envp[] = { event, NULL };
+
+ if (!ro)
+ event[8] = '0'; /* index 8 is the digit following DISK_RO= */
+ kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); /* KOBJ_CHANGE on the disk's device kobject, with DISK_RO=0 or DISK_RO=1 in the environment */
+}
+
void set_device_ro(struct block_device *bdev, int flag)
{
bdev->bd_part->policy = flag;
@@ -1227,8 +1237,12 @@ void set_disk_ro(struct gendisk *disk, int flag)
struct disk_part_iter piter;
struct hd_struct *part;
- disk_part_iter_init(&piter, disk,
- DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+ if (disk->part0.policy != flag) {
+ set_disk_ro_uevent(disk, flag);
+ disk->part0.policy = flag;
+ }
+
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
while ((part = disk_part_iter_next(&piter)))
part->policy = flag;
disk_part_iter_exit(&piter);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a52cc7fe45ea..0589dfbbd7db 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3889,7 +3889,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
int j = 0;
int rc;
int dac, return_code;
- InquiryData_struct *inq_buff = NULL;
+ InquiryData_struct *inq_buff;
if (reset_devices) {
/* Reset the controller with a PCI power-cycle */
@@ -4029,6 +4029,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
printk(KERN_WARNING "cciss: unable to determine firmware"
" version of controller\n");
}
+ kfree(inq_buff);
cciss_procinit(i);
@@ -4045,7 +4046,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
return 1;
clean4:
- kfree(inq_buff);
kfree(hba[i]->cmd_pool_bits);
if (hba[i]->cmd_pool)
pci_free_consistent(hba[i]->pdev,
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 5757188cd1fb..bbb79441d895 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -475,7 +475,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
if (bio_rw(bio) == WRITE) {
- int barrier = bio_barrier(bio);
+ bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
struct file *file = lo->lo_backing_file;
if (barrier) {
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 13c1aee6aa3f..28f1f25f0f63 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -442,7 +442,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev)
* sleep when allocating a lower-request and therefore cannot be
* bouncing.
*/
- blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
+ blk_stack_limits(&q->limits, &osd_request_queue(osdev->osd)->limits, 0);
blk_queue_prep_rq(q, blk_queue_start_tag);
blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 911dfd98d813..9f3518c515a1 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -219,8 +219,6 @@ static int pcd_sector; /* address of next requested sector */
static int pcd_count; /* number of blocks still to do */
static char *pcd_buf; /* buffer for request in progress */
-static int pcd_warned; /* Have we logged a phase warning ? */
-
/* kernel glue structures */
static int pcd_block_open(struct block_device *bdev, fmode_t mode)
@@ -417,12 +415,10 @@ static int pcd_completion(struct pcd_unit *cd, char *buf, char *fun)
printk
("%s: %s: Unexpected phase %d, d=%d, k=%d\n",
cd->name, fun, p, d, k);
- if ((verbose < 2) && !pcd_warned) {
- pcd_warned = 1;
- printk
- ("%s: WARNING: ATAPI phase errors\n",
- cd->name);
- }
+ if (verbose < 2)
+ printk_once(
+ "%s: WARNING: ATAPI phase errors\n",
+ cd->name);
mdelay(1);
}
if (k++ > PCD_TMO) {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index da403b6a7f43..f5cd2e83ebcc 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1564,15 +1564,13 @@ static int carm_init_shm(struct carm_host *host)
static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
- static unsigned int printed_version;
struct carm_host *host;
unsigned int pci_dac;
int rc;
struct request_queue *q;
unsigned int i;
- if (!printed_version++)
- printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+ printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
rc = pci_enable_device(pdev);
if (rc)
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 390d69bb7c48..b441ce3832e9 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -416,15 +416,9 @@ retry:
goto retry;
}
if (we.max_disk > (MAX_DISKNO - 1)) {
- static int warned;
-
- if (warned == 0) {
- warned++;
- printk(VIOD_KERN_INFO
- "Only examining the first %d "
- "of %d disks connected\n",
- MAX_DISKNO, we.max_disk + 1);
- }
+ printk_once(VIOD_KERN_INFO
+ "Only examining the first %d of %d disks connected\n",
+ MAX_DISKNO, we.max_disk + 1);
}
/* Send the close event to OS/400. We DON'T expect a response */
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9726577cde49..76811fd94e9e 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1123,7 +1123,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
if (error == -EOPNOTSUPP)
goto out;
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+ if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
goto out;
if (unlikely(error)) {
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e0e5937e42a..5aa30d1b2d6e 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -285,7 +285,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
if (!error)
return 0; /* I/O complete */
- if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+ if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
return error;
if (error == -EOPNOTSUPP)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8a311ea0d441..82350f590d98 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -586,7 +586,7 @@ static void dec_pending(struct dm_io *io, int error)
*/
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md)) {
- if (!bio_barrier(io->bio))
+ if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
bio_list_add_head(&md->deferred,
io->bio);
} else
@@ -598,7 +598,7 @@ static void dec_pending(struct dm_io *io, int error)
io_error = io->error;
bio = io->bio;
- if (bio_barrier(bio)) {
+ if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
/*
* There can be just one barrier request so we use
* a per-device variable for error reporting.
@@ -1204,7 +1204,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
ci.map = dm_get_table(md);
if (unlikely(!ci.map)) {
- if (!bio_barrier(bio))
+ if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
bio_io_error(bio);
else
if (!md->barrier_error)
@@ -1316,7 +1316,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
* we have to queue this io for later.
*/
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
- unlikely(bio_barrier(bio))) {
+ unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
up_read(&md->io_lock);
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1339,7 +1339,7 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
{
struct mapped_device *md = q->queuedata;
- if (unlikely(bio_barrier(bio))) {
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -2159,7 +2159,7 @@ static void dm_wq_work(struct work_struct *work)
if (dm_request_based(md))
generic_make_request(c);
else {
- if (bio_barrier(c))
+ if (bio_rw_flagged(c, BIO_RW_BARRIER))
process_barrier(md, c);
else
__split_and_process_bio(md, c);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5fe39c2a3d2b..ea4842905444 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -288,7 +288,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
sector_t start_sector;
int cpu;
- if (unlikely(bio_barrier(bio))) {
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 7140909f6662..89e76819f61f 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -90,7 +90,7 @@ static void multipath_end_request(struct bio *bio, int error)
if (uptodate)
multipath_end_bh_io(mp_bh, 0);
- else if (!bio_rw_ahead(bio)) {
+ else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) {
/*
* oops, IO error:
*/
@@ -144,7 +144,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
const int rw = bio_data_dir(bio);
int cpu;
- if (unlikely(bio_barrier(bio))) {
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 898e2bdfee47..f845ed98fec9 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -448,7 +448,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
const int rw = bio_data_dir(bio);
int cpu;
- if (unlikely(bio_barrier(bio))) {
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8726fd7ebce5..ff7ed3335995 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -782,8 +782,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
struct bio_list bl;
struct page **behind_pages = NULL;
const int rw = bio_data_dir(bio);
- const int do_sync = bio_sync(bio);
- int cpu, do_barriers;
+ const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+ int cpu;
+ bool do_barriers;
mdk_rdev_t *blocked_rdev;
/*
@@ -797,7 +798,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
md_write_start(mddev, bio); /* wait on superblock update early */
- if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+ if (unlikely(!mddev->barriers_work &&
+ bio_rw_flagged(bio, BIO_RW_BARRIER))) {
if (rw == WRITE)
md_write_end(mddev);
bio_endio(bio, -EOPNOTSUPP);
@@ -925,7 +927,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
atomic_set(&r1_bio->remaining, 0);
atomic_set(&r1_bio->behind_remaining, 0);
- do_barriers = bio_barrier(bio);
+ do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER);
if (do_barriers)
set_bit(R1BIO_Barrier, &r1_bio->state);
@@ -1600,7 +1602,7 @@ static void raid1d(mddev_t *mddev)
* We already have a nr_pending reference on these rdevs.
*/
int i;
- const int do_sync = bio_sync(r1_bio->master_bio);
+ const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
clear_bit(R1BIO_Barrier, &r1_bio->state);
for (i=0; i < conf->raid_disks; i++)
@@ -1654,7 +1656,7 @@ static void raid1d(mddev_t *mddev)
(unsigned long long)r1_bio->sector);
raid_end_bio_io(r1_bio);
} else {
- const int do_sync = bio_sync(r1_bio->master_bio);
+ const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
r1_bio->bios[r1_bio->read_disk] =
mddev->ro ? IO_BLOCKED : NULL;
r1_bio->read_disk = disk;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3d9020cf6f6e..d0a2152e064f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -796,12 +796,12 @@ static int make_request(struct request_queue *q, struct bio * bio)
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
- const int do_sync = bio_sync(bio);
+ const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
struct bio_list bl;
unsigned long flags;
mdk_rdev_t *blocked_rdev;
- if (unlikely(bio_barrier(bio))) {
+ if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
bio_endio(bio, -EOPNOTSUPP);
return 0;
}
@@ -1610,7 +1610,7 @@ static void raid10d(mddev_t *mddev)
raid_end_bio_io(r10_bio);
bio_put(bio);
} else {
- const int do_sync = bio_sync(r10_bio->master_bio);
+ const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
bio_put(bio);
rdev = conf->mirrors[mirror].rdev;
if (printk_ratelimit())
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8a2c5dc67ba..826eb3467357 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3606,7 +3606,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
const int rw = bio_data_dir(bi);
int cpu, remaining;
- if (unlikely(bio_barrier(bi))) {
+ if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
bio_endio(bi, -EOPNOTSUPP);
return 0;
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index f3c40898fc7d..90c94da8baa4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -897,8 +897,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
if (driver_byte(result) & DRIVER_SENSE)
scsi_print_sense("", cmd);
}
- blk_end_request_all(req, -EIO);
- scsi_next_command(cmd);
+ if (blk_end_request_err(req, -EIO))
+ scsi_requeue_command(q, cmd);
+ else
+ scsi_next_command(cmd);
break;
case ACTION_REPREP:
/* Unprep the request and put it back at the head of the queue.
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index fad25b753042..b1c258ca2102 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -112,8 +112,9 @@ static int dst_request(struct request_queue *q, struct bio *bio)
* I worked with.
*
* Empty barriers are not allowed anyway, see 51fd77bd9f512
- * for example, although later it was changed to bio_discard()
- * only, which does not work in this case.
+ * for example, although later it was changed to
+ * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
+ * work in this case.
*/
//err = -EOPNOTSUPP;
err = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4af..5cf405b0828d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -260,7 +260,7 @@ loop_lock:
num_run++;
batch_run++;
- if (bio_sync(cur))
+ if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
num_sync_run++;
if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
bio->bi_rw |= rw;
spin_lock(&device->io_lock);
- if (bio_sync(bio))
+ if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
pending_bios = &device->pending_sync_bios;
else
pending_bios = &device->pending_bios;
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..aec4014291be 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
len = left;
ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
- if (ret > 0)
+ if (ret > 0) {
*ppos += ret;
+ file_accessed(in);
+ }
return ret;
}
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
ret = file_remove_suid(out);
- if (!ret)
+ if (!ret) {
+ file_update_time(out);
ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+ }
mutex_unlock(&inode->i_mutex);
} while (ret > 0);
splice_from_pipe_end(pipe, &sd);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2892b710771c..5be93f18d842 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -142,56 +142,51 @@ struct bio {
*
* bit 0 -- data direction
* If not set, bio is a read from device. If set, it's a write to device.
- * bit 1 -- rw-ahead when set
- * bit 2 -- barrier
+ * bit 1 -- fail fast device errors
+ * bit 2 -- fail fast transport errors
+ * bit 3 -- fail fast driver errors
+ * bit 4 -- rw-ahead when set
+ * bit 5 -- barrier
* Insert a serialization point in the IO queue, forcing previously
* submitted IO to be completed before this one is issued.
- * bit 3 -- synchronous I/O hint.
- * bit 4 -- Unplug the device immediately after submitting this bio.
- * bit 5 -- metadata request
+ * bit 6 -- synchronous I/O hint.
+ * bit 7 -- Unplug the device immediately after submitting this bio.
+ * bit 8 -- metadata request
* Used for tracing to differentiate metadata and data IO. May also
* get some preferential treatment in the IO scheduler
- * bit 6 -- discard sectors
+ * bit 9 -- discard sectors
* Informs the lower level device that this range of sectors is no longer
* used by the file system and may thus be freed by the device. Used
* for flash based storage.
- * bit 7 -- fail fast device errors
- * bit 8 -- fail fast transport errors
- * bit 9 -- fail fast driver errors
* Don't want driver retries for any fast fail whatever the reason.
* bit 10 -- Tell the IO scheduler not to wait for more requests after this
one has been submitted, even if it is a SYNC request.
*/
-#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */
-#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */
-#define BIO_RW_BARRIER 2
-#define BIO_RW_SYNCIO 3
-#define BIO_RW_UNPLUG 4
-#define BIO_RW_META 5
-#define BIO_RW_DISCARD 6
-#define BIO_RW_FAILFAST_DEV 7
-#define BIO_RW_FAILFAST_TRANSPORT 8
-#define BIO_RW_FAILFAST_DRIVER 9
-#define BIO_RW_NOIDLE 10
-
-#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag)))
+enum bio_rw_flags {
+ BIO_RW, /* bit 0: data direction; set means write */
+ BIO_RW_FAILFAST_DEV, /* bit 1: fail fast device errors */
+ BIO_RW_FAILFAST_TRANSPORT, /* bit 2: fail fast transport errors */
+ BIO_RW_FAILFAST_DRIVER, /* bit 3: fail fast driver errors */
+ /* above flags must match the first four __REQ_* bits (blkdev.h) */
+ BIO_RW_AHEAD, /* bit 4: rw-ahead */
+ BIO_RW_BARRIER, /* bit 5: barrier */
+ BIO_RW_SYNCIO, /* bit 6: synchronous I/O hint */
+ BIO_RW_UNPLUG, /* bit 7: unplug the device right after submit */
+ BIO_RW_META, /* bit 8: metadata request */
+ BIO_RW_DISCARD, /* bit 9: discard sectors */
+ BIO_RW_NOIDLE, /* bit 10: scheduler should not wait for more requests */
+};
/*
- * Old defines, these should eventually be replaced by direct usage of
- * bio_rw_flagged()
+ * The first four bits must match between bio->bi_rw and rq->cmd_flags;
+ * make that explicit here.
*/
-#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER)
-#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO)
-#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG)
-#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV)
-#define bio_failfast_transport(bio) \
- bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT)
-#define bio_failfast_driver(bio) \
- bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER)
-#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD)
-#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META)
-#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD)
-#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE)
+#define BIO_RW_RQ_MASK 0xf
+
+static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
+{
+ return (bio->bi_rw & (1 << flag)) != 0; /* true iff bit 'flag' of bi_rw is set */
+}
/*
* upper 16 bits of bi_rw define the io priority of this bio
@@ -216,7 +211,7 @@ struct bio {
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
+#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD))
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
new file mode 100644
index 000000000000..308734d3d4a2
--- /dev/null
+++ b/include/linux/blk-iopoll.h
@@ -0,0 +1,48 @@
+#ifndef BLK_IOPOLL_H
+#define BLK_IOPOLL_H
+
+struct blk_iopoll;
+typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
+
+struct blk_iopoll {
+ struct list_head list;
+ unsigned long state; /* IOPOLL_F_* bit numbers, accessed with test_bit() and friends */
+ unsigned long data;
+ int weight; /* NOTE(review): presumably the int given to blk_iopoll_init() - confirm */
+ int max;
+ blk_iopoll_fn *poll; /* callback, see the blk_iopoll_fn typedef */
+};
+
+enum {
+ IOPOLL_F_SCHED = 0, /* ->state bit: claimed by blk_iopoll_sched_prep() */
+ IOPOLL_F_DISABLE = 1, /* ->state bit: while set, blk_iopoll_sched_prep() fails */
+};
+
+/*
+ * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
+ * that we were the first to acquire this iop for scheduling. Returns
+ * non-zero if the bit was already set or if this iop is disabled.
+ */
+static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
+{
+ if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
+ return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); /* old bit value */
+
+ return 1; /* IOPOLL_F_DISABLE is set: report failure */
+}
+
+static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
+{
+ return test_bit(IOPOLL_F_DISABLE, &iop->state); /* non-zero while the bit is set */
+}
+
+extern void blk_iopoll_sched(struct blk_iopoll *);
+extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
+extern void blk_iopoll_complete(struct blk_iopoll *);
+extern void __blk_iopoll_complete(struct blk_iopoll *);
+extern void blk_iopoll_enable(struct blk_iopoll *);
+extern void blk_iopoll_disable(struct blk_iopoll *);
+
+extern int blk_iopoll_enabled;
+
+#endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69103e053c92..4c0f724511f5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -86,13 +86,14 @@ enum {
};
/*
- * request type modified bits. first two bits match BIO_RW* bits, important
+ * request type modified bits. The first four bits must match the BIO_RW_* bits.
*/
enum rq_flag_bits {
__REQ_RW, /* not set, read. set, write */
__REQ_FAILFAST_DEV, /* no driver retries of device errors */
__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
__REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
+ /* above flags must match BIO_RW_* */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SORTED, /* elevator knows about this request */
__REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
@@ -114,6 +115,7 @@ enum rq_flag_bits {
__REQ_INTEGRITY, /* integrity metadata has been remapped */
__REQ_NOIDLE, /* Don't anticipate more IO after this one */
__REQ_IO_STAT, /* account I/O stat */
+ __REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_NR_BITS, /* stops here */
};
@@ -142,6 +144,10 @@ enum rq_flag_bits {
#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_IO_STAT (1 << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
+
+#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
+ REQ_FAILFAST_DRIVER)
#define BLK_MAX_CDB 16
@@ -453,6 +459,7 @@ struct request_queue
#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
+#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_CLUSTER) | \
@@ -575,6 +582,7 @@ enum {
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
+#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
@@ -828,11 +836,13 @@ static inline void blk_run_address_space(struct address_space *mapping)
}
/*
- * blk_rq_pos() : the current sector
- * blk_rq_bytes() : bytes left in the entire request
- * blk_rq_cur_bytes() : bytes left in the current segment
- * blk_rq_sectors() : sectors left in the entire request
- * blk_rq_cur_sectors() : sectors left in the current segment
+ * blk_rq_pos() : the current sector
+ * blk_rq_bytes() : bytes left in the entire request
+ * blk_rq_cur_bytes() : bytes left in the current segment
+ * blk_rq_err_bytes() : bytes left till the next error boundary
+ * blk_rq_sectors() : sectors left in the entire request
+ * blk_rq_cur_sectors() : sectors left in the current segment
+ * blk_rq_err_sectors() : sectors left till the next error boundary
*/
static inline sector_t blk_rq_pos(const struct request *rq)
{
@@ -849,6 +859,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq)
return rq->bio ? bio_cur_bytes(rq->bio) : 0;
}
+extern unsigned int blk_rq_err_bytes(const struct request *rq);
+
static inline unsigned int blk_rq_sectors(const struct request *rq)
{
return blk_rq_bytes(rq) >> 9;
@@ -859,6 +871,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
+static inline unsigned int blk_rq_err_sectors(const struct request *rq)
+{
+ return blk_rq_err_bytes(rq) >> 9; /* bytes to 512-byte sectors */
+}
+
/*
* Request issue related functions.
*/
@@ -885,10 +902,12 @@ extern bool blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, int error);
extern bool blk_end_request_cur(struct request *rq, int error);
+extern bool blk_end_request_err(struct request *rq, int error);
extern bool __blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void __blk_end_request_all(struct request *rq, int error);
extern bool __blk_end_request_cur(struct request *rq, int error);
+extern bool __blk_end_request_err(struct request *rq, int error);
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
@@ -921,7 +940,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
sector_t offset);
-extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern int blk_queue_dma_drain(struct request_queue *q,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35e7df1e9f30..edd8d5c90394 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -344,6 +344,7 @@ enum
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
+ BLOCK_IOPOLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 58be76017fd0..0ed9fa6f322e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -92,6 +92,7 @@ extern int sysctl_nr_trim_pages;
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+extern int blk_iopoll_enabled;
/* Constants used for minimum and maximum */
#ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -990,7 +991,14 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
-
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "blk_iopoll",
+ .data = &blk_iopoll_enabled,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
/*
* NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt