diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2009-09-03 11:35:49 +0200 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2009-09-03 11:35:49 +0200 |
commit | 7a134af0a79796cb8436ad4539d14cf62cd3a06e (patch) | |
tree | 295c0c0416a42c2df551a4b34ad594f1721fa3ad | |
parent | 37d0892c5a94e208cf863e3b7bac014edee4346d (diff) | |
parent | 8accfab465bb2d3a97983af6e034c5f845308cb9 (diff) |
Merge branch 'for-2.6.32' into for-next
32 files changed, 617 insertions, 188 deletions
diff --git a/block/Makefile b/block/Makefile index 6c54ed0ff755..ba74ca6bfa14 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - ioctl.o genhd.o scsi_ioctl.o + blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index e3299a77a0d8..93051d151635 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1111,31 +1111,27 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_type = REQ_TYPE_FS; /* - * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) + * Inherit FAILFAST from bio (for read-ahead, and explicit + * FAILFAST). FAILFAST flags are identical for req and bio. */ - if (bio_rw_ahead(bio)) - req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | - REQ_FAILFAST_DRIVER); - if (bio_failfast_dev(bio)) - req->cmd_flags |= REQ_FAILFAST_DEV; - if (bio_failfast_transport(bio)) - req->cmd_flags |= REQ_FAILFAST_TRANSPORT; - if (bio_failfast_driver(bio)) - req->cmd_flags |= REQ_FAILFAST_DRIVER; - - if (unlikely(bio_discard(bio))) { + if (bio_rw_flagged(bio, BIO_RW_AHEAD)) + req->cmd_flags |= REQ_FAILFAST_MASK; + else + req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; + + if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) { req->cmd_flags |= REQ_DISCARD; - if (bio_barrier(bio)) + if (bio_rw_flagged(bio, BIO_RW_BARRIER)) req->cmd_flags |= REQ_SOFTBARRIER; req->q->prepare_discard_fn(req->q, req); - } else if (unlikely(bio_barrier(bio))) + } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) req->cmd_flags |= REQ_HARDBARRIER; - if (bio_sync(bio)) + if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) req->cmd_flags |= REQ_RW_SYNC; - if (bio_rw_meta(bio)) + if (bio_rw_flagged(bio, BIO_RW_META)) req->cmd_flags |= REQ_RW_META; - if (bio_noidle(bio)) + if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; @@ -1150,7 +1146,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) */ static inline bool queue_should_plug(struct request_queue *q) { - return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); + return !(blk_queue_nonrot(q) && blk_queue_queuing(q)); } static int __make_request(struct request_queue *q, struct bio *bio) @@ -1159,11 +1155,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret; unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); - const int sync = bio_sync(bio); - const int unplug = bio_unplug(bio); + const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); + const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); + const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if (bio_barrier(bio) && bio_has_data(bio) && + if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) && (q->next_ordered == QUEUE_ORDERED_NONE)) { bio_endio(bio, -EOPNOTSUPP); return 0; @@ -1177,7 +1174,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely(bio_barrier(bio)) || elv_queue_empty(q)) + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1190,6 +1187,9 @@ static int __make_request(struct request_queue *q, struct bio *bio) trace_block_bio_backmerge(q, bio); + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) + blk_rq_set_mixed_merge(req); + req->biotail->bi_next = bio; req->biotail = bio; req->__data_len += bytes; @@ -1209,6 +1209,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) trace_block_bio_frontmerge(q, bio); + if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { + blk_rq_set_mixed_merge(req); + req->cmd_flags &= ~REQ_FAILFAST_MASK; + req->cmd_flags |= ff; + } + bio->bi_next = req->bio; req->bio = bio; @@ -1464,7 +1470,8 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_discard(bio) && !q->prepare_discard_fn) { + if (bio_rw_flagged(bio, BIO_RW_DISCARD) && + !q->prepare_discard_fn) { err = -EOPNOTSUPP; goto end_io; } @@ -1653,6 +1660,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); +/** + * blk_rq_err_bytes - determine number of bytes till the next failure boundary + * @rq: request to examine + * + * Description: + * A request could be merge of IOs which require different failure + * handling. This function determines the number of bytes which + * can be failed from the beginning of the request without + * crossing into area which need to be retried further. + * + * Return: + * The number of bytes to fail. + * + * Context: + * queue_lock must be held. + */ +unsigned int blk_rq_err_bytes(const struct request *rq) +{ + unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + unsigned int bytes = 0; + struct bio *bio; + + if (!(rq->cmd_flags & REQ_MIXED_MERGE)) + return blk_rq_bytes(rq); + + /* + * Currently the only 'mixing' which can happen is between + * different fastfail types. We can safely fail portions + * which have all the failfast bits that the first one has - + * the ones which are at least as eager to fail as the first + * one. + */ + for (bio = rq->bio; bio; bio = bio->bi_next) { + if ((bio->bi_rw & ff) != ff) + break; + bytes += bio->bi_size; + } + + /* this could lead to infinite loop */ + BUG_ON(blk_rq_bytes(rq) && !bytes); + return bytes; +} +EXPORT_SYMBOL_GPL(blk_rq_err_bytes); + static void blk_account_io_completion(struct request *req, unsigned int bytes) { if (blk_do_io_stat(req)) { @@ -1806,8 +1857,15 @@ void blk_dequeue_request(struct request *rq) * and to it is freed is accounted as io that is in progress at * the driver side. */ - if (blk_account_rq(rq)) + if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]++; + /* + * Mark this device as supporting hardware queuing, if + * we have more IOs in flight than 4. + */ + if (!blk_queue_queuing(q) && queue_in_flight(q) > 4) + set_bit(QUEUE_FLAG_CQ, &q->queue_flags); + } } /** @@ -1999,6 +2057,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (blk_fs_request(req) || blk_discard_rq(req)) req->__sector += total_bytes >> 9; + /* mixed attributes always follow the first bio */ + if (req->cmd_flags & REQ_MIXED_MERGE) { + req->cmd_flags &= ~REQ_FAILFAST_MASK; + req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK; + } + /* * If total number of sectors is less than the first segment * size, something has gone terribly wrong. @@ -2178,6 +2242,25 @@ bool blk_end_request_cur(struct request *rq, int error) EXPORT_SYMBOL(blk_end_request_cur); /** + * blk_end_request_err - Finish a request till the next failure boundary. + * @rq: the request to finish till the next failure boundary for + * @error: must be negative errno + * + * Description: + * Complete @rq till the next failure boundary. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool blk_end_request_err(struct request *rq, int error) +{ + WARN_ON(error >= 0); + return blk_end_request(rq, error, blk_rq_err_bytes(rq)); +} +EXPORT_SYMBOL_GPL(blk_end_request_err); + +/** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed * @error: %0 for success, < %0 for error @@ -2236,12 +2319,31 @@ bool __blk_end_request_cur(struct request *rq, int error) } EXPORT_SYMBOL(__blk_end_request_cur); +/** + * __blk_end_request_err - Finish a request till the next failure boundary. + * @rq: the request to finish till the next failure boundary for + * @error: must be negative errno + * + * Description: + * Complete @rq till the next failure boundary. Must be called + * with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + */ +bool __blk_end_request_err(struct request *rq, int error) +{ + WARN_ON(error >= 0); + return __blk_end_request(rq, error, blk_rq_err_bytes(rq)); +} +EXPORT_SYMBOL_GPL(__blk_end_request_err); + void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and - we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ - rq->cmd_flags |= (bio->bi_rw & 3); + /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ + rq->cmd_flags |= bio->bi_rw & REQ_RW; if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c new file mode 100644 index 000000000000..ca564202ed7a --- /dev/null +++ b/block/blk-iopoll.c @@ -0,0 +1,227 @@ +/* + * Functions related to interrupt-poll handling in the block layer. This + * is similar to NAPI for network devices. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/blk-iopoll.h> +#include <linux/delay.h> + +#include "blk.h" + +int blk_iopoll_enabled = 1; +EXPORT_SYMBOL(blk_iopoll_enabled); + +static unsigned int blk_iopoll_budget __read_mostly = 256; + +static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); + +/** + * blk_iopoll_sched - Schedule a run of the iopoll handler + * @iop: The parent iopoll structure + * + * Description: + * Add this blk_iopoll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. The driver must already have gotten a + * succesful return from blk_iopoll_sched_prep() before calling this. + **/ +void blk_iopoll_sched(struct blk_iopoll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll)); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_sched); + +/** + * __blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * See blk_iopoll_complete(). This function must be called with interrupts + * disabled. + **/ +void __blk_iopoll_complete(struct blk_iopoll *iop) +{ + list_del(&iop->list); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(__blk_iopoll_complete); + +/** + * blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * If a driver consumes less than the assigned budget in its run of the + * iopoll handler, it'll end the polled mode by calling this function. The + * iopoll handler will not be invoked again before blk_iopoll_sched_prep() + * is called. + **/ +void blk_iopoll_complete(struct blk_iopoll *iopoll) +{ + unsigned long flags; + + local_irq_save(flags); + __blk_iopoll_complete(iopoll); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_complete); + +static void blk_iopoll_softirq(struct softirq_action *h) +{ + struct list_head *list = &__get_cpu_var(blk_cpu_iopoll); + int rearm = 0, budget = blk_iopoll_budget; + unsigned long start_time = jiffies; + + local_irq_disable(); + + while (!list_empty(list)) { + struct blk_iopoll *iop; + int work, weight; + + /* + * If softirq window is exhausted then punt. + */ + if (budget <= 0 || time_after(jiffies, start_time)) { + rearm = 1; + break; + } + + local_irq_enable(); + + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. + */ + iop = list_entry(list->next, struct blk_iopoll, list); + + weight = iop->weight; + work = 0; + if (test_bit(IOPOLL_F_SCHED, &iop->state)) + work = iop->poll(iop, weight); + + budget -= work; + + local_irq_disable(); + + /* + * Drivers must not modify the iopoll state, if they + * consume their assigned weight (or more, some drivers can't + * easily just stop processing, they have to complete an + * entire mask of commands).In such cases this code + * still "owns" the iopoll instance and therefore can + * move the instance around on the list at-will. + */ + if (work >= weight) { + if (blk_iopoll_disable_pending(iop)) + __blk_iopoll_complete(iop); + else + list_move_tail(&iop->list, list); + } + } + + if (rearm) + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + + local_irq_enable(); +} + +/** + * blk_iopoll_disable - Disable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Disable io polling and wait for any pending callbacks to have completed. + **/ +void blk_iopoll_disable(struct blk_iopoll *iop) +{ + set_bit(IOPOLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) + msleep(1); + clear_bit(IOPOLL_F_DISABLE, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_disable); + +/** + * blk_iopoll_enable - Enable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Enable iopoll on this @iop. Note that the handler run will not be + * scheduled, it will only mark it as active. + **/ +void blk_iopoll_enable(struct blk_iopoll *iop) +{ + BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_enable); + +/** + * blk_iopoll_init - Initialize this @iop + * @iop: The parent iopoll structure + * @weight: The default weight (or command completion budget) + * @poll_fn: The handler to invoke + * + * Description: + * Initialize this blk_iopoll structure. Before being actively used, the + * driver must call blk_iopoll_enable(). + **/ +void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) +{ + memset(iop, 0, sizeof(*iop)); + INIT_LIST_HEAD(&iop->list); + iop->weight = weight; + iop->poll = poll_fn; + set_bit(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_init); + +static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), + &__get_cpu_var(blk_cpu_iopoll)); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = { + .notifier_call = blk_iopoll_cpu_notify, +}; + +static __init int blk_iopoll_setup(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); + + open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); + register_hotcpu_notifier(&blk_iopoll_cpu_notifier); + return 0; +} +subsys_initcall(blk_iopoll_setup); diff --git a/block/blk-merge.c b/block/blk-merge.c index e1999679a4d5..b0de8574fdc8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 1; } +/** + * blk_rq_set_mixed_merge - mark a request as mixed merge + * @rq: request to mark as mixed merge + * + * Description: + * @rq is about to be mixed merged. Make sure the attributes + * which can be mixed are set in each bio and mark @rq as mixed + * merged. + */ +void blk_rq_set_mixed_merge(struct request *rq) +{ + unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + struct bio *bio; + + if (rq->cmd_flags & REQ_MIXED_MERGE) + return; + + /* + * @rq will no longer represent mixable attributes for all the + * contained bios. It will just track those of the first one. + * Distributes the attributs to each bio. + */ + for (bio = rq->bio; bio; bio = bio->bi_next) { + WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) && + (bio->bi_rw & REQ_FAILFAST_MASK) != ff); + bio->bi_rw |= ff; + } + rq->cmd_flags |= REQ_MIXED_MERGE; +} + static void blk_account_io_merge(struct request *req) { if (blk_do_io_stat(req)) { @@ -350,12 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req, if (blk_integrity_rq(req) != blk_integrity_rq(next)) return 0; - /* don't merge requests of different failfast settings */ - if (blk_failfast_dev(req) != blk_failfast_dev(next) || - blk_failfast_transport(req) != blk_failfast_transport(next) || - blk_failfast_driver(req) != blk_failfast_driver(next)) - return 0; - /* * If we are allowed to merge, then append bio list * from next to rq and release next. merge_requests_fn @@ -366,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req, return 0; /* + * If failfast settings disagree or any of the two is already + * a mixed merge, mark both as mixed before proceeding. This + * makes sure that all involved bios have mixable attributes + * set properly. + */ + if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || + (req->cmd_flags & REQ_FAILFAST_MASK) != + (next->cmd_flags & REQ_FAILFAST_MASK)) { + blk_rq_set_mixed_merge(req); + blk_rq_set_mixed_merge(next); + } + + /* * At this point we have either done a back merge * or front merge. We need the smaller start_time of * the merged requests to be the current request diff --git a/block/blk.h b/block/blk.h index 3fae6add5430..5ee3d7e72feb 100644 --- a/block/blk.h +++ b/block/blk.h @@ -104,6 +104,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, int attempt_back_merge(struct request_queue *q, struct request *rq); int attempt_front_merge(struct request_queue *q, struct request *rq); void blk_recalc_rq_segments(struct request *rq); +void blk_rq_set_mixed_merge(struct request *rq); void blk_queue_congestion_threshold(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index fd7080ed7935..a34686f091db 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -134,13 +134,8 @@ struct cfq_data { struct rb_root prio_trees[CFQ_PRIO_LISTS]; unsigned int busy_queues; - /* - * Used to track any pending rt requests so we can pre-empt current - * non-RT cfqq in service when this value is non-zero. - */ - unsigned int busy_rt_queues; - int rq_in_driver; + int rq_in_driver[2]; int sync_flight; /* @@ -191,7 +186,6 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ - CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ @@ -218,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ CFQ_CFQQ_FNS(on_rr); CFQ_CFQQ_FNS(wait_request); CFQ_CFQQ_FNS(must_dispatch); -CFQ_CFQQ_FNS(must_alloc); CFQ_CFQQ_FNS(must_alloc_slice); CFQ_CFQQ_FNS(fifo_expire); CFQ_CFQQ_FNS(idle_window); @@ -239,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, struct io_context *); +static inline int rq_in_driver(struct cfq_data *cfqd) +{ + return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1]; +} + static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, int is_sync) { @@ -257,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic, */ static inline int cfq_bio_sync(struct bio *bio) { - if (bio_data_dir(bio) == READ || bio_sync(bio)) + if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO)) return 1; return 0; @@ -648,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; - if (cfq_class_rt(cfqq)) - cfqd->busy_rt_queues++; cfq_resort_rr_list(cfqd, cfqq); } @@ -673,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; - if (cfq_class_rt(cfqq)) - cfqd->busy_rt_queues--; } /* @@ -760,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - cfqd->rq_in_driver++; + cfqd->rq_in_driver[rq_is_sync(rq)]++; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", - cfqd->rq_in_driver); + rq_in_driver(cfqd)); cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); } @@ -770,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) static void cfq_deactivate_request(struct request_queue *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; + const int sync = rq_is_sync(rq); - WARN_ON(!cfqd->rq_in_driver); - cfqd->rq_in_driver--; + WARN_ON(!cfqd->rq_in_driver[sync]); + cfqd->rq_in_driver[sync]--; cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", - cfqd->rq_in_driver); + rq_in_driver(cfqd)); } static void cfq_remove_request(struct request *rq) @@ -1080,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) /* * still requests with the driver, don't idle */ - if (cfqd->rq_in_driver) + if (rq_in_driver(cfqd)) return; /* @@ -1179,20 +1174,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) goto expire; /* - * If we have a RT cfqq waiting, then we pre-empt the current non-rt - * cfqq. - */ - if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) { - /* - * We simulate this as cfqq timed out so that it gets to bank - * the remaining of its time slice. - */ - cfq_log_cfqq(cfqd, cfqq, "preempt"); - cfq_slice_expired(cfqd, 1); - goto new_queue; - } - - /* * The active queue has requests and isn't expired, allow it to * dispatch. */ @@ -1312,6 +1293,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) return 0; /* + * Drain async requests before we start sync IO + */ + if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC]) + return 0; + + /* * If this is an async queue and we have sync IO in flight, let it wait */ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) @@ -1362,7 +1349,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) cfq_slice_expired(cfqd, 0); } - cfq_log(cfqd, "dispatched a request"); + cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); return 1; } @@ -2130,11 +2117,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) */ static void cfq_update_hw_tag(struct cfq_data *cfqd) { - if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) - cfqd->rq_in_driver_peak = cfqd->rq_in_driver; + if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak) + cfqd->rq_in_driver_peak = rq_in_driver(cfqd); if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && - cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) + rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN) return; if (cfqd->hw_tag_samples++ < 50) @@ -2161,9 +2148,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_update_hw_tag(cfqd); - WARN_ON(!cfqd->rq_in_driver); + WARN_ON(!cfqd->rq_in_driver[sync]); WARN_ON(!cfqq->dispatched); - cfqd->rq_in_driver--; + cfqd->rq_in_driver[sync]--; cfqq->dispatched--; if (cfq_cfqq_sync(cfqq)) @@ -2197,7 +2184,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_arm_slice_timer(cfqd); } - if (!cfqd->rq_in_driver) + if (!rq_in_driver(cfqd)) cfq_schedule_dispatch(cfqd); } @@ -2229,8 +2216,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq) static inline int __cfq_may_queue(struct cfq_queue *cfqq) { - if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && - !cfq_cfqq_must_alloc_slice(cfqq)) { + if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) { cfq_mark_cfqq_must_alloc_slice(cfqq); return ELV_MQUEUE_MUST; } @@ -2317,7 +2303,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) } cfqq->allocated[rw]++; - cfq_clear_cfqq_must_alloc(cfqq); atomic_inc(&cfqq->ref); spin_unlock_irqrestore(q->queue_lock, flags); diff --git a/block/elevator.c b/block/elevator.c index 2d511f9105e1..51bb66236ebb 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -79,7 +79,8 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * Don't merge file system requests and discard requests */ - if (bio_discard(bio) != bio_discard(rq->bio)) + if (bio_rw_flagged(bio, BIO_RW_DISCARD) != + bio_rw_flagged(rq->bio, BIO_RW_DISCARD)) return 0; /* @@ -101,16 +102,11 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) return 0; /* - * Don't merge if failfast settings don't match. - * - * FIXME: The negation in front of each condition is necessary - * because bio and request flags use different bit positions - * and the accessors return those bits directly. This - * ugliness will soon go away. + * Don't merge if failfast settings don't match. Just check the + * first four bits, they have identical mappings in the bio->bi_rw + * and rq->cmd_flags bits. */ - if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) || - !bio_failfast_transport(bio) != !blk_failfast_transport(rq) || - !bio_failfast_driver(bio) != !blk_failfast_driver(rq)) + if ((bio->bi_rw & BIO_RW_RQ_MASK) != (rq->cmd_flags & BIO_RW_RQ_MASK)) return 0; if (!elv_iosched_allow_merge(rq, bio)) diff --git a/block/genhd.c b/block/genhd.c index f4c64c2b303a..b89328eceee2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1215,6 +1215,16 @@ void put_disk(struct gendisk *disk) EXPORT_SYMBOL(put_disk); +static void set_disk_ro_uevent(struct gendisk *gd, int ro) +{ + char event[] = "DISK_RO=1"; + char *envp[] = { event, NULL }; + + if (!ro) + event[8] = '0'; + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); +} + void set_device_ro(struct block_device *bdev, int flag) { bdev->bd_part->policy = flag; @@ -1227,8 +1237,12 @@ void set_disk_ro(struct gendisk *disk, int flag) struct disk_part_iter piter; struct hd_struct *part; - disk_part_iter_init(&piter, disk, - DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); + if (disk->part0.policy != flag) { + set_disk_ro_uevent(disk, flag); + disk->part0.policy = flag; + } + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) part->policy = flag; disk_part_iter_exit(&piter); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index a52cc7fe45ea..0589dfbbd7db 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3889,7 +3889,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, int j = 0; int rc; int dac, return_code; - InquiryData_struct *inq_buff = NULL; + InquiryData_struct *inq_buff; if (reset_devices) { /* Reset the controller with a PCI power-cycle */ @@ -4029,6 +4029,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, printk(KERN_WARNING "cciss: unable to determine firmware" " version of controller\n"); } + kfree(inq_buff); cciss_procinit(i); @@ -4045,7 +4046,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, return 1; clean4: - kfree(inq_buff); kfree(hba[i]->cmd_pool_bits); if (hba[i]->cmd_pool) pci_free_consistent(hba[i]->pdev, diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5757188cd1fb..bbb79441d895 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -475,7 +475,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; if (bio_rw(bio) == WRITE) { - int barrier = bio_barrier(bio); + bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER); struct file *file = lo->lo_backing_file; if (barrier) { diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 13c1aee6aa3f..28f1f25f0f63 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -442,7 +442,7 @@ static int osdblk_init_disk(struct osdblk_device *osdev) * sleep when allocating a lower-request and therefore cannot be * bouncing. */ - blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); + blk_stack_limits(&q->limits, &osd_request_queue(osdev->osd)->limits, 0); blk_queue_prep_rq(q, blk_queue_start_tag); blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 911dfd98d813..9f3518c515a1 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -219,8 +219,6 @@ static int pcd_sector; /* address of next requested sector */ static int pcd_count; /* number of blocks still to do */ static char *pcd_buf; /* buffer for request in progress */ -static int pcd_warned; /* Have we logged a phase warning ? */ - /* kernel glue structures */ static int pcd_block_open(struct block_device *bdev, fmode_t mode) @@ -417,12 +415,10 @@ static int pcd_completion(struct pcd_unit *cd, char *buf, char *fun) printk ("%s: %s: Unexpected phase %d, d=%d, k=%d\n", cd->name, fun, p, d, k); - if ((verbose < 2) && !pcd_warned) { - pcd_warned = 1; - printk - ("%s: WARNING: ATAPI phase errors\n", - cd->name); - } + if (verbose < 2) + printk_once( + "%s: WARNING: ATAPI phase errors\n", + cd->name); mdelay(1); } if (k++ > PCD_TMO) { diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index da403b6a7f43..f5cd2e83ebcc 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1564,15 +1564,13 @@ static int carm_init_shm(struct carm_host *host) static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { - static unsigned int printed_version; struct carm_host *host; unsigned int pci_dac; int rc; struct request_queue *q; unsigned int i; - if (!printed_version++) - printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n"); + printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n"); rc = pci_enable_device(pdev); if (rc) diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 390d69bb7c48..b441ce3832e9 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -416,15 +416,9 @@ retry: goto retry; } if (we.max_disk > (MAX_DISKNO - 1)) { - static int warned; - - if (warned == 0) { - warned++; - printk(VIOD_KERN_INFO - "Only examining the first %d " - "of %d disks connected\n", - MAX_DISKNO, we.max_disk + 1); - } + printk_once(VIOD_KERN_INFO + "Only examining the first %d of %d disks connected\n", + MAX_DISKNO, we.max_disk + 1); } /* Send the close event to OS/400. We DON'T expect a response */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 9726577cde49..76811fd94e9e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1123,7 +1123,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, if (error == -EOPNOTSUPP) goto out; - if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio)) + if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD)) goto out; if (unlikely(error)) { diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4e0e5937e42a..5aa30d1b2d6e 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -285,7 +285,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, if (!error) return 0; /* I/O complete */ - if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio)) + if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD)) return error; if (error == -EOPNOTSUPP) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a311ea0d441..82350f590d98 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -586,7 +586,7 @@ static void dec_pending(struct dm_io *io, int error) */ spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) { - if (!bio_barrier(io->bio)) + if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER)) bio_list_add_head(&md->deferred, io->bio); } else @@ -598,7 +598,7 @@ static void dec_pending(struct dm_io *io, int error) io_error = io->error; bio = io->bio; - if (bio_barrier(bio)) { + if (bio_rw_flagged(bio, BIO_RW_BARRIER)) { /* * There can be just one barrier request so we use * a per-device variable for error reporting. @@ -1204,7 +1204,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.map = dm_get_table(md); if (unlikely(!ci.map)) { - if (!bio_barrier(bio)) + if (!bio_rw_flagged(bio, BIO_RW_BARRIER)) bio_io_error(bio); else if (!md->barrier_error) @@ -1316,7 +1316,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio) * we have to queue this io for later. */ if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || - unlikely(bio_barrier(bio))) { + unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { up_read(&md->io_lock); if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && @@ -1339,7 +1339,7 @@ static int dm_make_request(struct request_queue *q, struct bio *bio) { struct mapped_device *md = q->queuedata; - if (unlikely(bio_barrier(bio))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); return 0; } @@ -2159,7 +2159,7 @@ static void dm_wq_work(struct work_struct *work) if (dm_request_based(md)) generic_make_request(c); else { - if (bio_barrier(c)) + if (bio_rw_flagged(c, BIO_RW_BARRIER)) process_barrier(md, c); else __split_and_process_bio(md, c); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5fe39c2a3d2b..ea4842905444 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -288,7 +288,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) sector_t start_sector; int cpu; - if (unlikely(bio_barrier(bio))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); return 0; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 7140909f6662..89e76819f61f 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -90,7 +90,7 @@ static void multipath_end_request(struct bio *bio, int error) if (uptodate) multipath_end_bh_io(mp_bh, 0); - else if (!bio_rw_ahead(bio)) { + else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) { /* * oops, IO error: */ @@ -144,7 +144,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) const int rw = bio_data_dir(bio); int cpu; - if (unlikely(bio_barrier(bio))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); return 0; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 898e2bdfee47..f845ed98fec9 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -448,7 +448,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio) const int rw = bio_data_dir(bio); int cpu; - if (unlikely(bio_barrier(bio))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); return 0; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8726fd7ebce5..ff7ed3335995 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -782,8 +782,9 @@ static int make_request(struct request_queue *q, struct bio * bio) struct bio_list bl; struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); - const int do_sync = bio_sync(bio); - int cpu, do_barriers; + const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); + int cpu; + bool do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -797,7 +798,8 @@ static int make_request(struct request_queue *q, struct bio * bio) md_write_start(mddev, bio); /* wait on superblock update early */ - if (unlikely(!mddev->barriers_work && bio_barrier(bio))) { + if (unlikely(!mddev->barriers_work && + bio_rw_flagged(bio, BIO_RW_BARRIER))) { if (rw == WRITE) md_write_end(mddev); bio_endio(bio, -EOPNOTSUPP); @@ -925,7 +927,7 @@ static int make_request(struct request_queue *q, struct bio * bio) atomic_set(&r1_bio->remaining, 0); atomic_set(&r1_bio->behind_remaining, 0); - do_barriers = bio_barrier(bio); + do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER); if (do_barriers) set_bit(R1BIO_Barrier, &r1_bio->state); @@ -1600,7 +1602,7 @@ static void raid1d(mddev_t *mddev) * We already have a nr_pending reference on these rdevs. */ int i; - const int do_sync = bio_sync(r1_bio->master_bio); + const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); clear_bit(R1BIO_BarrierRetry, &r1_bio->state); clear_bit(R1BIO_Barrier, &r1_bio->state); for (i=0; i < conf->raid_disks; i++) @@ -1654,7 +1656,7 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - const int do_sync = bio_sync(r1_bio->master_bio); + const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); r1_bio->bios[r1_bio->read_disk] = mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3d9020cf6f6e..d0a2152e064f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -796,12 +796,12 @@ static int make_request(struct request_queue *q, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const int do_sync = bio_sync(bio); + const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; - if (unlikely(bio_barrier(bio))) { + if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); return 0; } @@ -1610,7 +1610,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const int do_sync = bio_sync(r10_bio->master_bio); + const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b8a2c5dc67ba..826eb3467357 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3606,7 +3606,7 @@ static int make_request(struct request_queue *q, struct bio * bi) const int rw = bio_data_dir(bi); int cpu, remaining; - if (unlikely(bio_barrier(bi))) { + if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) { bio_endio(bi, -EOPNOTSUPP); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f3c40898fc7d..90c94da8baa4 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -897,8 +897,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) if (driver_byte(result) & DRIVER_SENSE) scsi_print_sense("", cmd); } - blk_end_request_all(req, -EIO); - scsi_next_command(cmd); + if (blk_end_request_err(req, -EIO)) + scsi_requeue_command(q, cmd); + else + scsi_next_command(cmd); break; case ACTION_REPREP: /* Unprep the request and put it back at the head of the queue. diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index fad25b753042..b1c258ca2102 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -112,8 +112,9 @@ static int dst_request(struct request_queue *q, struct bio *bio) * I worked with. * * Empty barriers are not allowed anyway, see 51fd77bd9f512 - * for example, although later it was changed to bio_discard() - * only, which does not work in this case. + * for example, although later it was changed to + * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not + * work in this case. */ //err = -EOPNOTSUPP; err = 0; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5dbefd11b4af..5cf405b0828d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -260,7 +260,7 @@ loop_lock: num_run++; batch_run++; - if (bio_sync(cur)) + if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) num_sync_run++; if (need_resched()) { @@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root, bio->bi_rw |= rw; spin_lock(&device->io_lock); - if (bio_sync(bio)) + if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) pending_bios = &device->pending_sync_bios; else pending_bios = &device->pending_bios; diff --git a/fs/splice.c b/fs/splice.c index 73766d24f97b..aec4014291be 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, len = left; ret = __generic_file_splice_read(in, ppos, pipe, len, flags); - if (ret > 0) + if (ret > 0) { *ppos += ret; + file_accessed(in); + } return ret; } @@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); - if (!ret) + if (!ret) { + file_update_time(out); ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + } mutex_unlock(&inode->i_mutex); } while (ret > 0); splice_from_pipe_end(pipe, &sd); diff --git a/include/linux/bio.h b/include/linux/bio.h index 2892b710771c..5be93f18d842 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -142,56 +142,51 @@ struct bio { * * bit 0 -- data direction * If not set, bio is a read from device. If set, it's a write to device. - * bit 1 -- rw-ahead when set - * bit 2 -- barrier + * bit 1 -- fail fast device errors + * bit 2 -- fail fast transport errors + * bit 3 -- fail fast driver errors + * bit 4 -- rw-ahead when set + * bit 5 -- barrier * Insert a serialization point in the IO queue, forcing previously * submitted IO to be completed before this one is issued. - * bit 3 -- synchronous I/O hint. - * bit 4 -- Unplug the device immediately after submitting this bio. - * bit 5 -- metadata request + * bit 6 -- synchronous I/O hint. + * bit 7 -- Unplug the device immediately after submitting this bio. + * bit 8 -- metadata request * Used for tracing to differentiate metadata and data IO. May also * get some preferential treatment in the IO scheduler - * bit 6 -- discard sectors + * bit 9 -- discard sectors * Informs the lower level device that this range of sectors is no longer * used by the file system and may thus be freed by the device. Used * for flash based storage. - * bit 7 -- fail fast device errors - * bit 8 -- fail fast transport errors - * bit 9 -- fail fast driver errors * Don't want driver retries for any fast fail whatever the reason. * bit 10 -- Tell the IO scheduler not to wait for more requests after this one has been submitted, even if it is a SYNC request. */ -#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ -#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ -#define BIO_RW_BARRIER 2 -#define BIO_RW_SYNCIO 3 -#define BIO_RW_UNPLUG 4 -#define BIO_RW_META 5 -#define BIO_RW_DISCARD 6 -#define BIO_RW_FAILFAST_DEV 7 -#define BIO_RW_FAILFAST_TRANSPORT 8 -#define BIO_RW_FAILFAST_DRIVER 9 -#define BIO_RW_NOIDLE 10 - -#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) +enum bio_rw_flags { + BIO_RW, + BIO_RW_FAILFAST_DEV, + BIO_RW_FAILFAST_TRANSPORT, + BIO_RW_FAILFAST_DRIVER, + /* above flags must match REQ_* */ + BIO_RW_AHEAD, + BIO_RW_BARRIER, + BIO_RW_SYNCIO, + BIO_RW_UNPLUG, + BIO_RW_META, + BIO_RW_DISCARD, + BIO_RW_NOIDLE, +}; /* - * Old defines, these should eventually be replaced by direct usage of - * bio_rw_flagged() + * First four bits must match between bio->bi_rw and rq->cmd_flags, make + * that explicit here. */ -#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER) -#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO) -#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG) -#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV) -#define bio_failfast_transport(bio) \ - bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT) -#define bio_failfast_driver(bio) \ - bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER) -#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD) -#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META) -#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD) -#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE) +#define BIO_RW_RQ_MASK 0xf + +static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) +{ + return (bio->bi_rw & (1 << flag)) != 0; +} /* * upper 16 bits of bi_rw define the io priority of this bio @@ -216,7 +211,7 @@ struct bio { #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) +#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD)) static inline unsigned int bio_cur_bytes(struct bio *bio) { diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h new file mode 100644 index 000000000000..308734d3d4a2 --- /dev/null +++ b/include/linux/blk-iopoll.h @@ -0,0 +1,48 @@ +#ifndef BLK_IOPOLL_H +#define BLK_IOPOLL_H + +struct blk_iopoll; +typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); + +struct blk_iopoll { + struct list_head list; + unsigned long state; + unsigned long data; + int weight; + int max; + blk_iopoll_fn *poll; +}; + +enum { + IOPOLL_F_SCHED = 0, + IOPOLL_F_DISABLE = 1, +}; + +/* + * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating + * that we were the first to acquire this iop for scheduling. If this iop + * is currently disabled, return "failure". + */ +static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) +{ + if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) + return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); + + return 1; +} + +static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) +{ + return test_bit(IOPOLL_F_DISABLE, &iop->state); +} + +extern void blk_iopoll_sched(struct blk_iopoll *); +extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); +extern void blk_iopoll_complete(struct blk_iopoll *); +extern void __blk_iopoll_complete(struct blk_iopoll *); +extern void blk_iopoll_enable(struct blk_iopoll *); +extern void blk_iopoll_disable(struct blk_iopoll *); + +extern int blk_iopoll_enabled; + +#endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69103e053c92..4c0f724511f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -86,13 +86,14 @@ enum { }; /* - * request type modified bits. first two bits match BIO_RW* bits, important + * request type modified bits. first four bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST_DEV, /* no driver retries of device errors */ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + /* above flags must match BIO_RW_* */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -114,6 +115,7 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NOIDLE, /* Don't anticipate more IO after this one */ __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_NR_BITS, /* stops here */ }; @@ -142,6 +144,10 @@ enum rq_flag_bits { #define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) + +#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ + REQ_FAILFAST_DRIVER) #define BLK_MAX_CDB 16 @@ -453,6 +459,7 @@ struct request_queue #define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ +#define QUEUE_FLAG_CQ 16 /* hardware does queuing */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -575,6 +582,7 @@ enum { #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) +#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) @@ -828,11 +836,13 @@ static inline void blk_run_address_space(struct address_space *mapping) } /* - * blk_rq_pos() : the current sector - * blk_rq_bytes() : bytes left in the entire request - * blk_rq_cur_bytes() : bytes left in the current segment - * blk_rq_sectors() : sectors left in the entire request - * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_pos() : the current sector + * blk_rq_bytes() : bytes left in the entire request + * blk_rq_cur_bytes() : bytes left in the current segment + * blk_rq_err_bytes() : bytes left till the next error boundary + * blk_rq_sectors() : sectors left in the entire request + * blk_rq_cur_sectors() : sectors left in the current segment + * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -849,6 +859,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq) return rq->bio ? bio_cur_bytes(rq->bio) : 0; } +extern unsigned int blk_rq_err_bytes(const struct request *rq); + static inline unsigned int blk_rq_sectors(const struct request *rq) { return blk_rq_bytes(rq) >> 9; @@ -859,6 +871,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } +static inline unsigned int blk_rq_err_sectors(const struct request *rq) +{ + return blk_rq_err_bytes(rq) >> 9; +} + /* * Request issue related functions. */ @@ -885,10 +902,12 @@ extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); extern bool blk_end_request_cur(struct request *rq, int error); +extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); +extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); @@ -921,7 +940,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); -extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f30..edd8d5c90394 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -344,6 +344,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, + BLOCK_IOPOLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 58be76017fd0..0ed9fa6f322e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -92,6 +92,7 @@ extern int sysctl_nr_trim_pages; #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +extern int blk_iopoll_enabled; /* Constants used for minimum and maximum */ #ifdef CONFIG_DETECT_SOFTLOCKUP @@ -990,7 +991,14 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif - + { + .ctl_name = CTL_UNNUMBERED, + .procname = "blk_iopoll", + .data = &blk_iopoll_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt |