author    Stephen Rothwell <sfr@canb.auug.org.au>    2010-06-16 11:41:13 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>    2010-06-16 11:41:13 +1000
commit    b8ea67b73d8a5d9c541217099e11d508d6e486cc (patch)
tree      6ce092ed8dcb2c3655fc7672bb1941ca0906a07d
parent    a4c772c8e13209f6996e538beb7c0d2b1df36cd8 (diff)
parent    a26868912affb4819b9dd64d0d2e9c6b01b2466c (diff)

Merge remote branch 'block/for-next'
-rw-r--r--  block/blk-core.c                   3
-rw-r--r--  block/blk-sysfs.c                 82
-rw-r--r--  drivers/block/cciss.c            553
-rw-r--r--  drivers/block/cciss.h            115
-rw-r--r--  drivers/block/cciss_cmd.h         32
-rw-r--r--  drivers/block/cciss_scsi.c         5
-rw-r--r--  drivers/block/cpqarray.c           6
-rw-r--r--  drivers/block/drbd/drbd_main.c     2
-rw-r--r--  drivers/block/drbd/drbd_nl.c       6
-rw-r--r--  drivers/block/floppy.c           144
-rw-r--r--  fs/afs/write.c                     1
-rw-r--r--  fs/btrfs/extent_io.c               2
-rw-r--r--  fs/fs-writeback.c                294
-rw-r--r--  fs/ubifs/budget.c                  2
-rw-r--r--  include/linux/backing-dev.h        4
-rw-r--r--  include/linux/blkdev.h             5
-rw-r--r--  include/linux/drbd.h               2
-rw-r--r--  include/linux/writeback.h          7
-rw-r--r--  mm/backing-dev.c                   3
-rw-r--r--  mm/page-writeback.c                8
20 files changed, 756 insertions, 520 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index f84cce42fc58..5cb6ca99fd7f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2114,7 +2114,8 @@ static bool blk_update_bidi_request(struct request *rq, int error,
blk_update_request(rq->next_rq, error, bidi_bytes))
return true;
- add_disk_randomness(rq->rq_disk);
+ if (blk_queue_add_random(rq->q))
+ add_disk_randomness(rq->rq_disk);
return false;
}
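
[Annotation] The old code credited disk-timing entropy unconditionally; the gate above makes it per-queue, paired with the new "add_random" sysfs attribute added in blk-sysfs.c below. A minimal standalone model of the gating pattern (the helper's exact definition in blkdev.h is assumed here to be the usual queue-flag test):

/* Standalone model of per-queue entropy gating; not the kernel code itself. */
#include <stdio.h>

#define QUEUE_FLAG_ADD_RANDOM 16        /* illustrative bit position */

struct request_queue { unsigned long queue_flags; };

/* Assumed shape of blk_queue_add_random(): a plain queue-flag test. */
static int blk_queue_add_random(const struct request_queue *q)
{
	return (q->queue_flags >> QUEUE_FLAG_ADD_RANDOM) & 1;
}

static void add_disk_randomness(void) { puts("entropy credited"); }

int main(void)
{
	struct request_queue q = { .queue_flags = 1UL << QUEUE_FLAG_ADD_RANDOM };

	if (blk_queue_add_random(&q))   /* same gate as blk_update_bidi_request() */
		add_disk_randomness();
	return 0;
}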
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 306759bbdf1b..001ab18078f5 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -180,26 +180,36 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
return queue_var_show(max_hw_sectors_kb, (page));
}
-static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
-{
- return queue_var_show(!blk_queue_nonrot(q), page);
+#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
+static ssize_t \
+queue_show_##name(struct request_queue *q, char *page) \
+{ \
+ int bit; \
+ bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \
+ return queue_var_show(neg ? !bit : bit, page); \
+} \
+static ssize_t \
+queue_store_##name(struct request_queue *q, const char *page, size_t count) \
+{ \
+ unsigned long val; \
+ ssize_t ret; \
+ ret = queue_var_store(&val, page, count); \
+ if (neg) \
+ val = !val; \
+ \
+ spin_lock_irq(q->queue_lock); \
+ if (val) \
+ queue_flag_set(QUEUE_FLAG_##flag, q); \
+ else \
+ queue_flag_clear(QUEUE_FLAG_##flag, q); \
+ spin_unlock_irq(q->queue_lock); \
+ return ret; \
}
-static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
- size_t count)
-{
- unsigned long nm;
- ssize_t ret = queue_var_store(&nm, page, count);
-
- spin_lock_irq(q->queue_lock);
- if (nm)
- queue_flag_clear(QUEUE_FLAG_NONROT, q);
- else
- queue_flag_set(QUEUE_FLAG_NONROT, q);
- spin_unlock_irq(q->queue_lock);
-
- return ret;
-}
+QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
+QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
+QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
+#undef QUEUE_SYSFS_BIT_FNS
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
@@ -250,27 +260,6 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_iostats_show(struct request_queue *q, char *page)
-{
- return queue_var_show(blk_queue_io_stat(q), page);
-}
-
-static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
- size_t count)
-{
- unsigned long stats;
- ssize_t ret = queue_var_store(&stats, page, count);
-
- spin_lock_irq(q->queue_lock);
- if (stats)
- queue_flag_set(QUEUE_FLAG_IO_STAT, q);
- else
- queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
- spin_unlock_irq(q->queue_lock);
-
- return ret;
-}
-
static struct queue_sysfs_entry queue_requests_entry = {
.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
.show = queue_requests_show,
@@ -352,8 +341,8 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
- .show = queue_nonrot_show,
- .store = queue_nonrot_store,
+ .show = queue_show_nonrot,
+ .store = queue_store_nonrot,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
@@ -370,8 +359,14 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
static struct queue_sysfs_entry queue_iostats_entry = {
.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
- .show = queue_iostats_show,
- .store = queue_iostats_store,
+ .show = queue_show_iostats,
+ .store = queue_store_iostats,
+};
+
+static struct queue_sysfs_entry queue_random_entry = {
+ .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_show_random,
+ .store = queue_store_random,
};
static struct attribute *default_attrs[] = {
@@ -394,6 +389,7 @@ static struct attribute *default_attrs[] = {
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
+ &queue_random_entry.attr,
NULL,
};
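
[Annotation] For reference, this is what QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1) expands to once the macro arguments are substituted; with neg = 1 both directions are inverted, which is why the attribute file is named "rotational" while the flag is NONROT. Hand expansion, shown for illustration only:

static ssize_t queue_show_nonrot(struct request_queue *q, char *page)
{
	int bit;

	bit = test_bit(QUEUE_FLAG_NONROT, &q->queue_flags);
	return queue_var_show(!bit, page);              /* neg == 1 inverts */
}

static ssize_t queue_store_nonrot(struct request_queue *q, const char *page,
				  size_t count)
{
	unsigned long val;
	ssize_t ret;

	ret = queue_var_store(&val, page, count);
	val = !val;                                     /* neg == 1 inverts */

	spin_lock_irq(q->queue_lock);
	if (val)
		queue_flag_set(QUEUE_FLAG_NONROT, q);
	else
		queue_flag_clear(QUEUE_FLAG_NONROT, q);
	spin_unlock_irq(q->queue_lock);
	return ret;
}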
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 51ceaee98f9f..10a0268a1f92 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -56,16 +56,14 @@
#include <linux/kthread.h>
#define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
+#define DRIVER_NAME "HP CISS Driver (v 3.6.26)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26)
/* Embedded module documentation macros - see modules.h */
MODULE_AUTHOR("Hewlett-Packard Company");
MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
-MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
- " SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
- " Smart Array G2 Series SAS/SATA Controllers");
-MODULE_VERSION("3.6.20");
+MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
+MODULE_VERSION("3.6.26");
MODULE_LICENSE("GPL");
static int cciss_allow_hpsa;
@@ -107,6 +105,11 @@ static const struct pci_device_id cciss_pci_device_id[] = {
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3249},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324A},
{PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x324B},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3250},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3251},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3252},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3253},
+ {PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_CISSE, 0x103C, 0x3254},
{0,}
};
@@ -146,6 +149,11 @@ static struct board_type products[] = {
{0x3249103C, "Smart Array P812", &SA5_access},
{0x324A103C, "Smart Array P712m", &SA5_access},
{0x324B103C, "Smart Array P711m", &SA5_access},
+ {0x3250103C, "Smart Array", &SA5_access},
+ {0x3251103C, "Smart Array", &SA5_access},
+ {0x3252103C, "Smart Array", &SA5_access},
+ {0x3253103C, "Smart Array", &SA5_access},
+ {0x3254103C, "Smart Array", &SA5_access},
};
/* How long to wait (in milliseconds) for board to go into simple mode */
@@ -167,7 +175,8 @@ static DEFINE_MUTEX(scan_mutex);
static LIST_HEAD(scan_q);
static void do_cciss_request(struct request_queue *q);
-static irqreturn_t do_cciss_intr(int irq, void *dev_id);
+static irqreturn_t do_cciss_intx(int irq, void *dev_id);
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
static int cciss_open(struct block_device *bdev, fmode_t mode);
static int cciss_release(struct gendisk *disk, fmode_t mode);
static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
@@ -197,7 +206,6 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
int attempt_retry);
static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
-static void fail_all_cmds(unsigned long ctlr);
static int add_to_scan_list(struct ctlr_info *h);
static int scan_thread(void *data);
static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
@@ -205,6 +213,12 @@ static void cciss_hba_release(struct device *dev);
static void cciss_device_release(struct device *dev);
static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
+static inline u32 next_command(ctlr_info_t *h);
+
+/* performant mode helper functions */
+static void calc_bucket_map(int *bucket, int num_buckets, int nsgs,
+ int *bucket_map);
+static void cciss_put_controller_into_performant_mode(ctlr_info_t *h);
#ifdef CONFIG_PROC_FS
static void cciss_procinit(int i);
@@ -231,6 +245,16 @@ static const struct block_device_operations cciss_fops = {
.revalidate_disk = cciss_revalidate,
};
+/* set_performant_mode: Modify the tag for cciss performant
+ * set bit 0 for pull model, bits 3-1 for block fetch
+ * register number
+ */
+static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
+{
+ if (likely(h->transMethod == CFGTBL_Trans_Performant))
+ c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+}
+
/*
* Enqueuing and dequeuing functions for cmdlists.
*/
@@ -257,6 +281,18 @@ static inline void removeQ(CommandList_struct *c)
hlist_del_init(&c->list);
}
+static void enqueue_cmd_and_start_io(ctlr_info_t *h,
+ CommandList_struct *c)
+{
+ unsigned long flags;
+ set_performant_mode(h, c);
+ spin_lock_irqsave(&h->lock, flags);
+ addQ(&h->reqQ, c);
+ h->Qdepth++;
+ start_io(h);
+ spin_unlock_irqrestore(&h->lock, flags);
+}
+
static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
int nr_cmds)
{
@@ -366,7 +402,7 @@ static void cciss_seq_show_header(struct seq_file *seq)
h->product_name,
(unsigned long)h->board_id,
h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
- h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
+ h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
h->num_luns,
h->Qdepth, h->commands_outstanding,
h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -1377,7 +1413,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
CommandList_struct *c;
char *buff = NULL;
u64bit temp64;
- unsigned long flags;
DECLARE_COMPLETION_ONSTACK(wait);
if (!arg)
@@ -1449,13 +1484,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
}
c->waiting = &wait;
- /* Put the request on the tail of the request queue */
- spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- addQ(&host->reqQ, c);
- host->Qdepth++;
- start_io(host);
- spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-
+ enqueue_cmd_and_start_io(host, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
@@ -1495,7 +1524,6 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
unsigned char **buff = NULL;
int *buff_size = NULL;
u64bit temp64;
- unsigned long flags;
BYTE sg_used = 0;
int status = 0;
int i;
@@ -1602,12 +1630,7 @@ static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
}
}
c->waiting = &wait;
- /* Put the request on the tail of the request queue */
- spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
- addQ(&host->reqQ, c);
- host->Qdepth++;
- start_io(host);
- spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+ enqueue_cmd_and_start_io(host, c);
wait_for_completion(&wait);
/* unlock the buffers from DMA */
for (i = 0; i < sg_used; i++) {
@@ -1729,8 +1752,8 @@ static void cciss_softirq_done(struct request *rq)
CommandList_struct *cmd = rq->completion_data;
ctlr_info_t *h = hba[cmd->ctlr];
SGDescriptor_struct *curr_sg = cmd->SG;
- unsigned long flags;
u64bit temp64;
+ unsigned long flags;
int i, ddir;
int sg_index = 0;
@@ -2679,17 +2702,11 @@ static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
{
DECLARE_COMPLETION_ONSTACK(wait);
u64bit buff_dma_handle;
- unsigned long flags;
int return_status = IO_OK;
resend_cmd2:
c->waiting = &wait;
- /* Put the request on the tail of the queue and send it */
- spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
- addQ(&h->reqQ, c);
- h->Qdepth++;
- start_io(h);
- spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+ enqueue_cmd_and_start_io(h, c);
wait_for_completion(&wait);
@@ -3132,6 +3149,34 @@ after_error_processing:
blk_complete_request(cmd->rq);
}
+static inline u32 cciss_tag_contains_index(u32 tag)
+{
+#define DIRECT_LOOKUP_BIT 0x10
+ return tag & DIRECT_LOOKUP_BIT;
+}
+
+static inline u32 cciss_tag_to_index(u32 tag)
+{
+#define DIRECT_LOOKUP_SHIFT 5
+ return tag >> DIRECT_LOOKUP_SHIFT;
+}
+
+static inline u32 cciss_tag_discard_error_bits(u32 tag)
+{
+#define CCISS_ERROR_BITS 0x03
+ return tag & ~CCISS_ERROR_BITS;
+}
+
+static inline void cciss_mark_tag_indexed(u32 *tag)
+{
+ *tag |= DIRECT_LOOKUP_BIT;
+}
+
+static inline void cciss_set_tag_index(u32 *tag, u32 index)
+{
+ *tag |= (index << DIRECT_LOOKUP_SHIFT);
+}
+
/*
* Get a request and submit it to the controller.
*/
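
[Annotation] Taken together, the helpers define the new tag layout: on submission, bit 0 selects the pull model and bits 1-3 carry the block fetch table entry (see set_performant_mode() above), bit 4 (DIRECT_LOOKUP_BIT) marks an indexed tag, and bits 5 and up hold the command-pool index; on completion the controller reuses the low two bits for error reporting, which cciss_tag_discard_error_bits() masks off. A standalone round-trip check of that encoding, using the constants from this diff:

/* Standalone check of the cciss tag encoding; constants from the diff above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DIRECT_LOOKUP_BIT   0x10
#define DIRECT_LOOKUP_SHIFT 5
#define CCISS_ERROR_BITS    0x03

int main(void)
{
	uint32_t index = 42;                  /* hypothetical command pool index */
	uint32_t submitted, raw_tag;

	submitted = (index << DIRECT_LOOKUP_SHIFT) | DIRECT_LOOKUP_BIT;
	raw_tag   = submitted | CCISS_ERROR_BITS;  /* controller sets low bits */

	assert(raw_tag & DIRECT_LOOKUP_BIT);               /* contains_index    */
	assert((raw_tag >> DIRECT_LOOKUP_SHIFT) == index); /* tag_to_index      */
	assert((raw_tag & ~CCISS_ERROR_BITS) == submitted);/* discard_error_bits */
	printf("index %u survives the round trip\n", index);
	return 0;
}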
@@ -3180,8 +3225,8 @@ static void do_cciss_request(struct request_queue *q)
/* got command from pool, so use the command block index instead */
/* for direct lookups. */
/* The first 2 bits are reserved for controller error reporting. */
- c->Header.Tag.lower = (c->cmdindex << 3);
- c->Header.Tag.lower |= 0x04; /* flag for direct lookup. */
+ cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex);
+ cciss_mark_tag_indexed(&c->Header.Tag.lower);
memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
c->Request.CDBLen = 10; /* 12 byte commands not in FW yet; */
c->Request.Type.Type = TYPE_CMD; /* It is a command. */
@@ -3242,9 +3287,12 @@ static void do_cciss_request(struct request_queue *q)
blk_rq_sectors(creq), seg, chained);
#endif /* CCISS_DEBUG */
- c->Header.SGList = c->Header.SGTotal = seg + chained;
- if (seg > h->max_cmd_sgentries)
+ c->Header.SGTotal = seg + chained;
+ if (seg <= h->max_cmd_sgentries)
+ c->Header.SGList = c->Header.SGTotal;
+ else
c->Header.SGList = h->max_cmd_sgentries;
+ set_performant_mode(h, c);
if (likely(blk_fs_request(creq))) {
if(h->cciss_read == CCISS_READ_10) {
@@ -3313,16 +3361,97 @@ static inline int interrupt_pending(ctlr_info_t *h)
static inline long interrupt_not_for_us(ctlr_info_t *h)
{
- return (((h->access.intr_pending(h) == 0) ||
- (h->interrupts_enabled == 0)));
+ return !(h->msi_vector || h->msix_vector) &&
+ ((h->access.intr_pending(h) == 0) ||
+ (h->interrupts_enabled == 0));
}
-static irqreturn_t do_cciss_intr(int irq, void *dev_id)
+static inline int bad_tag(ctlr_info_t *h, u32 tag_index,
+ u32 raw_tag)
{
- ctlr_info_t *h = dev_id;
+ if (unlikely(tag_index >= h->nr_cmds)) {
+ dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag);
+ return 1;
+ }
+ return 0;
+}
+
+static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c,
+ u32 raw_tag)
+{
+ removeQ(c);
+ if (likely(c->cmd_type == CMD_RWREQ))
+ complete_command(h, c, 0);
+ else if (c->cmd_type == CMD_IOCTL_PEND)
+ complete(c->waiting);
+#ifdef CONFIG_CISS_SCSI_TAPE
+ else if (c->cmd_type == CMD_SCSI)
+ complete_scsi_command(c, 0, raw_tag);
+#endif
+}
+
+static inline u32 next_command(ctlr_info_t *h)
+{
+ u32 a;
+
+ if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+ return h->access.command_completed(h);
+
+ if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+ a = *(h->reply_pool_head); /* Next cmd in ring buffer */
+ (h->reply_pool_head)++;
+ h->commands_outstanding--;
+ } else {
+ a = FIFO_EMPTY;
+ }
+ /* Check for wraparound */
+ if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+ h->reply_pool_head = h->reply_pool;
+ h->reply_pool_wraparound ^= 1;
+ }
+ return a;
+}
+
+/* process completion of an indexed ("direct lookup") command */
+static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+ u32 tag_index;
CommandList_struct *c;
+
+ tag_index = cciss_tag_to_index(raw_tag);
+ if (bad_tag(h, tag_index, raw_tag))
+ return next_command(h);
+ c = h->cmd_pool + tag_index;
+ finish_cmd(h, c, raw_tag);
+ return next_command(h);
+}
+
+/* process completion of a non-indexed command */
+static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+ u32 tag;
+ CommandList_struct *c = NULL;
+ struct hlist_node *tmp;
+ __u32 busaddr_masked, tag_masked;
+
+ tag = cciss_tag_discard_error_bits(raw_tag);
+ hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
+ busaddr_masked = cciss_tag_discard_error_bits(c->busaddr);
+ tag_masked = cciss_tag_discard_error_bits(tag);
+ if (busaddr_masked == tag_masked) {
+ finish_cmd(h, c, raw_tag);
+ return next_command(h);
+ }
+ }
+ bad_tag(h, h->nr_cmds + 1, raw_tag);
+ return next_command(h);
+}
+
+static irqreturn_t do_cciss_intx(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
unsigned long flags;
- __u32 a, a1, a2;
+ u32 raw_tag;
if (interrupt_not_for_us(h))
return IRQ_NONE;
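
[Annotation] The performant-mode completion queue that next_command() drains is a ring of 64-bit entries whose low bit alternates on every pass: the host consumes entries while the parity bit matches reply_pool_wraparound and flips its expected parity when the head wraps, so a zeroed (never-written) slot reads as FIFO_EMPTY. A standalone model of that consumer (ring size and tag values are illustrative):

/* Standalone model of the next_command() ring consumer. */
#include <stdint.h>
#include <stdio.h>

#define FIFO_EMPTY 0xffffffffUL
#define MAX_CMDS   4                     /* illustrative ring size */

static uint64_t reply_pool[MAX_CMDS];    /* zeroed, as the driver memsets it */
static uint64_t *reply_pool_head = reply_pool;
static unsigned char wraparound = 1;     /* spec: init to 1 */

static unsigned long next_command(void)
{
	unsigned long a;

	if ((*reply_pool_head & 1) == wraparound) {
		a = (unsigned long)*reply_pool_head;  /* next tag in the ring */
		reply_pool_head++;
	} else {
		a = FIFO_EMPTY;          /* controller hasn't written it yet */
	}
	if (reply_pool_head == reply_pool + MAX_CMDS) {
		reply_pool_head = reply_pool;  /* wrap, expect flipped parity */
		wraparound ^= 1;
	}
	return a;
}

int main(void)
{
	/* Pretend the controller posted two completions with parity bit 1. */
	reply_pool[0] = (7 << 1) | 1;
	reply_pool[1] = (9 << 1) | 1;
	for (unsigned long t; (t = next_command()) != FIFO_EMPTY; )
		printf("completed tag 0x%lx\n", t);
	return 0;
}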
@@ -3332,50 +3461,41 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
*/
spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
while (interrupt_pending(h)) {
- while ((a = get_next_completion(h)) != FIFO_EMPTY) {
- a1 = a;
- if ((a & 0x04)) {
- a2 = (a >> 3);
- if (a2 >= h->nr_cmds) {
- printk(KERN_WARNING
- "cciss: controller cciss%d failed, stopping.\n",
- h->ctlr);
- spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
- fail_all_cmds(h->ctlr);
- return IRQ_HANDLED;
- }
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY) {
+ if (cciss_tag_contains_index(raw_tag))
+ raw_tag = process_indexed_cmd(h, raw_tag);
+ else
+ raw_tag = process_nonindexed_cmd(h, raw_tag);
+ }
+ }
- c = h->cmd_pool + a2;
- a = c->busaddr;
+ spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+ return IRQ_HANDLED;
+}
- } else {
- struct hlist_node *tmp;
+/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never
+ * check the interrupt pending register because it is not set.
+ */
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id)
+{
+ ctlr_info_t *h = dev_id;
+ unsigned long flags;
+ u32 raw_tag;
- a &= ~3;
- c = NULL;
- hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
- if (c->busaddr == a)
- break;
- }
- }
- /*
- * If we've found the command, take it off the
- * completion Q and free it
- */
- if (c && c->busaddr == a) {
- removeQ(c);
- if (c->cmd_type == CMD_RWREQ) {
- complete_command(h, c, 0);
- } else if (c->cmd_type == CMD_IOCTL_PEND) {
- complete(c->waiting);
- }
-# ifdef CONFIG_CISS_SCSI_TAPE
- else if (c->cmd_type == CMD_SCSI)
- complete_scsi_command(c, 0, a1);
-# endif
- continue;
- }
- }
+ if (interrupt_not_for_us(h))
+ return IRQ_NONE;
+ /*
+ * If there are completed commands in the completion queue,
+ * we had better do something about it.
+ */
+ spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+ raw_tag = get_next_completion(h);
+ while (raw_tag != FIFO_EMPTY) {
+ if (cciss_tag_contains_index(raw_tag))
+ raw_tag = process_indexed_cmd(h, raw_tag);
+ else
+ raw_tag = process_nonindexed_cmd(h, raw_tag);
}
spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
@@ -3630,6 +3750,155 @@ static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
return -1;
}
+/* Fill in bucket_map[], given nsgs (the max number of
+ * scatter gather elements supported) and bucket[],
+ * which is an array of 8 integers. The bucket[] array
+ * contains 8 different DMA transfer sizes (in 16
+ * byte increments) which the controller uses to fetch
+ * commands. This function fills in bucket_map[], which
+ * maps a given number of scatter gather elements to one of
+ * the 8 DMA transfer sizes. The point of it is to allow the
+ * controller to only do as much DMA as needed to fetch the
+ * command, with the DMA transfer size encoded in the lower
+ * bits of the command address.
+ */
+static void calc_bucket_map(int bucket[], int num_buckets,
+ int nsgs, int *bucket_map)
+{
+ int i, j, b, size;
+
+ /* even a command with 0 SGs requires 4 blocks */
+#define MINIMUM_TRANSFER_BLOCKS 4
+#define NUM_BUCKETS 8
+ /* Note, bucket_map must have nsgs+1 entries. */
+ for (i = 0; i <= nsgs; i++) {
+ /* Compute size of a command with i SG entries */
+ size = i + MINIMUM_TRANSFER_BLOCKS;
+ b = num_buckets; /* Assume the biggest bucket */
+ /* Find the bucket that is just big enough */
+ for (j = 0; j < 8; j++) {
+ if (bucket[j] >= size) {
+ b = j;
+ break;
+ }
+ }
+ /* for a command with i SG entries, use bucket b. */
+ bucket_map[i] = b;
+ }
+}
+
+static void
+cciss_put_controller_into_performant_mode(ctlr_info_t *h)
+{
+ int l = 0;
+ __u32 trans_support;
+ __u32 trans_offset;
+ /*
+ * 5 = 1 s/g entry or 4k
+ * 6 = 2 s/g entry or 8k
+ * 8 = 4 s/g entry or 16k
+ * 10 = 6 s/g entry or 24k
+ */
+ int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
+ unsigned long register_value;
+
+ BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
+
+ /* Attempt to put controller into performant mode if supported */
+ /* Does board support performant mode? */
+ trans_support = readl(&(h->cfgtable->TransportSupport));
+ if (!(trans_support & PERFORMANT_MODE))
+ return;
+
+ printk(KERN_WARNING "cciss%d: Placing controller into "
+ "performant mode\n", h->ctlr);
+ /* Performant mode demands commands on a 32 byte boundary
+ * pci_alloc_consistent aligns on page boundarys already.
+ * Just need to check if divisible by 32
+ */
+ if ((sizeof(CommandList_struct) % 32) != 0) {
+ printk(KERN_WARNING "%s %d %s\n",
+ "cciss info: command size[",
+ (int)sizeof(CommandList_struct),
+ "] not divisible by 32, no performant mode..\n");
+ return;
+ }
+
+ /* Performant mode ring buffer and supporting data structures */
+ h->reply_pool = (__u64 *)pci_alloc_consistent(
+ h->pdev, h->max_commands * sizeof(__u64),
+ &(h->reply_pool_dhandle));
+
+ /* Need a block fetch table for performant mode */
+ h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
+ sizeof(__u32)), GFP_KERNEL);
+
+ if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
+ goto clean_up;
+
+ h->reply_pool_wraparound = 1; /* spec: init to 1 */
+
+ /* Controller spec: zero out this buffer. */
+ memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
+ h->reply_pool_head = h->reply_pool;
+
+ trans_offset = readl(&(h->cfgtable->TransMethodOffset));
+ calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
+ h->blockFetchTable);
+ writel(bft[0], &h->transtable->BlockFetch0);
+ writel(bft[1], &h->transtable->BlockFetch1);
+ writel(bft[2], &h->transtable->BlockFetch2);
+ writel(bft[3], &h->transtable->BlockFetch3);
+ writel(bft[4], &h->transtable->BlockFetch4);
+ writel(bft[5], &h->transtable->BlockFetch5);
+ writel(bft[6], &h->transtable->BlockFetch6);
+ writel(bft[7], &h->transtable->BlockFetch7);
+
+ /* size of controller ring buffer */
+ writel(h->max_commands, &h->transtable->RepQSize);
+ writel(1, &h->transtable->RepQCount);
+ writel(0, &h->transtable->RepQCtrAddrLow32);
+ writel(0, &h->transtable->RepQCtrAddrHigh32);
+ writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
+ writel(0, &h->transtable->RepQAddr0High32);
+ writel(CFGTBL_Trans_Performant,
+ &(h->cfgtable->HostWrite.TransportRequest));
+
+ h->transMethod = CFGTBL_Trans_Performant;
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+ /* under certain very rare conditions, this can take awhile.
+ * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
+ * as we enter this code.) */
+ for (l = 0; l < MAX_CONFIG_WAIT; l++) {
+ register_value = readl(h->vaddr + SA5_DOORBELL);
+ if (!(register_value & CFGTBL_ChangeReq))
+ break;
+ /* delay and try again */
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ register_value = readl(&(h->cfgtable->TransportActive));
+ if (!(register_value & CFGTBL_Trans_Performant)) {
+ printk(KERN_WARNING "cciss: unable to get board into"
+ " performant mode\n");
+ return;
+ }
+
+ /* Change the access methods to the performant access methods */
+ h->access = SA5_performant_access;
+
+ return;
+clean_up:
+ kfree(h->blockFetchTable);
+ if (h->reply_pool)
+ pci_free_consistent(h->pdev,
+ h->max_commands * sizeof(__u64),
+ h->reply_pool,
+ h->reply_pool_dhandle);
+ return;
+
+} /* cciss_put_controller_into_performant_mode */
+
/* If MSI/MSI-X is supported by the kernel we will try to enable it on
* controllers that are capable. If not, we use IO-APIC mode.
*/
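
[Annotation] A worked example of calc_bucket_map() with the bft[] table above: a command with 3 SG entries needs 3 + 4 = 7 sixteen-byte blocks, the smallest bucket that fits is bft[2] = 8, so bucket_map[3] = 2 and the controller fetches 8 blocks for such commands. A standalone copy that prints this (MAXSGENTRIES == 32 is assumed for the last table entry):

/* Standalone copy of calc_bucket_map() with the bft[] table from the diff. */
#include <stdio.h>

#define MINIMUM_TRANSFER_BLOCKS 4

static void calc_bucket_map(int bucket[], int num_buckets, int nsgs,
			    int *bucket_map)
{
	for (int i = 0; i <= nsgs; i++) {
		int size = i + MINIMUM_TRANSFER_BLOCKS; /* blocks for i SGs */
		int b = num_buckets;          /* assume the biggest bucket */
		/* original loops over a literal 8; equivalent here */
		for (int j = 0; j < num_buckets; j++) {
			if (bucket[j] >= size) {
				b = j;        /* first bucket big enough */
				break;
			}
		}
		bucket_map[i] = b;
	}
}

int main(void)
{
	/* 36 stands in for MAXSGENTRIES + 4, assuming MAXSGENTRIES == 32. */
	int bft[8] = { 5, 6, 8, 10, 12, 20, 28, 36 };
	int map[33];                          /* nsgs + 1 entries */

	calc_bucket_map(bft, 8, 32, map);
	printf("3 SG entries -> bucket %d (holds %d blocks)\n",
	       map[3], bft[map[3]]);
	return 0;
}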
@@ -3679,7 +3948,7 @@ static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
default_int_mode:
#endif /* CONFIG_PCI_MSI */
/* if we get here we're going to use the default interrupt mode */
- c->intr[SIMPLE_MODE_INT] = pdev->irq;
+ c->intr[PERF_MODE_INT] = pdev->irq;
return;
}
@@ -3691,6 +3960,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
__u32 cfg_base_addr;
__u64 cfg_base_addr_index;
int i, prod_index, err;
+ __u32 trans_offset;
subsystem_vendor_id = pdev->subsystem_vendor;
subsystem_device_id = pdev->subsystem_device;
@@ -3804,11 +4074,16 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
cfg_base_addr_index) +
cfg_offset, sizeof(CfgTable_struct));
+ /* Find performant mode table. */
+ trans_offset = readl(&(c->cfgtable->TransMethodOffset));
+ c->transtable = remap_pci_mem(pci_resource_start(pdev,
+ cfg_base_addr_index) + cfg_offset+trans_offset,
+ sizeof(*c->transtable));
c->board_id = board_id;
-#ifdef CCISS_DEBUG
- print_cfg_table(c->cfgtable);
-#endif /* CCISS_DEBUG */
+ #ifdef CCISS_DEBUG
+ print_cfg_table(c->cfgtable);
+ #endif /* CCISS_DEBUG */
/* Some controllers support Zero Memory Raid (ZMR).
* When configured in ZMR mode the number of supported
@@ -3818,7 +4093,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
* are supported on the controller then subtract 4 to
* leave a little room for ioctl calls.
*/
- c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
+ c->max_commands = readl(&(c->cfgtable->MaxPerformantModeCommands));
c->maxsgentries = readl(&(c->cfgtable->MaxSGElements));
/*
@@ -3863,7 +4138,7 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
* kernels revealed a bug in the refetch if dom0 resides on a P600.
*/
if(board_id == 0x3225103C) {
- __u32 dma_prefetch;
+ __u32 dma_prefetch;
__u32 dma_refetch;
dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
dma_prefetch |= 0x8000;
@@ -3874,38 +4149,9 @@ static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
}
#ifdef CCISS_DEBUG
- printk("Trying to put board into Simple mode\n");
-#endif /* CCISS_DEBUG */
- c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
- /* Update the field, and then ring the doorbell */
- writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
- writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
-
- /* under certain very rare conditions, this can take awhile.
- * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
- * as we enter this code.) */
- for (i = 0; i < MAX_CONFIG_WAIT; i++) {
- if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
- break;
- /* delay and try again */
- set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(msecs_to_jiffies(1));
- }
-
-#ifdef CCISS_DEBUG
- printk(KERN_DEBUG "I counter got to %d %x\n", i,
- readl(c->vaddr + SA5_DOORBELL));
-#endif /* CCISS_DEBUG */
-#ifdef CCISS_DEBUG
- print_cfg_table(c->cfgtable);
+ printk(KERN_WARNING "Trying to put board into Performant mode\n");
#endif /* CCISS_DEBUG */
-
- if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
- printk(KERN_WARNING "cciss: unable to get board into"
- " simple mode\n");
- err = -ENODEV;
- goto err_out_free_res;
- }
+ cciss_put_controller_into_performant_mode(c);
return 0;
err_out_free_res:
@@ -4190,7 +4436,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
i = alloc_cciss_hba();
if (i < 0)
return -1;
-
hba[i]->busy_initializing = 1;
INIT_HLIST_HEAD(&hba[i]->cmpQ);
INIT_HLIST_HEAD(&hba[i]->reqQ);
@@ -4238,16 +4483,26 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
/* make sure the board interrupts are off */
hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
- if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
- IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
- printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
- hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
- goto clean2;
+ if (hba[i]->msi_vector || hba[i]->msix_vector) {
+ if (request_irq(hba[i]->intr[PERF_MODE_INT],
+ do_cciss_msix_intr,
+ IRQF_DISABLED, hba[i]->devname, hba[i])) {
+ printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
+ hba[i]->intr[PERF_MODE_INT], hba[i]->devname);
+ goto clean2;
+ }
+ } else {
+ if (request_irq(hba[i]->intr[PERF_MODE_INT], do_cciss_intx,
+ IRQF_DISABLED, hba[i]->devname, hba[i])) {
+ printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
+ hba[i]->intr[PERF_MODE_INT], hba[i]->devname);
+ goto clean2;
+ }
}
printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
hba[i]->devname, pdev->device, pci_name(pdev),
- hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
+ hba[i]->intr[PERF_MODE_INT], dac ? "" : " not");
hba[i]->cmd_pool_bits =
kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
@@ -4353,7 +4608,7 @@ clean4:
hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
hba[i]->errinfo_pool,
hba[i]->errinfo_pool_dhandle);
- free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
+ free_irq(hba[i]->intr[PERF_MODE_INT], hba[i]);
clean2:
unregister_blkdev(hba[i]->major, hba[i]->devname);
clean1:
@@ -4395,7 +4650,7 @@ static void cciss_shutdown(struct pci_dev *pdev)
printk(KERN_WARNING "cciss%d: Error flushing cache\n",
h->ctlr);
h->access.set_intr_mask(h, CCISS_INTR_OFF);
- free_irq(h->intr[2], h);
+ free_irq(h->intr[PERF_MODE_INT], h);
}
static void __devexit cciss_remove_one(struct pci_dev *pdev)
@@ -4495,7 +4750,6 @@ static int __init cciss_init(void)
* array of them, the size must be a multiple of 8 bytes.
*/
BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);
-
printk(KERN_INFO DRIVER_NAME "\n");
err = bus_register(&cciss_bus_type);
@@ -4542,46 +4796,5 @@ static void __exit cciss_cleanup(void)
bus_unregister(&cciss_bus_type);
}
-static void fail_all_cmds(unsigned long ctlr)
-{
- /* If we get here, the board is apparently dead. */
- ctlr_info_t *h = hba[ctlr];
- CommandList_struct *c;
- unsigned long flags;
-
- printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
- h->alive = 0; /* the controller apparently died... */
-
- spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-
- pci_disable_device(h->pdev); /* Make sure it is really dead. */
-
- /* move everything off the request queue onto the completed queue */
- while (!hlist_empty(&h->reqQ)) {
- c = hlist_entry(h->reqQ.first, CommandList_struct, list);
- removeQ(c);
- h->Qdepth--;
- addQ(&h->cmpQ, c);
- }
-
- /* Now, fail everything on the completed queue with a HW error */
- while (!hlist_empty(&h->cmpQ)) {
- c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
- removeQ(c);
- if (c->cmd_type != CMD_MSG_STALE)
- c->err_info->CommandStatus = CMD_HARDWARE_ERR;
- if (c->cmd_type == CMD_RWREQ) {
- complete_command(h, c, 0);
- } else if (c->cmd_type == CMD_IOCTL_PEND)
- complete(c->waiting);
-#ifdef CONFIG_CISS_SCSI_TAPE
- else if (c->cmd_type == CMD_SCSI)
- complete_scsi_command(c, 0, 0);
-#endif
- }
- spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
- return;
-}
-
module_init(cciss_init);
module_exit(cciss_cleanup);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index c5d411174db0..8a9f5b58daa8 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -25,7 +25,7 @@ struct access_method {
void (*submit_command)(ctlr_info_t *h, CommandList_struct *c);
void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
unsigned long (*fifo_full)(ctlr_info_t *h);
- unsigned long (*intr_pending)(ctlr_info_t *h);
+ bool (*intr_pending)(ctlr_info_t *h);
unsigned long (*command_completed)(ctlr_info_t *h);
};
typedef struct _drive_info_struct
@@ -85,8 +85,8 @@ struct ctlr_info
int max_cmd_sgentries;
SGDescriptor_struct **cmd_sg_list;
-# define DOORBELL_INT 0
-# define PERF_MODE_INT 1
+# define PERF_MODE_INT 0
+# define DOORBELL_INT 1
# define SIMPLE_MODE_INT 2
# define MEMQ_MODE_INT 3
unsigned int intr[4];
@@ -137,10 +137,27 @@ struct ctlr_info
struct list_head scan_list;
struct completion scan_wait;
struct device dev;
+ /*
+ * Performant mode tables.
+ */
+ u32 trans_support;
+ u32 trans_offset;
+ struct TransTable_struct *transtable;
+ unsigned long transMethod;
+
+ /*
+ * Performant mode completion buffer
+ */
+ u64 *reply_pool;
+ dma_addr_t reply_pool_dhandle;
+ u64 *reply_pool_head;
+ size_t reply_pool_size;
+ unsigned char reply_pool_wraparound;
+ u32 *blockFetchTable;
};
-/* Defining the diffent access_menthods */
-/*
+/* Defining the diffent access_methods
+ *
* Memory mapped FIFO interface (SMART 53xx cards)
*/
#define SA5_DOORBELL 0x20
@@ -159,6 +176,15 @@ struct ctlr_info
#define SA5B_INTR_PENDING 0x04
#define FIFO_EMPTY 0xffffffff
#define CCISS_FIRMWARE_READY 0xffff0000 /* value in scratchpad register */
+/* Perf. mode flags */
+#define SA5_PERF_INTR_PENDING 0x04
+#define SA5_PERF_INTR_OFF 0x05
+#define SA5_OUTDB_STATUS_PERF_BIT 0x01
+#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
+#define SA5_OUTDB_CLEAR 0xA0
+#define SA5_OUTDB_CLEAR_PERF_BIT 0x01
+#define SA5_OUTDB_STATUS 0x9C
+
#define CISS_ERROR_BIT 0x02
@@ -170,8 +196,9 @@ struct ctlr_info
static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c)
{
#ifdef CCISS_DEBUG
- printk("Sending %x - down to controller\n", c->busaddr );
-#endif /* CCISS_DEBUG */
+ printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n",
+ h->ctlr, c->busaddr);
+#endif /* CCISS_DEBUG */
writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
h->commands_outstanding++;
if ( h->commands_outstanding > h->max_outstanding)
@@ -214,6 +241,20 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
}
}
+
+/* Performant mode intr_mask */
+static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+ if (val) { /* turn on interrupts */
+ h->interrupts_enabled = 1;
+ writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ } else {
+ h->interrupts_enabled = 0;
+ writel(SA5_PERF_INTR_OFF,
+ h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+ }
+}
+
/*
* Returns true if fifo is full.
*
@@ -250,10 +291,44 @@ static unsigned long SA5_completed(ctlr_info_t *h)
return ( register_value);
}
+
+/* Performant mode command completed */
+static unsigned long SA5_performant_completed(ctlr_info_t *h)
+{
+ unsigned long register_value = FIFO_EMPTY;
+
+ /* flush the controller write of the reply queue by reading
+ * outbound doorbell status register.
+ */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ /* msi auto clears the interrupt pending bit. */
+ if (!(h->msi_vector || h->msix_vector)) {
+ writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
+ /* Do a read in order to flush the write to the controller
+ * (as per spec.)
+ */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ }
+
+ if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+ register_value = *(h->reply_pool_head);
+ (h->reply_pool_head)++;
+ h->commands_outstanding--;
+ } else {
+ register_value = FIFO_EMPTY;
+ }
+ /* Check for wraparound */
+ if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+ h->reply_pool_head = h->reply_pool;
+ h->reply_pool_wraparound ^= 1;
+ }
+
+ return register_value;
+}
/*
* Returns true if an interrupt is pending..
*/
-static unsigned long SA5_intr_pending(ctlr_info_t *h)
+static bool SA5_intr_pending(ctlr_info_t *h)
{
unsigned long register_value =
readl(h->vaddr + SA5_INTR_STATUS);
@@ -268,7 +343,7 @@ static unsigned long SA5_intr_pending(ctlr_info_t *h)
/*
* Returns true if an interrupt is pending..
*/
-static unsigned long SA5B_intr_pending(ctlr_info_t *h)
+static bool SA5B_intr_pending(ctlr_info_t *h)
{
unsigned long register_value =
readl(h->vaddr + SA5_INTR_STATUS);
@@ -280,6 +355,20 @@ static unsigned long SA5B_intr_pending(ctlr_info_t *h)
return 0 ;
}
+static bool SA5_performant_intr_pending(ctlr_info_t *h)
+{
+ unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
+
+ if (!register_value)
+ return false;
+
+ if (h->msi_vector || h->msix_vector)
+ return true;
+
+ /* Read outbound doorbell to flush */
+ register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+ return register_value & SA5_OUTDB_STATUS_PERF_BIT;
+}
static struct access_method SA5_access = {
SA5_submit_command,
@@ -297,6 +386,14 @@ static struct access_method SA5B_access = {
SA5_completed,
};
+static struct access_method SA5_performant_access = {
+ SA5_submit_command,
+ SA5_performant_intr_mask,
+ SA5_fifo_full,
+ SA5_performant_intr_pending,
+ SA5_performant_completed,
+};
+
struct board_type {
__u32 board_id;
char *product_name;
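
[Annotation] SA5_performant_access slots the new routines into the same access_method table the rest of the driver already calls through, so switching transports reduces to one assignment, h->access = SA5_performant_access. A minimal standalone model of that vtable dispatch (types and return values are stand-ins):

/* Standalone model of the access_method dispatch used by cciss. */
#include <stdbool.h>
#include <stdio.h>

struct ctlr;                             /* stand-in for ctlr_info_t */

struct access_method {
	bool (*intr_pending)(struct ctlr *h);
	unsigned long (*command_completed)(struct ctlr *h);
};

struct ctlr { struct access_method access; };

static bool perf_intr_pending(struct ctlr *h) { (void)h; return true; }
static unsigned long perf_completed(struct ctlr *h) { (void)h; return 0x2f1; }

static const struct access_method SA5_performant_access = {
	.intr_pending      = perf_intr_pending,
	.command_completed = perf_completed,
};

int main(void)
{
	struct ctlr h = { .access = SA5_performant_access };

	/* Same call sites as the interrupt path: no transport branches. */
	if (h.access.intr_pending(&h))
		printf("raw tag 0x%lx\n", h.access.command_completed(&h));
	return 0;
}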
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index e624ff959cb6..936b9666da6a 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -54,6 +54,7 @@
#define CFGTBL_AccCmds 0x00000001l
#define CFGTBL_Trans_Simple 0x00000002l
+#define CFGTBL_Trans_Performant 0x00000004l
#define CFGTBL_BusType_Ultra2 0x00000001l
#define CFGTBL_BusType_Ultra3 0x00000002l
@@ -173,12 +174,15 @@ typedef struct _SGDescriptor_struct {
* PAD_64 can be adjusted independently as needed for 32-bit
* and 64-bits systems.
*/
-#define COMMANDLIST_ALIGNMENT (8)
+#define COMMANDLIST_ALIGNMENT (32)
#define IS_64_BIT ((sizeof(long) - 4)/4)
#define IS_32_BIT (!IS_64_BIT)
#define PAD_32 (0)
#define PAD_64 (4)
#define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
+#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 5
+
typedef struct _CommandList_struct {
CommandListHeader_struct Header;
RequestBlock_struct Request;
@@ -195,7 +199,7 @@ typedef struct _CommandList_struct {
struct completion *waiting;
int retry_count;
void * scsi_cmd;
- char pad[PADSIZE];
+ char pad[PADSIZE];
} CommandList_struct;
/* Configuration Table Structure */
@@ -209,12 +213,15 @@ typedef struct _HostWrite_struct {
typedef struct _CfgTable_struct {
BYTE Signature[4];
DWORD SpecValence;
+#define SIMPLE_MODE 0x02
+#define PERFORMANT_MODE 0x04
+#define MEMQ_MODE 0x08
DWORD TransportSupport;
DWORD TransportActive;
HostWrite_struct HostWrite;
DWORD CmdsOutMax;
DWORD BusTypes;
- DWORD Reserved;
+ DWORD TransMethodOffset;
BYTE ServerName[16];
DWORD HeartBeat;
DWORD SCSI_Prefetch;
@@ -222,6 +229,25 @@ typedef struct _CfgTable_struct {
DWORD MaxLogicalUnits;
DWORD MaxPhysicalDrives;
DWORD MaxPhysicalDrivesPerLogicalUnit;
+ DWORD MaxPerformantModeCommands;
} CfgTable_struct;
+
+struct TransTable_struct {
+ u32 BlockFetch0;
+ u32 BlockFetch1;
+ u32 BlockFetch2;
+ u32 BlockFetch3;
+ u32 BlockFetch4;
+ u32 BlockFetch5;
+ u32 BlockFetch6;
+ u32 BlockFetch7;
+ u32 RepQSize;
+ u32 RepQCount;
+ u32 RepQCtrAddrLow32;
+ u32 RepQCtrAddrHigh32;
+ u32 RepQAddr0Low32;
+ u32 RepQAddr0High32;
+};
+
#pragma pack()
#endif /* CCISS_CMD_H */
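
[Annotation] The jump of COMMANDLIST_ALIGNMENT from 8 to 32 exists because performant mode encodes its fetch hints and the direct-lookup bit in the low five bits of busaddr; a 32-byte-aligned command leaves bits 0-4 free. A standalone sketch of the compile-time invariant, in the spirit of the driver's BUILD_BUG_ON checks (the struct below is a hypothetical stand-in, not the real CommandList_struct):

/* Standalone sketch: commands must be 32-byte aligned so the low 5 bits
 * of busaddr can carry the pull-model flag, fetch entry and lookup bit. */
#include <stdio.h>

#define COMMANDLIST_ALIGNMENT 32

struct fake_cmd {                 /* hypothetical stand-in */
	char payload[224];        /* padded to a multiple of 32 bytes */
};

/* Compile-time assertion: negative array size if the invariant breaks. */
typedef char cmd_size_ok[(sizeof(struct fake_cmd) % COMMANDLIST_ALIGNMENT)
			 ? -1 : 1];

int main(void)
{
	printf("sizeof(struct fake_cmd) = %zu (multiple of %d)\n",
	       sizeof(struct fake_cmd), COMMANDLIST_ALIGNMENT);
	return 0;
}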
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 3381505c8a6c..8e0a709286df 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -93,8 +93,8 @@ static struct scsi_host_template cciss_driver_template = {
#pragma pack(1)
-#define SCSI_PAD_32 0
-#define SCSI_PAD_64 0
+#define SCSI_PAD_32 8
+#define SCSI_PAD_64 8
struct cciss_scsi_cmd_stack_elem_t {
CommandList_struct cmd;
@@ -861,6 +861,7 @@ cciss_scsi_detect(int ctlr)
sh->n_io_port = 0; // I don't think we use these two...
sh->this_id = SELF_SCSI_ID;
sh->sg_tablesize = hba[ctlr]->maxsgentries;
+ sh->max_cmd_len = MAX_COMMAND_SIZE;
((struct cciss_scsi_adapter_data_t *)
hba[ctlr]->scsi_ctlr)->scsi_host = sh;
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 91d11631cec9..abb4ec6690fc 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -386,7 +386,7 @@ static void __devexit cpqarray_remove_one_eisa (int i)
}
/* pdev is NULL for eisa */
-static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
+static int __devinit cpqarray_register_ctlr( int i, struct pci_dev *pdev)
{
struct request_queue *q;
int j;
@@ -503,7 +503,7 @@ Enomem4:
return -1;
}
-static int __init cpqarray_init_one( struct pci_dev *pdev,
+static int __devinit cpqarray_init_one( struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int i;
@@ -740,7 +740,7 @@ __setup("smart2=", cpqarray_setup);
/*
* Find an EISA controller's signature. Set up an hba if we find it.
*/
-static int __init cpqarray_eisa_detect(void)
+static int __devinit cpqarray_eisa_detect(void)
{
int i=0, j;
__u32 board_id;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6b077f93acc6..7258c95e895e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1236,8 +1236,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* Last part of the attaching process ... */
if (ns.conn >= C_CONNECTED &&
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
- kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
- mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
drbd_send_sizes(mdev, 0, 0); /* to start sync... */
drbd_send_uuids(mdev);
drbd_send_state(mdev);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 632e3245d1bb..2151f18b21de 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1114,6 +1114,12 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
mdev->new_state_tmp.i = ns.i;
ns.i = os.i;
ns.disk = D_NEGOTIATING;
+
+ /* We expect to receive up-to-date UUIDs soon.
+ To avoid a race in receive_state, free p_uuid while
+ holding req_lock. I.e. atomic with the state change */
+ kfree(mdev->p_uuid);
+ mdev->p_uuid = NULL;
}
rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 90c4038702da..82c30f9f81ca 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -514,8 +514,6 @@ static unsigned long fdc_busy;
static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
static DECLARE_WAIT_QUEUE_HEAD(command_done);
-#define NO_SIGNAL (!interruptible || !signal_pending(current))
-
/* Errors during formatting are counted here. */
static int format_errors;
@@ -578,7 +576,7 @@ static void reset_fdc(void);
#define NEED_1_RECAL -2
#define NEED_2_RECAL -3
-static int usage_count;
+static atomic_t usage_count = ATOMIC_INIT(0);
/* buffer related variables */
static int buffer_track = -1;
@@ -858,36 +856,15 @@ static void set_fdc(int drive)
}
/* locks the driver */
-static int _lock_fdc(int drive, bool interruptible, int line)
+static int lock_fdc(int drive, bool interruptible)
{
- if (!usage_count) {
- pr_err("Trying to lock fdc while usage count=0 at line %d\n",
- line);
+ if (WARN(atomic_read(&usage_count) == 0,
+ "Trying to lock fdc while usage count=0\n"))
return -1;
- }
-
- if (test_and_set_bit(0, &fdc_busy)) {
- DECLARE_WAITQUEUE(wait, current);
- add_wait_queue(&fdc_wait, &wait);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
-
- if (!test_and_set_bit(0, &fdc_busy))
- break;
-
- schedule();
-
- if (!NO_SIGNAL) {
- remove_wait_queue(&fdc_wait, &wait);
- return -EINTR;
- }
- }
+ if (wait_event_interruptible(fdc_wait, !test_and_set_bit(0, &fdc_busy)))
+ return -EINTR;
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&fdc_wait, &wait);
- flush_scheduled_work();
- }
command_status = FD_COMMAND_NONE;
__reschedule_timeout(drive, "lock fdc");
@@ -895,11 +872,8 @@ static int _lock_fdc(int drive, bool interruptible, int line)
return 0;
}
-#define lock_fdc(drive, interruptible) \
- _lock_fdc(drive, interruptible, __LINE__)
-
/* unlocks the driver */
-static inline void unlock_fdc(void)
+static void unlock_fdc(void)
{
unsigned long flags;
@@ -1224,7 +1198,7 @@ static int need_more_output(void)
/* Set perpendicular mode as required, based on data rate, if supported.
* 82077 Now tested. 1Mbps data rate only possible with 82077-1.
*/
-static inline void perpendicular_mode(void)
+static void perpendicular_mode(void)
{
unsigned char perp_mode;
@@ -2015,25 +1989,10 @@ static int wait_til_done(void (*handler)(void), bool interruptible)
schedule_bh(handler);
- if (command_status < 2 && NO_SIGNAL) {
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&command_done, &wait);
- for (;;) {
- set_current_state(interruptible ?
- TASK_INTERRUPTIBLE :
- TASK_UNINTERRUPTIBLE);
-
- if (command_status >= 2 || !NO_SIGNAL)
- break;
-
- is_alive(__func__, "");
- schedule();
- }
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&command_done, &wait);
- }
+ if (interruptible)
+ wait_event_interruptible(command_done, command_status >= 2);
+ else
+ wait_event(command_done, command_status >= 2);
if (command_status < 2) {
cancel_activity();
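
[Annotation] wait_event_interruptible(command_done, command_status >= 2) packages exactly the loop the removed code spelled out by hand. A sketch of roughly what the macro expands to (simplified; the real macro in <linux/wait.h> differs in detail):

/* Rough shape of wait_event_interruptible(command_done, command_status >= 2),
 * i.e. the add/recheck/schedule/remove dance the old code open-coded. */
ret = 0;
if (!(command_status >= 2)) {
	DEFINE_WAIT(__wait);

	for (;;) {
		prepare_to_wait(&command_done, &__wait, TASK_INTERRUPTIBLE);
		if (command_status >= 2)
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	finish_wait(&command_done, &__wait);
}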
@@ -2583,10 +2542,8 @@ static int make_raw_rw_request(void)
int tracksize;
int ssize;
- if (max_buffer_sectors == 0) {
- pr_info("VFS: Block I/O scheduled on unopened device\n");
+ if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
return 0;
- }
set_fdc((long)current_req->rq_disk->private_data);
@@ -2936,19 +2893,16 @@ static void process_fd_request(void)
static void do_fd_request(struct request_queue *q)
{
- if (max_buffer_sectors == 0) {
- pr_info("VFS: %s called on non-open device\n", __func__);
+ if (WARN(max_buffer_sectors == 0,
+ "VFS: %s called on non-open device\n", __func__))
return;
- }
- if (usage_count == 0) {
- pr_info("warning: usage count=0, current_req=%p exiting\n",
- current_req);
- pr_info("sect=%ld type=%x flags=%x\n",
- (long)blk_rq_pos(current_req), current_req->cmd_type,
- current_req->cmd_flags);
+ if (WARN(atomic_read(&usage_count) == 0,
+ "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+ current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
+ current_req->cmd_flags))
return;
- }
+
if (test_bit(0, &fdc_busy)) {
/* fdc busy, this new request will be treated when the
current one is done */
@@ -3033,7 +2987,7 @@ static inline int fd_copyin(void __user *param, void *address,
return copy_from_user(address, param, size) ? -EFAULT : 0;
}
-static inline const char *drive_name(int type, int drive)
+static const char *drive_name(int type, int drive)
{
struct floppy_struct *floppy;
@@ -3103,7 +3057,7 @@ static struct cont_t raw_cmd_cont = {
.done = raw_cmd_done
};
-static inline int raw_cmd_copyout(int cmd, void __user *param,
+static int raw_cmd_copyout(int cmd, void __user *param,
struct floppy_raw_cmd *ptr)
{
int ret;
@@ -3148,7 +3102,7 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr)
}
}
-static inline int raw_cmd_copyin(int cmd, void __user *param,
+static int raw_cmd_copyin(int cmd, void __user *param,
struct floppy_raw_cmd **rcmd)
{
struct floppy_raw_cmd *ptr;
@@ -3266,7 +3220,7 @@ static int invalidate_drive(struct block_device *bdev)
return 0;
}
-static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
+static int set_geometry(unsigned int cmd, struct floppy_struct *g,
int drive, int type, struct block_device *bdev)
{
int cnt;
@@ -3337,7 +3291,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
}
/* handle obsolete ioctl's */
-static int ioctl_table[] = {
+static unsigned int ioctl_table[] = {
FDCLRPRM,
FDSETPRM,
FDDEFPRM,
@@ -3365,7 +3319,7 @@ static int ioctl_table[] = {
FDTWADDLE
};
-static inline int normalize_ioctl(int *cmd, int *size)
+static int normalize_ioctl(unsigned int *cmd, int *size)
{
int i;
@@ -3829,6 +3783,7 @@ static int __floppy_read_block_0(struct block_device *bdev)
bio.bi_size = size;
bio.bi_bdev = bdev;
bio.bi_sector = 0;
+ bio.bi_flags = BIO_QUIET;
init_completion(&complete);
bio.bi_private = &complete;
bio.bi_end_io = floppy_rb0_complete;
@@ -3857,10 +3812,10 @@ static int floppy_revalidate(struct gendisk *disk)
if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
test_bit(drive, &fake_change) || NO_GEOM) {
- if (usage_count == 0) {
- pr_info("VFS: revalidate called on non-open device.\n");
+ if (WARN(atomic_read(&usage_count) == 0,
+ "VFS: revalidate called on non-open device.\n"))
return -EFAULT;
- }
+
lock_fdc(drive, false);
cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
test_bit(FD_VERIFY_BIT, &UDRS->flags));
@@ -4126,7 +4081,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
return sprintf(buf, "%X\n", UDP->cmos);
}
-DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
static void floppy_device_release(struct device *dev)
{
@@ -4175,6 +4130,9 @@ static int __init floppy_init(void)
int i, unit, drive;
int err, dr;
+ set_debugt();
+ interruptjiffies = resultjiffies = jiffies;
+
#if defined(CONFIG_PPC)
if (check_legacy_ioport(FDC1))
return -ENODEV;
@@ -4353,7 +4311,7 @@ out_unreg_platform_dev:
platform_device_unregister(&floppy_device[drive]);
out_flush_work:
flush_scheduled_work();
- if (usage_count)
+ if (atomic_read(&usage_count))
floppy_release_irq_and_dma();
out_unreg_region:
blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@@ -4370,8 +4328,6 @@ out_put_disk:
return err;
}
-static DEFINE_SPINLOCK(floppy_usage_lock);
-
static const struct io_region {
int offset;
int size;
@@ -4417,14 +4373,8 @@ static void floppy_release_regions(int fdc)
static int floppy_grab_irq_and_dma(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&floppy_usage_lock, flags);
- if (usage_count++) {
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+ if (atomic_inc_return(&usage_count) > 1)
return 0;
- }
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
/*
* We might have scheduled a free_irq(), wait it to
@@ -4435,9 +4385,7 @@ static int floppy_grab_irq_and_dma(void)
if (fd_request_irq()) {
DPRINT("Unable to grab IRQ%d for the floppy driver\n",
FLOPPY_IRQ);
- spin_lock_irqsave(&floppy_usage_lock, flags);
- usage_count--;
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+ atomic_dec(&usage_count);
return -1;
}
if (fd_request_dma()) {
@@ -4447,9 +4395,7 @@ static int floppy_grab_irq_and_dma(void)
use_virtual_dma = can_use_virtual_dma = 1;
if (!(can_use_virtual_dma & 1)) {
fd_free_irq();
- spin_lock_irqsave(&floppy_usage_lock, flags);
- usage_count--;
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+ atomic_dec(&usage_count);
return -1;
}
}
@@ -4484,9 +4430,7 @@ cleanup:
fd_free_dma();
while (--fdc >= 0)
floppy_release_regions(fdc);
- spin_lock_irqsave(&floppy_usage_lock, flags);
- usage_count--;
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+ atomic_dec(&usage_count);
return -1;
}
@@ -4498,14 +4442,10 @@ static void floppy_release_irq_and_dma(void)
#endif
long tmpsize;
unsigned long tmpaddr;
- unsigned long flags;
- spin_lock_irqsave(&floppy_usage_lock, flags);
- if (--usage_count) {
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+ if (!atomic_dec_and_test(&usage_count))
return;
- }
- spin_unlock_irqrestore(&floppy_usage_lock, flags);
+
if (irqdma_allocated) {
fd_disable_dma();
fd_free_dma();
@@ -4598,7 +4538,7 @@ static void __exit floppy_module_exit(void)
del_timer_sync(&fd_timer);
blk_cleanup_queue(floppy_queue);
- if (usage_count)
+ if (atomic_read(&usage_count))
floppy_release_irq_and_dma();
/* eject disk, if any */
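
[Annotation] Converting usage_count to atomic_t retires floppy_usage_lock entirely: atomic_inc_return() > 1 detects "someone already holds the hardware" and atomic_dec_and_test() detects "last user out". A standalone model of the get/put pairs, substituting C11 atomics for the kernel API:

/* Standalone model of the floppy usage_count conversion (C11 atomics). */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int usage_count;

static int grab_resources(void)
{
	/* fetch_add returns the old value; +1 matches atomic_inc_return(). */
	if (atomic_fetch_add(&usage_count, 1) + 1 > 1)
		return 0;               /* already set up by another user */
	printf("first user: request IRQ/DMA here\n");
	return 0;
}

static void release_resources(void)
{
	/* new value != 0 means we are not the last user yet. */
	if (atomic_fetch_sub(&usage_count, 1) - 1 != 0)
		return;
	printf("last user: free IRQ/DMA here\n");
}

int main(void)
{
	grab_resources();
	grab_resources();      /* nested get: no setup */
	release_resources();   /* not last: no teardown */
	release_resources();   /* last: teardown runs */
	return 0;
}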
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..5455009b4142 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,24 +63,16 @@ struct bdi_work {
};
enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
+ WS_INPROGRESS = 0,
+ WS_ONSTACK,
};
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
static inline void bdi_work_init(struct bdi_work *work,
struct wb_writeback_args *args)
{
INIT_RCU_HEAD(&work->rcu_head);
work->args = *args;
- work->state = WS_USED;
+ __set_bit(WS_INPROGRESS, &work->state);
}
/**
@@ -95,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
static void bdi_work_free(struct rcu_head *head)
{
struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
+ clear_bit(WS_INPROGRESS, &work->state);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&work->state, WS_INPROGRESS);
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
+ if (!test_bit(WS_ONSTACK, &work->state))
+ kfree(work);
}
static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
@@ -147,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
list_del_rcu(&work->list);
spin_unlock(&bdi->wb_lock);
- wb_work_complete(work);
+ call_rcu(&work->rcu_head, bdi_work_free);
}
}
@@ -185,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
* Used for on-stack allocated work items. The caller needs to wait until
* the wb threads have acked the work before it's safe to continue.
*/
-static void bdi_wait_on_work_clear(struct bdi_work *work)
+static void bdi_wait_on_work_done(struct bdi_work *work)
{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+ wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
TASK_UNINTERRUPTIBLE);
}
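
[Annotation] After this change the on-stack work lifecycle is: the submitter sets WS_INPROGRESS and WS_ONSTACK, queues the work, and sleeps in wait_on_bit(); completion clears the bit and wakes the submitter from the RCU callback, and kfree() is only reached for heap-allocated items. A condensed sketch assembled from the hunks above (two contexts shown together, not a compilable unit):

/* Submitter side: bdi_queue_work_onstack() */
struct bdi_work work;                      /* lives on this stack frame */
bdi_work_init(&work, args);                /* sets WS_INPROGRESS */
__set_bit(WS_ONSTACK, &work.state);
bdi_queue_work(args->sb->s_bdi, &work);
wait_on_bit(&work.state, WS_INPROGRESS, bdi_sched_wait,
	    TASK_UNINTERRUPTIBLE);         /* stack valid until bit clears */

/* Completion side: wb_clear_pending() -> call_rcu() -> bdi_work_free(work) */
clear_bit(WS_INPROGRESS, &work->state);
smp_mb__after_clear_bit();
wake_up_bit(&work->state, WS_INPROGRESS);  /* releases the submitter */
if (!test_bit(WS_ONSTACK, &work->state))
	kfree(work);                       /* never frees on-stack items */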
@@ -213,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
}
/**
- * bdi_sync_writeback - start and wait for writeback
- * @bdi: the backing device to write from
+ * bdi_queue_work_onstack - start and wait for writeback
* @sb: write inodes from this super_block
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
+ * This function initiates writeback and waits for the operation to
+ * complete. Callers must hold the sb s_umount semaphore for
* reading, to avoid having the super disappear before we are done.
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+static void bdi_queue_work_onstack(struct wb_writeback_args *args)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- };
struct bdi_work work;
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
+ bdi_work_init(&work, args);
+ __set_bit(WS_ONSTACK, &work.state);
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ bdi_queue_work(args->sb->s_bdi, &work);
+ bdi_wait_on_work_done(&work);
}
/**
* bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
* @nr_pages: the number of pages to write
*
* Description:
@@ -252,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
* completion. Caller need not hold sb s_umount semaphore.
*
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
struct wb_writeback_args args = {
- .sb = sb,
.sync_mode = WB_SYNC_NONE,
.nr_pages = nr_pages,
.range_cyclic = 1,
};
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
+ bdi_alloc_queue_work(bdi, &args);
+}
+/**
+ * bdi_start_background_writeback - start background writeback
+ * @bdi: the backing device to write from
+ *
+ * Description:
+ * This does WB_SYNC_NONE background writeback. The IO is only
+ * started when this function returns; we make no guarantees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ */
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
+{
+ struct wb_writeback_args args = {
+ .sync_mode = WB_SYNC_NONE,
+ .nr_pages = LONG_MAX,
+ .for_background = 1,
+ .range_cyclic = 1,
+ };
bdi_alloc_queue_work(bdi, &args);
}
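
With the nr_pages == 0 special case gone from bdi_start_writeback(), callers now pick the entry point explicitly. A hedged caller-side sketch; kick_writeback() is a hypothetical helper, not part of this patch:

#include <linux/backing-dev.h>

static void kick_writeback(struct backing_dev_info *bdi, long nr_pages)
{
	if (nr_pages)
		bdi_start_writeback(bdi, nr_pages);	/* sized WB_SYNC_NONE pass */
	else
		bdi_start_background_writeback(bdi);	/* write until below the background threshold */
}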
@@ -561,75 +526,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
 * Return 1 if the caller's writeback routine should be
 * interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -667,8 +626,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -681,24 +640,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
-
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -706,11 +655,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -751,8 +706,6 @@ static long wb_writeback(struct bdi_writeback *wb,
struct wb_writeback_args *args)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
.sync_mode = args->sync_mode,
.older_than_this = NULL,
.for_kupdate = args->for_kupdate,
@@ -790,7 +743,10 @@ static long wb_writeback(struct bdi_writeback *wb,
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
+ if (args->sb)
+ __writeback_inodes_sb(args->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
@@ -911,7 +867,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* If this isn't a data integrity operation, just notify
* that we have seen this work and we are now starting it.
*/
- if (args.sync_mode == WB_SYNC_NONE)
+ if (!test_bit(WS_ONSTACK, &work->state))
wb_clear_pending(wb, work);
wrote += wb_writeback(wb, &args);
@@ -920,7 +876,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* This is a data integrity writeback, so only do the
* notification when we have completed the work.
*/
- if (args.sync_mode == WB_SYNC_ALL)
+ if (test_bit(WS_ONSTACK, &work->state))
wb_clear_pending(wb, work);
}
@@ -978,42 +934,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
+ struct backing_dev_info *bdi;
struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
};
- struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (nr_pages) {
+ args.nr_pages = nr_pages;
+ } else {
+ args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
bdi_alloc_queue_work(bdi, &args);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1164,17 @@ void writeback_inodes_sb(struct super_block *sb)
{
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
+ struct wb_writeback_args args = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ };
- nr_to_write = nr_dirty + nr_unstable +
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ args.nr_pages = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
+ bdi_queue_work_onstack(&args);
}
EXPORT_SYMBOL(writeback_inodes_sb);
@@ -1237,7 +1188,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1253,7 +1206,16 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ struct wb_writeback_args args = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work_onstack(&args);
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
*/
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
+ down_read(&c->vfs_sb->s_umount);
writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
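
writeback_inodes_sb() and sync_inodes_sb() now WARN unless s_umount is read-held, which is why the ubifs hunk above brackets the call with down_read()/up_read(). A hedged sketch of the resulting caller contract; my_sb_flush() is a hypothetical helper, not part of this patch:

#include <linux/fs.h>
#include <linux/writeback.h>

static void my_sb_flush(struct super_block *sb, int integrity)
{
	down_read(&sb->s_umount);	/* both entry points assert this */
	if (integrity)
		sync_inodes_sb(sb);		/* WB_SYNC_ALL, waits for the IO */
	else
		writeback_inodes_sb(sb);	/* WB_SYNC_NONE, best effort */
	up_read(&sb->s_umount);
}

Note that writeback_inodes_sb_if_idle() is the exception: it now takes s_umount itself, so it must be called without the semaphore held.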
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index aee5f6ce166e..9ae2889096b6 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -105,8 +105,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
void bdi_unregister(struct backing_dev_info *bdi);
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages);
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
+void bdi_start_background_writeback(struct backing_dev_info *bdi);
int bdi_writeback_task(struct bdi_writeback *wb);
int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_arm_supers_timer(void);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 09a840264d6f..b8224ea4a5de 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -467,11 +467,13 @@ struct request_queue
#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */
#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_CLUSTER) | \
(1 << QUEUE_FLAG_STACKABLE) | \
- (1 << QUEUE_FLAG_SAME_COMP))
+ (1 << QUEUE_FLAG_SAME_COMP) | \
+ (1 << QUEUE_FLAG_ADD_RANDOM))
static inline int queue_is_locked(struct request_queue *q)
{
@@ -596,6 +598,7 @@ enum {
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
+#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
#define blk_queue_flushing(q) ((q)->ordseq)
#define blk_queue_stackable(q) \
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
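
Because QUEUE_FLAG_ADD_RANDOM is part of QUEUE_FLAG_DEFAULT, every queue contributes to the entropy pool unless its driver opts out. A hedged sketch of how an SSD-style driver with predictable completion timing might do so at init time; my_init_queue() is illustrative, not part of this patch:

#include <linux/blkdev.h>

static void my_init_queue(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);		/* no seek penalty */
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, q);	/* timing adds no entropy */
}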
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 30da4ae48972..b8d2516668aa 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -53,7 +53,7 @@
extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8rc2"
+#define REL_VERSION "8.3.8"
#define API_VERSION 88
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 94
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d63ef8f9609f..c24eca71e80c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -27,10 +27,6 @@ enum writeback_sync_modes {
* in a manner such that unspecified fields are set to zero.
*/
struct writeback_control {
- struct backing_dev_info *bdi; /* If !NULL, only write back this
- queue */
- struct super_block *sb; /* if !NULL, only write inodes from
- this super_block */
enum writeback_sync_modes sync_mode;
unsigned long *older_than_this; /* If !NULL, only write back inodes
older than this */
@@ -66,7 +62,8 @@ int inode_wait(void *);
void writeback_inodes_sb(struct super_block *);
int writeback_inodes_sb_if_idle(struct super_block *);
void sync_inodes_sb(struct super_block *);
-void writeback_inodes_wbc(struct writeback_control *wbc);
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc);
long wb_do_writeback(struct bdi_writeback *wb, int force_wait);
void wakeup_flusher_threads(long nr_pages);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 660a87a22511..6e0b09a1ec2c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -340,14 +340,13 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
static void bdi_flush_io(struct backing_dev_info *bdi)
{
struct writeback_control wbc = {
- .bdi = bdi,
.sync_mode = WB_SYNC_NONE,
.older_than_this = NULL,
.range_cyclic = 1,
.nr_to_write = 1024,
};
- writeback_inodes_wbc(&wbc);
+ writeback_inodes_wb(&bdi->wb, &wbc);
}
/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bbd396ac9546..37498ef61548 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -495,7 +495,6 @@ static void balance_dirty_pages(struct address_space *mapping,
for (;;) {
struct writeback_control wbc = {
- .bdi = bdi,
.sync_mode = WB_SYNC_NONE,
.older_than_this = NULL,
.nr_to_write = write_chunk,
@@ -537,7 +536,7 @@ static void balance_dirty_pages(struct address_space *mapping,
* up.
*/
if (bdi_nr_reclaimable > bdi_thresh) {
- writeback_inodes_wbc(&wbc);
+ writeback_inodes_wb(&bdi->wb, &wbc);
pages_written += write_chunk - wbc.nr_to_write;
get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);
@@ -597,7 +596,7 @@ static void balance_dirty_pages(struct address_space *mapping,
(!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
+ global_page_state(NR_UNSTABLE_NFS))
> background_thresh)))
- bdi_start_writeback(bdi, NULL, 0);
+ bdi_start_background_writeback(bdi);
}
void set_page_dirty_balance(struct page *page, int page_mkwrite)
@@ -705,9 +704,8 @@ void laptop_mode_timer_fn(unsigned long data)
* We want to write everything out, not just down to the dirty
* threshold
*/
-
if (bdi_has_dirty_io(&q->backing_dev_info))
- bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
+ bdi_start_writeback(&q->backing_dev_info, nr_pages);
}
/*