Merge remote-tracking branch 'block/for-next'

author: Stephen Rothwell <sfr@canb.auug.org.au> 2011-05-11 11:57:44 +1000
committer: Stephen Rothwell <sfr@canb.auug.org.au> 2011-05-11 11:57:44 +1000
commit: cdbf7656dc8e4a98d46d37705486d9851a28546c (patch)
tree: dbfeefc61a3b81e40aaf909cdb1109163d64c832
parent: b11bdc28ea5f972a5196f2c624f89446e536a5b0 (diff)
parent: 6c359be347a37bcf004f623a9f2d178e7c4c0f64 (diff)
22 files changed, 639 insertions, 239 deletions
diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt
index 89698e8df7d4..c00c6a5ab21f 100644
--- a/Documentation/blockdev/cciss.txt
+++ b/Documentation/blockdev/cciss.txt
@@ -169,3 +169,18 @@ is issued which positions the tape to a known position.  Typically you
 must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example)
 before i/o can proceed again to a tape drive which was reset.
 
+There is a cciss_tape_cmds module parameter which can be used to make cciss
+allocate more commands for use by tape drives.  Ordinarily only a few commands
+(6) are allocated for tape drives because tape drives are slow and
+infrequently used and the primary purpose of Smart Array controllers is to
+act as a RAID controller for disk drives, so the vast majority of commands
+are allocated for disk devices.  However, if you have more than a few tape
+drives attached to a smart array, the default number of commands may not be
+enought (for example, if you have 8 tape drives, you could only rewind 6
+at one time with the default number of commands.)  The cciss_tape_cmds module
+parameter allows more commands (up to 16 more) to be allocated for use by
+tape drives.  For example:
+
+        insmod cciss.ko cciss_tape_cmds=16
+
+Or, as a kernel boot parameter passed in via grub:  cciss.cciss_tape_cmds=8
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 81e31819a597..8a0e7ec056e7 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,7 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
 	__blk_run_queue(q);
-	/* the queue is stopped so it won't be plugged+unplugged */
+	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
 	spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6c9b5e189e62..bb21e4c36f70 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue.  Kick the queue in those cases.  This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver.  Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver.  Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 25de73e4759b..78e627e2581d 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,17 +9,20 @@
 
 #include "blk.h"
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
+struct bio_batch {
+	atomic_t		done;
+	unsigned long		flags;
+	struct completion	*wait;
+};
 
-	if (bio->bi_private)
-		complete(bio->bi_private);
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+	struct bio_batch *bb = bio->bi_private;
 
+	if (err && (err != -EOPNOTSUPP))
+		clear_bit(BIO_UPTODATE, &bb->flags);
+	if (atomic_dec_and_test(&bb->done))
+		complete(bb->wait);
 	bio_put(bio);
 }
 
@@ -41,6 +44,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
+	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
 
@@ -67,7 +71,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		type |= REQ_SECURE;
 	}
 
-	while (nr_sects && !ret) {
+	atomic_set(&bb.done, 1);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+
+	while (nr_sects) {
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
 			ret = -ENOMEM;
@@ -75,9 +83,9 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		}
 
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &wait;
+		bio->bi_private = &bb;
 
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
@@ -88,45 +96,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			nr_sects = 0;
 		}
 
-		bio_get(bio);
+		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+	}
 
+	/* Wait for bios in-flight */
+	if (!atomic_dec_and_test(&bb.done))
 		wait_for_completion(&wait);
 
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		ret = -EIO;
 
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-struct bio_batch
-{
-	atomic_t 		done;
-	unsigned long 		flags;
-	struct completion 	*wait;
-};
-
-static void bio_batch_end_io(struct bio *bio, int err)
-{
-	struct bio_batch *bb = bio->bi_private;
-
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bb->flags);
-		else
-			clear_bit(BIO_UPTODATE, &bb->flags);
-	}
-	if (bb)
-		if (atomic_dec_and_test(&bb->done))
-			complete(bb->wait);
-	bio_put(bio);
-}
-
 /**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -151,7 +135,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
-submit:
 	ret = 0;
 	while (nr_sects != 0) {
 		bio = bio_alloc(gfp_mask,
@@ -168,9 +151,6 @@ submit:
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-			if (sz == 0)
-				/* bio has maximum size possible */
-				break;
 			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
 			nr_sects -= ret >> 9;
 			sector += ret >> 9;
@@ -190,16 +170,6 @@ submit:
 		/* One of bios in the batch was completed with error.*/
 		ret = -EIO;
 
-	if (ret)
-		goto out;
-
-	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-	if (nr_sects != 0)
-		goto submit;
-out:
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa769293597..cd3c428e194f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,6 +790,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk.h b/block/blk.h
index 61263463e38e..1f798b5a6f19 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -61,7 +61,26 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			rq = list_entry_rq(q->queue_head.next);
 			return rq;
 		}
-
+		/*
+		 * Flush request is running and flush request isn't queueable
+		 * in the drive, we can hold the queue till flush request is
+		 * finished. Even we don't do this, driver can't dispatch next
+		 * requests and will requeue them. And this can improve
+		 * throughput too. For example, we have request flush1, write1,
+		 * flush 2. flush1 is dispatched, then queue is hold, write1
+		 * isn't inserted to queue. After flush1 is finished, flush2
+		 * will be dispatched. Since disk cache is already clean,
+		 * flush2 will be finished very soon, so looks like flush2 is
+		 * folded to flush1.
+		 * Since the queue is hold, a flag is set to indicate the queue
+		 * should be restarted later. Please see flush_end_io() for
+		 * details.
+		 */
+		if (q->flush_pending_idx != q->flush_running_idx &&
+				!queue_flush_queueable(q)) {
+			q->flush_queue_delayed = 1;
+			return NULL;
+		}
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
 			return NULL;
 	}
diff --git a/block/elevator.c b/block/elevator.c
index 45ca1e34f582..2a0b653c90fd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -155,13 +155,8 @@ static struct elevator_type *elevator_get(const char *name)
 
 	e = elevator_find(name);
 	if (!e) {
-		char elv[ELV_NAME_MAX + strlen("-iosched")];
-
 		spin_unlock(&elv_list_lock);
-
-		snprintf(elv, sizeof(elv), "%s-iosched", name);
-
-		request_module("%s", elv);
+		request_module("%s-iosched", name);
 		spin_lock(&elv_list_lock);
 		e = elevator_find(name);
 	}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e2f57e9e12f0..dad2c952e0d2 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1089,21 +1089,21 @@ static int atapi_drain_needed(struct request *rq)
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	struct request_queue *q = sdev->request_queue;
+
 	if (!ata_id_has_unload(dev->id))
 		dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
 	/* configure max sectors */
-	blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
+	blk_queue_max_hw_sectors(q, dev->max_sectors);
 
 	if (dev->class == ATA_DEV_ATAPI) {
-		struct request_queue *q = sdev->request_queue;
 		void *buf;
 
 		sdev->sector_size = ATA_SECT_SIZE;
 
 		/* set DMA padding */
-		blk_queue_update_dma_pad(sdev->request_queue,
-					 ATA_DMA_PAD_SZ - 1);
+		blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
 
 		/* configure draining */
 		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
@@ -1131,8 +1131,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 			"sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
 			sdev->sector_size);
 
-	blk_queue_update_dma_alignment(sdev->request_queue,
-				       sdev->sector_size - 1);
+	blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
 
 	if (dev->flags & ATA_DFLAG_AN)
 		set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -1145,6 +1144,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
 
+	blk_queue_flush_queueable(q, false);
+
 	dev->sdev = sdev;
 	return 0;
 }
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9bf13988f1a2..8f4ef656a1af 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -64,6 +64,10 @@ MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
 MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
 MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
+static int cciss_tape_cmds = 6;
+module_param(cciss_tape_cmds, int, 0644);
+MODULE_PARM_DESC(cciss_tape_cmds,
+	"number of commands to allocate for tape devices (default: 6)");
 
 static DEFINE_MUTEX(cciss_mutex);
 static struct proc_dir_entry *proc_cciss;
@@ -194,6 +198,8 @@ static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
 static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
 	unsigned long *memory_bar);
 static inline u32 cciss_tag_discard_error_bits(ctlr_info_t *h, u32 tag);
+static __devinit int write_driver_ver_to_cfgtable(
+	CfgTable_struct __iomem *cfgtable);
 
 /* performant mode helper functions */
 static void  calc_bucket_map(int *bucket, int num_buckets, int nsgs,
@@ -556,7 +562,7 @@ static void __devinit cciss_procinit(ctlr_info_t *h)
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
-/* List of controllers which cannot be reset on kexec with reset_devices */
+/* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
 	0x324a103C, /* Smart Array P712m */
 	0x324b103C, /* SmartArray P711m */
@@ -574,23 +580,45 @@ static u32 unresettable_controller[] = {
 	0x409D0E11, /* Smart Array 6400 EM */
 };
 
-static int ctlr_is_resettable(struct ctlr_info *h)
+/* List of controllers which cannot even be soft reset */
+static u32 soft_unresettable_controller[] = {
+	0x409C0E11, /* Smart Array 6400 */
+	0x409D0E11, /* Smart Array 6400 EM */
+};
+
+static int ctlr_is_hard_resettable(u32 board_id)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-		if (unresettable_controller[i] == h->board_id)
+		if (unresettable_controller[i] == board_id)
 			return 0;
 	return 1;
 }
 
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
+		if (soft_unresettable_controller[i] == board_id)
+			return 0;
+	return 1;
+}
+
+static int ctlr_is_resettable(u32 board_id)
+{
+	return ctlr_is_hard_resettable(board_id) ||
+		ctlr_is_soft_resettable(board_id);
+}
+
 static ssize_t host_show_resettable(struct device *dev,
 				    struct device_attribute *attr,
 				    char *buf)
 {
 	struct ctlr_info *h = to_hba(dev);
 
-	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h));
+	return snprintf(buf, 20, "%d\n", ctlr_is_resettable(h->board_id));
 }
 static DEVICE_ATTR(resettable, S_IRUGO, host_show_resettable, NULL);
 
@@ -2567,7 +2595,7 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
 		}
 	} else if (cmd_type == TYPE_MSG) {
 		switch (cmd) {
-		case 0:	/* ABORT message */
+		case CCISS_ABORT_MSG:
 			c->Request.CDBLen = 12;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_WRITE;
@@ -2577,16 +2605,16 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
 			/* buff contains the tag of the command to abort */
 			memcpy(&c->Request.CDB[4], buff, 8);
 			break;
-		case 1:	/* RESET message */
+		case CCISS_RESET_MSG:
 			c->Request.CDBLen = 16;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_NONE;
 			c->Request.Timeout = 0;
 			memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
 			c->Request.CDB[0] = cmd;	/* reset */
-			c->Request.CDB[1] = 0x03;	/* reset a target */
+			c->Request.CDB[1] = CCISS_RESET_TYPE_TARGET;
 			break;
-		case 3:	/* No-Op message */
+		case CCISS_NOOP_MSG:
 			c->Request.CDBLen = 1;
 			c->Request.Type.Attribute = ATTR_SIMPLE;
 			c->Request.Type.Direction = XFER_WRITE;
@@ -2615,6 +2643,31 @@ static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
 	return status;
 }
 
+static int __devinit cciss_send_reset(ctlr_info_t *h, unsigned char *scsi3addr,
+	u8 reset_type)
+{
+	CommandList_struct *c;
+	int return_status;
+
+	c = cmd_alloc(h);
+	if (!c)
+		return -ENOMEM;
+	return_status = fill_cmd(h, c, CCISS_RESET_MSG, NULL, 0, 0,
+		CTLR_LUNID, TYPE_MSG);
+	c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */
+	if (return_status != IO_OK) {
+		cmd_special_free(h, c);
+		return return_status;
+	}
+	c->waiting = NULL;
+	enqueue_cmd_and_start_io(h, c);
+	/* Don't wait for completion, the reset won't complete.  Don't free
+	 * the command either.  This is the last command we will send before
+	 * re-initializing everything, so it doesn't matter and won't leak.
+	 */
+	return 0;
+}
+
 static int check_target_status(ctlr_info_t *h, CommandList_struct *c)
 {
 	switch (c->err_info->ScsiStatus) {
@@ -3461,6 +3514,63 @@ static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
 	return next_command(h);
 }
 
+/* Some controllers, like p400, will give us one interrupt
+ * after a soft reset, even if we turned interrupts off.
+ * Only need to check for this in the cciss_xxx_discard_completions
+ * functions.
+ */
+static int ignore_bogus_interrupt(ctlr_info_t *h)
+{
+	if (likely(!reset_devices))
+		return 0;
+
+	if (likely(h->interrupts_enabled))
+		return 0;
+
+	dev_info(&h->pdev->dev, "Received interrupt while interrupts disabled "
+		"(known firmware bug.)  Ignoring.\n");
+
+	return 1;
+}
+
+static irqreturn_t cciss_intx_discard_completions(int irq, void *dev_id)
+{
+	ctlr_info_t *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	if (interrupt_not_for_us(h))
+		return IRQ_NONE;
+	spin_lock_irqsave(&h->lock, flags);
+	while (interrupt_pending(h)) {
+		raw_tag = get_next_completion(h);
+		while (raw_tag != FIFO_EMPTY)
+			raw_tag = next_command(h);
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cciss_msix_discard_completions(int irq, void *dev_id)
+{
+	ctlr_info_t *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	if (ignore_bogus_interrupt(h))
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&h->lock, flags);
+	raw_tag = get_next_completion(h);
+	while (raw_tag != FIFO_EMPTY)
+		raw_tag = next_command(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t do_cciss_intx(int irq, void *dev_id)
 {
 	ctlr_info_t *h = dev_id;
@@ -4078,6 +4188,9 @@ static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
 		cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
 	if (!h->cfgtable)
 		return -ENOMEM;
+	rc = write_driver_ver_to_cfgtable(h->cfgtable);
+	if (rc)
+		return rc;
 	/* Find performant mode table. */
 	trans_offset = readl(&h->cfgtable->TransMethodOffset);
 	h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
@@ -4112,7 +4225,7 @@ static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
 static void __devinit cciss_find_board_params(ctlr_info_t *h)
 {
 	cciss_get_max_perf_mode_cmds(h);
-	h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+	h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
 	h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
 	/*
 	 * Limit in-command s/g elements to 32 save dma'able memory.
@@ -4348,7 +4461,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
 		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
 		if ((tag & ~3) == paddr32)
 			break;
-		schedule_timeout_uninterruptible(HZ);
+		msleep(CCISS_POST_RESET_NOOP_TIMEOUT_MSECS);
 	}
 
 	iounmap(vaddr);
@@ -4375,11 +4488,10 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u
 	return 0;
 }
 
-#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
 #define cciss_noop(p) cciss_message(p, 3, 0)
 
 static int cciss_controller_hard_reset(struct pci_dev *pdev,
-	void * __iomem vaddr, bool use_doorbell)
+	void * __iomem vaddr, u32 use_doorbell)
 {
 	u16 pmcsr;
 	int pos;
@@ -4390,8 +4502,7 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
 		 * other way using the doorbell register.
 		 */
 		dev_info(&pdev->dev, "using doorbell to reset controller\n");
-		writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
-		msleep(1000);
+		writel(use_doorbell, vaddr + SA5_DOORBELL);
 	} else { /* Try to do it the PCI power state way */
 
 		/* Quoting from the Open CISS Specification: "The Power
@@ -4422,12 +4533,64 @@ static int cciss_controller_hard_reset(struct pci_dev *pdev,
 		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
 		pmcsr |= PCI_D0;
 		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
-		msleep(500);
 	}
 	return 0;
 }
 
+static __devinit void init_driver_version(char *driver_version, int len)
+{
+	memset(driver_version, 0, len);
+	strncpy(driver_version, "cciss " DRIVER_NAME, len - 1);
+}
+
+static __devinit int write_driver_ver_to_cfgtable(
+	CfgTable_struct __iomem *cfgtable)
+{
+	char *driver_version;
+	int i, size = sizeof(cfgtable->driver_version);
+
+	driver_version = kmalloc(size, GFP_KERNEL);
+	if (!driver_version)
+		return -ENOMEM;
+
+	init_driver_version(driver_version, size);
+	for (i = 0; i < size; i++)
+		writeb(driver_version[i], &cfgtable->driver_version[i]);
+	kfree(driver_version);
+	return 0;
+}
+
+static __devinit void read_driver_ver_from_cfgtable(
+	CfgTable_struct __iomem *cfgtable, unsigned char *driver_ver)
+{
+	int i;
+
+	for (i = 0; i < sizeof(cfgtable->driver_version); i++)
+		driver_ver[i] = readb(&cfgtable->driver_version[i]);
+}
+
+static __devinit int controller_reset_failed(
+	CfgTable_struct __iomem *cfgtable)
+{
+
+	char *driver_ver, *old_driver_ver;
+	int rc, size = sizeof(cfgtable->driver_version);
+
+	old_driver_ver = kmalloc(2 * size, GFP_KERNEL);
+	if (!old_driver_ver)
+		return -ENOMEM;
+	driver_ver = old_driver_ver + size;
+
+	/* After a reset, the 32 bytes of "driver version" in the cfgtable
+	 * should have been changed, otherwise we know the reset failed.
+	 */
+	init_driver_version(old_driver_ver, size);
+	read_driver_ver_from_cfgtable(cfgtable, driver_ver);
+	rc = !memcmp(driver_ver, old_driver_ver, size);
+	kfree(old_driver_ver);
+	return rc;
+}
+
 /* This does a hard reset of the controller using PCI power management
  * states or using the doorbell register. */
 static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
@@ -4437,10 +4600,10 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 	u64 cfg_base_addr_index;
 	void __iomem *vaddr;
 	unsigned long paddr;
-	u32 misc_fw_support, active_transport;
+	u32 misc_fw_support;
 	int rc;
 	CfgTable_struct __iomem *cfgtable;
-	bool use_doorbell;
+	u32 use_doorbell;
 	u32 board_id;
 	u16 command_register;
 
@@ -4464,12 +4627,16 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 	 * likely not be happy.  Just forbid resetting this conjoined mess.
 	 */
 	cciss_lookup_board_id(pdev, &board_id);
-	if (board_id == 0x409C0E11 || board_id == 0x409D0E11) {
+	if (!ctlr_is_resettable(board_id)) {
 		dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
 				"due to shared cache module.");
 		return -ENODEV;
 	}
 
+	/* if controller is soft- but not hard resettable... */
+	if (!ctlr_is_hard_resettable(board_id))
+		return -ENOTSUPP; /* try soft reset later. */
+
 	/* Save the PCI command register */
 	pci_read_config_word(pdev, 4, &command_register);
 	/* Turn the board off.  This is so that later pci_restore_state()
@@ -4497,16 +4664,28 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 		rc = -ENOMEM;
 		goto unmap_vaddr;
 	}
+	rc = write_driver_ver_to_cfgtable(cfgtable);
+	if (rc)
+		goto unmap_vaddr;
 
-	/* If reset via doorbell register is supported, use that. */
-	misc_fw_support = readl(&cfgtable->misc_fw_support);
-	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
-
-	/* The doorbell reset seems to cause lockups on some Smart
-	 * Arrays (e.g. P410, P410i, maybe others).  Until this is
-	 * fixed or at least isolated, avoid the doorbell reset.
+	/* If reset via doorbell register is supported, use that.
+	 * There are two such methods.  Favor the newest method.
 	 */
-	use_doorbell = 0;
+	misc_fw_support = readl(&cfgtable->misc_fw_support);
+	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET2;
+	if (use_doorbell) {
+		use_doorbell = DOORBELL_CTLR_RESET2;
+	} else {
+		use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
+		if (use_doorbell) {
+			dev_warn(&pdev->dev, "Controller claims that "
+				"'Bit 2 doorbell reset' is "
+				"supported, but not 'bit 5 doorbell reset'.  "
+				"Firmware update is recommended.\n");
+			rc = -ENOTSUPP; /* use the soft reset */
+			goto unmap_cfgtable;
+		}
+	}
 
 	rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
 	if (rc)
@@ -4524,30 +4703,31 @@ static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 	msleep(CCISS_POST_RESET_PAUSE_MSECS);
 
 	/* Wait for board to become not ready, then ready. */
-	dev_info(&pdev->dev, "Waiting for board to become ready.\n");
+	dev_info(&pdev->dev, "Waiting for board to reset.\n");
 	rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_NOT_READY);
-	if (rc) /* Don't bail, might be E500, etc. which can't be reset */
-		dev_warn(&pdev->dev,
-			"failed waiting for board to become not ready\n");
+	if (rc) {
+		dev_warn(&pdev->dev, "Failed waiting for board to hard reset."
+				"  Will try soft reset.\n");
+		rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+		goto unmap_cfgtable;
+	}
 	rc = cciss_wait_for_board_state(pdev, vaddr, BOARD_READY);
 	if (rc) {
 		dev_warn(&pdev->dev,
-			"failed waiting for board to become ready\n");
+			"failed waiting for board to become ready "
+			"after hard reset\n");
 		goto unmap_cfgtable;
 	}
-	dev_info(&pdev->dev, "board ready.\n");
 
-	/* Controller should be in simple mode at this point.  If it's not,
-	 * It means we're on one of those controllers which doesn't support
-	 * the doorbell reset method and on which the PCI power management reset
-	 * method doesn't work (P800, for example.)
-	 * In those cases, don't try to proceed, as it generally doesn't work.
-	 */
-	active_transport = readl(&cfgtable->TransportActive);
-	if (active_transport & PERFORMANT_MODE) {
-		dev_warn(&pdev->dev, "Unable to successfully reset controller,"
-			" Ignoring controller.\n");
-		rc = -ENODEV;
+	rc = controller_reset_failed(vaddr);
+	if (rc < 0)
+		goto unmap_cfgtable;
+	if (rc) {
+		dev_warn(&pdev->dev, "Unable to successfully hard reset "
+			"controller. Will try soft reset.\n");
+		rc = -ENOTSUPP; /* Not expected, but try soft reset later */
+	} else {
+		dev_info(&pdev->dev, "Board ready after hard reset.\n");
 	}
 
 unmap_cfgtable:
@@ -4574,11 +4754,12 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
 	 * due to concerns about shared bbwc between 6402/6404 pair.
 	 */
 	if (rc == -ENOTSUPP)
-		return 0; /* just try to do the kdump anyhow. */
+		return rc; /* just try to do the kdump anyhow. */
 	if (rc)
 		return -ENODEV;
 
 	/* Now try to get the controller to respond to a no-op */
+	dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n");
 	for (i = 0; i < CCISS_POST_RESET_NOOP_RETRIES; i++) {
 		if (cciss_noop(pdev) == 0)
 			break;
@@ -4591,6 +4772,148 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
 	return 0;
 }
 
+static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
+{
+	h->cmd_pool_bits = kmalloc(
+		DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+		sizeof(unsigned long), GFP_KERNEL);
+	h->cmd_pool = pci_alloc_consistent(h->pdev,
+		h->nr_cmds * sizeof(CommandList_struct),
+		&(h->cmd_pool_dhandle));
+	h->errinfo_pool = pci_alloc_consistent(h->pdev,
+		h->nr_cmds * sizeof(ErrorInfo_struct),
+		&(h->errinfo_pool_dhandle));
+	if ((h->cmd_pool_bits == NULL)
+		|| (h->cmd_pool == NULL)
+		|| (h->errinfo_pool == NULL)) {
+		dev_err(&h->pdev->dev, "out of memory");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static __devinit int cciss_allocate_scatterlists(ctlr_info_t *h)
+{
+	int i;
+
+	/* zero it, so that on free we need not know how many were alloc'ed */
+	h->scatter_list = kzalloc(h->max_commands *
+				sizeof(struct scatterlist *), GFP_KERNEL);
+	if (!h->scatter_list)
+		return -ENOMEM;
+
+	for (i = 0; i < h->nr_cmds; i++) {
+		h->scatter_list[i] = kmalloc(sizeof(struct scatterlist) *
+						h->maxsgentries, GFP_KERNEL);
+		if (h->scatter_list[i] == NULL) {
+			dev_err(&h->pdev->dev, "could not allocate "
+				"s/g lists\n");
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static void cciss_free_scatterlists(ctlr_info_t *h)
+{
+	int i;
+
+	if (h->scatter_list) {
+		for (i = 0; i < h->nr_cmds; i++)
+			kfree(h->scatter_list[i]);
+		kfree(h->scatter_list);
+	}
+}
+
+static void cciss_free_cmd_pool(ctlr_info_t *h)
+{
+	kfree(h->cmd_pool_bits);
+	if (h->cmd_pool)
+		pci_free_consistent(h->pdev,
+			h->nr_cmds * sizeof(CommandList_struct),
+			h->cmd_pool, h->cmd_pool_dhandle);
+	if (h->errinfo_pool)
+		pci_free_consistent(h->pdev,
+			h->nr_cmds * sizeof(ErrorInfo_struct),
+			h->errinfo_pool, h->errinfo_pool_dhandle);
+}
+
+static int cciss_request_irq(ctlr_info_t *h,
+	irqreturn_t (*msixhandler)(int, void *),
+	irqreturn_t (*intxhandler)(int, void *))
+{
+	if (h->msix_vector || h->msi_vector) {
+		if (!request_irq(h->intr[PERF_MODE_INT], msixhandler,
+				IRQF_DISABLED, h->devname, h))
+			return 0;
+		dev_err(&h->pdev->dev, "Unable to get msi irq %d"
+			" for %s\n", h->intr[PERF_MODE_INT],
+			h->devname);
+		return -1;
+	}
+
+	if (!request_irq(h->intr[PERF_MODE_INT], intxhandler,
+			IRQF_DISABLED, h->devname, h))
+		return 0;
+	dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+		h->intr[PERF_MODE_INT], h->devname);
+	return -1;
+}
+
+static int __devinit cciss_kdump_soft_reset(ctlr_info_t *h)
+{
+	if (cciss_send_reset(h, CTLR_LUNID, CCISS_RESET_TYPE_CONTROLLER)) {
+		dev_warn(&h->pdev->dev, "Resetting array controller failed.\n");
+		return -EIO;
+	}
+
+	dev_info(&h->pdev->dev, "Waiting for board to soft reset.\n");
+	if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_NOT_READY)) {
+		dev_warn(&h->pdev->dev, "Soft reset had no effect.\n");
+		return -1;
+	}
+
+	dev_info(&h->pdev->dev, "Board reset, awaiting READY status.\n");
+	if (cciss_wait_for_board_state(h->pdev, h->vaddr, BOARD_READY)) {
+		dev_warn(&h->pdev->dev, "Board failed to become ready "
+			"after soft reset.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void cciss_undo_allocations_after_kdump_soft_reset(ctlr_info_t *h)
+{
+	int ctlr = h->ctlr;
+
+	free_irq(h->intr[PERF_MODE_INT], h);
+#ifdef CONFIG_PCI_MSI
+	if (h->msix_vector)
+		pci_disable_msix(h->pdev);
+	else if (h->msi_vector)
+		pci_disable_msi(h->pdev);
+#endif /* CONFIG_PCI_MSI */
+	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+	cciss_free_scatterlists(h);
+	cciss_free_cmd_pool(h);
+	kfree(h->blockFetchTable);
+	if (h->reply_pool)
+		pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+				h->reply_pool, h->reply_pool_dhandle);
+	if (h->transtable)
+		iounmap(h->transtable);
+	if (h->cfgtable)
+		iounmap(h->cfgtable);
+	if (h->vaddr)
+		iounmap(h->vaddr);
+	unregister_blkdev(h->major, h->devname);
+	cciss_destroy_hba_sysfs_entry(h);
+	pci_release_regions(h->pdev);
+	kfree(h);
+	hba[ctlr] = NULL;
+}
+
 /*
  *  This is it.  Find all the controllers and register them.  I really hate
  *  stealing all these major device numbers.
@@ -4601,15 +4924,28 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 {
 	int i;
 	int j = 0;
-	int k = 0;
 	int rc;
+	int try_soft_reset = 0;
 	int dac, return_code;
 	InquiryData_struct *inq_buff;
 	ctlr_info_t *h;
+	unsigned long flags;
 
 	rc = cciss_init_reset_devices(pdev);
-	if (rc)
-		return rc;
+	if (rc) {
+		if (rc != -ENOTSUPP)
+			return rc;
+		/* If the reset fails in a particular way (it has no way to do
+		 * a proper hard reset, so returns -ENOTSUPP) we can try to do
+		 * a soft reset once we get the controller configured up to the
+		 * point that it can accept a command.
+		 */
+		try_soft_reset = 1;
+		rc = 0;
+	}
+
+reinit_after_soft_reset:
+
 	i = alloc_cciss_hba(pdev);
 	if (i < 0)
 		return -1;
@@ -4627,6 +4963,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	sprintf(h->devname, "cciss%d", i);
 	h->ctlr = i;
 
+	if (cciss_tape_cmds < 2)
+		cciss_tape_cmds = 2;
+	if (cciss_tape_cmds > 16)
+		cciss_tape_cmds = 16;
+
 	init_completion(&h->scan_wait);
 
 	if (cciss_create_hba_sysfs_entry(h))
@@ -4662,62 +5003,20 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 
 	/* make sure the board interrupts are off */
 	h->access.set_intr_mask(h, CCISS_INTR_OFF);
-	if (h->msi_vector || h->msix_vector) {
-		if (request_irq(h->intr[PERF_MODE_INT],
-				do_cciss_msix_intr,
-				IRQF_DISABLED, h->devname, h)) {
-			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-			       h->intr[PERF_MODE_INT], h->devname);
-			goto clean2;
-		}
-	} else {
-		if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx,
-				IRQF_DISABLED, h->devname, h)) {
-			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
-			       h->intr[PERF_MODE_INT], h->devname);
-			goto clean2;
-		}
-	}
+	rc = cciss_request_irq(h, do_cciss_msix_intr, do_cciss_intx);
+	if (rc)
+		goto clean2;
 
 	dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
 	       h->devname, pdev->device, pci_name(pdev),
 	       h->intr[PERF_MODE_INT], dac ? "" : " not");
 
-	h->cmd_pool_bits =
-	    kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
-			* sizeof(unsigned long), GFP_KERNEL);
-	h->cmd_pool = (CommandList_struct *)
-	    pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(CommandList_struct),
-		    &(h->cmd_pool_dhandle));
-	h->errinfo_pool = (ErrorInfo_struct *)
-	    pci_alloc_consistent(h->pdev,
-		    h->nr_cmds * sizeof(ErrorInfo_struct),
-		    &(h->errinfo_pool_dhandle));
-	if ((h->cmd_pool_bits == NULL)
-	    || (h->cmd_pool == NULL)
-	    || (h->errinfo_pool == NULL)) {
-		dev_err(&h->pdev->dev, "out of memory");
+	if (cciss_allocate_cmd_pool(h))
 		goto clean4;
-	}
 
-	/* Need space for temp scatter list */
-	h->scatter_list = kmalloc(h->max_commands *
-						sizeof(struct scatterlist *),
-						GFP_KERNEL);
-	if (!h->scatter_list)
+	if (cciss_allocate_scatterlists(h))
 		goto clean4;
 
-	for (k = 0; k < h->nr_cmds; k++) {
-		h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
-							h->maxsgentries,
-							GFP_KERNEL);
-		if (h->scatter_list[k] == NULL) {
-			dev_err(&h->pdev->dev,
-				"could not allocate s/g lists\n");
-			goto clean4;
-		}
-	}
 	h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
 		h->chainsize, h->nr_cmds);
 	if (!h->cmd_sg_list && h->chainsize > 0)
@@ -4741,6 +5040,62 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 		h->gendisk[j] = NULL;
 	}
 
+	/* At this point, the controller is ready to take commands.
+	 * Now, if reset_devices and the hard reset didn't work, try
+	 * the soft reset and see if that works.
+	 */
+	if (try_soft_reset) {
+
+		/* This is kind of gross.  We may or may not get a completion
+		 * from the soft reset command, and if we do, then the value
+		 * from the fifo may or may not be valid.  So, we wait 10 secs
+		 * after the reset throwing away any completions we get during
+		 * that time.  Unregister the interrupt handler and register
+		 * fake ones to scoop up any residual completions.
+		 */
+		spin_lock_irqsave(&h->lock, flags);
+		h->access.set_intr_mask(h, CCISS_INTR_OFF);
+		spin_unlock_irqrestore(&h->lock, flags);
+		free_irq(h->intr[PERF_MODE_INT], h);
+		rc = cciss_request_irq(h, cciss_msix_discard_completions,
+					cciss_intx_discard_completions);
+		if (rc) {
+			dev_warn(&h->pdev->dev, "Failed to request_irq after "
+				"soft reset.\n");
+			goto clean4;
+		}
+
+		rc = cciss_kdump_soft_reset(h);
+		if (rc) {
+			dev_warn(&h->pdev->dev, "Soft reset failed.\n");
+			goto clean4;
+		}
+
+		dev_info(&h->pdev->dev, "Board READY.\n");
+		dev_info(&h->pdev->dev,
+			"Waiting for stale completions to drain.\n");
+		h->access.set_intr_mask(h, CCISS_INTR_ON);
+		msleep(10000);
+		h->access.set_intr_mask(h, CCISS_INTR_OFF);
+
+		rc = controller_reset_failed(h->cfgtable);
+		if (rc)
+			dev_info(&h->pdev->dev,
+				"Soft reset appears to have failed.\n");
+
+		/* since the controller's reset, we have to go back and re-init
+		 * everything.  Easiest to just forget what we've done and do it
+		 * all over again.
+		 */
+		cciss_undo_allocations_after_kdump_soft_reset(h);
+		try_soft_reset = 0;
+		if (rc)
+			/* don't go to clean4, we already unallocated */
+			return -ENODEV;
+
+		goto reinit_after_soft_reset;
+	}
+
 	cciss_scsi_setup(h);
 
 	/* Turn the interrupts on so we can service requests */
@@ -4775,21 +5130,9 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	return 1;
 
 clean4:
-	kfree(h->cmd_pool_bits);
-	/* Free up sg elements */
-	for (k-- ; k >= 0; k--)
-		kfree(h->scatter_list[k]);
-	kfree(h->scatter_list);
+	cciss_free_cmd_pool(h);
+	cciss_free_scatterlists(h);
 	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
-	if (h->cmd_pool)
-		pci_free_consistent(h->pdev,
-				    h->nr_cmds * sizeof(CommandList_struct),
-				    h->cmd_pool, h->cmd_pool_dhandle);
-	if (h->errinfo_pool)
-		pci_free_consistent(h->pdev,
-				    h->nr_cmds * sizeof(ErrorInfo_struct),
-				    h->errinfo_pool,
-				    h->errinfo_pool_dhandle);
 	free_irq(h->intr[PERF_MODE_INT], h);
 clean2:
 	unregister_blkdev(h->major, h->devname);
@@ -4887,16 +5230,16 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
 	iounmap(h->cfgtable);
 	iounmap(h->vaddr);
 
-	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct),
-			    h->cmd_pool, h->cmd_pool_dhandle);
-	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct),
-			    h->errinfo_pool, h->errinfo_pool_dhandle);
-	kfree(h->cmd_pool_bits);
+	cciss_free_cmd_pool(h);
 	/* Free up sg elements */
 	for (j = 0; j < h->nr_cmds; j++)
 		kfree(h->scatter_list[j]);
 	kfree(h->scatter_list);
 	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+	kfree(h->blockFetchTable);
+	if (h->reply_pool)
+		pci_free_consistent(h->pdev, h->max_commands * sizeof(__u64),
+				h->reply_pool, h->reply_pool_dhandle);
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 554bbd907d14..16b4d58d84dd 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -200,7 +200,7 @@ struct ctlr_info
  * the above.
  */
 #define CCISS_BOARD_READY_WAIT_SECS (120)
-#define CCISS_BOARD_NOT_READY_WAIT_SECS (10)
+#define CCISS_BOARD_NOT_READY_WAIT_SECS (100)
 #define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
 #define CCISS_BOARD_READY_ITERATIONS \
 	((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
@@ -209,8 +209,9 @@ struct ctlr_info
 	((CCISS_BOARD_NOT_READY_WAIT_SECS * 1000) / \
 		CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
 #define CCISS_POST_RESET_PAUSE_MSECS (3000)
-#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
+#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (4000)
 #define CCISS_POST_RESET_NOOP_RETRIES (12)
+#define CCISS_POST_RESET_NOOP_TIMEOUT_MSECS (10000)
 
 /* 
 	Send the command to the hardware 
@@ -239,11 +240,13 @@ static void SA5_intr_mask(ctlr_info_t *h, unsigned long val)
 	{ /* Turn interrupts on */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else /* Turn them off */
 	{
 		h->interrupts_enabled = 0;
         	writel( SA5_INTR_OFF, 
 			h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 /*
@@ -257,11 +260,13 @@ static void SA5B_intr_mask(ctlr_info_t *h, unsigned long val)
         { /* Turn interrupts on */
 		h->interrupts_enabled = 1;
                 writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         } else /* Turn them off */
         {
 		h->interrupts_enabled = 0;
                 writel( SA5B_INTR_OFF,
                         h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         }
 }
 
@@ -271,10 +276,12 @@ static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
 	if (val) { /* turn on interrupts */
 		h->interrupts_enabled = 1;
 		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	} else {
 		h->interrupts_enabled = 0;
 		writel(SA5_PERF_INTR_OFF,
 				h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+		(void) readl(h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
 	}
 }
 
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index cd441bef031f..d9be6b4d49a6 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -53,6 +53,7 @@
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
 #define DOORBELL_CTLR_RESET     0x00000004l
+#define DOORBELL_CTLR_RESET2    0x00000020l
 
 #define CFGTBL_Trans_Simple     0x00000002l
 #define CFGTBL_Trans_Performant 0x00000004l
@@ -142,6 +143,14 @@ typedef struct _ReadCapdata_struct_16
 #define BMIC_CACHE_FLUSH 0xc2
 #define CCISS_CACHE_FLUSH 0x01	/* C2 was already being used by CCISS */
 
+#define CCISS_ABORT_MSG 0x00
+#define CCISS_RESET_MSG 0x01
+#define CCISS_RESET_TYPE_CONTROLLER 0x00
+#define CCISS_RESET_TYPE_BUS 0x01
+#define CCISS_RESET_TYPE_TARGET 0x03
+#define CCISS_RESET_TYPE_LUN 0x04
+#define CCISS_NOOP_MSG 0x03
+
 /* Command List Structure */
 #define CTLR_LUNID "\0\0\0\0\0\0\0\0"
 
@@ -235,6 +244,8 @@ typedef struct _CfgTable_struct {
   u8		   reserved[0x78 - 0x58];
   u32		   misc_fw_support; /* offset 0x78 */
 #define MISC_FW_DOORBELL_RESET (0x02)
+#define MISC_FW_DOORBELL_RESET2 (0x10)
+	u8	   driver_version[32];
 } CfgTable_struct;
 
 struct TransTable_struct {
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index df793803f5ae..696100241a6f 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
 	.proc_name		= "cciss",
 	.proc_info		= cciss_scsi_proc_info,
 	.queuecommand		= cciss_scsi_queue_command,
-	.can_queue		= SCSI_CCISS_CAN_QUEUE,
 	.this_id		= 7,
 	.cmd_per_lun		= 1,
 	.use_clustering		= DISABLE_CLUSTERING,
@@ -108,16 +107,13 @@ struct cciss_scsi_cmd_stack_elem_t {
 
 #pragma pack()
 
-#define CMD_STACK_SIZE (SCSI_CCISS_CAN_QUEUE * \
-		CCISS_MAX_SCSI_DEVS_PER_HBA + 2)
-			// plus two for init time usage
-
 #pragma pack(1)
 struct cciss_scsi_cmd_stack_t {
 	struct cciss_scsi_cmd_stack_elem_t *pool;
-	struct cciss_scsi_cmd_stack_elem_t *elem[CMD_STACK_SIZE];
+	struct cciss_scsi_cmd_stack_elem_t **elem;
 	dma_addr_t cmd_pool_handle;
 	int top;
+	int nelems;
 };
 #pragma pack()
 
@@ -191,7 +187,7 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
 	stk->top++;
-	if (stk->top >= CMD_STACK_SIZE) {
+	if (stk->top >= stk->nelems) {
 		dev_err(&h->pdev->dev,
 			"scsi_cmd_free called too many times.\n");
 		BUG();
@@ -206,13 +202,14 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
 	struct cciss_scsi_cmd_stack_t *stk;
 	size_t size;
 
+	stk = &sa->cmd_stack;
+	stk->nelems = cciss_tape_cmds + 2;
 	sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
-		h->chainsize, CMD_STACK_SIZE);
+		h->chainsize, stk->nelems);
 	if (!sa->cmd_sg_list && h->chainsize > 0)
 		return -ENOMEM;
 
-	stk = &sa->cmd_stack; 
-	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
+	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
 
 	/* Check alignment, see cciss_cmd.h near CommandList_struct def. */
 	BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
@@ -221,18 +218,23 @@ scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
 		pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
 
 	if (stk->pool == NULL) {
-		cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
+		cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
 		sa->cmd_sg_list = NULL;
 		return -ENOMEM;
 	}
-
-	for (i=0; i<CMD_STACK_SIZE; i++) {
+	stk->elem = kmalloc(sizeof(stk->elem[0]) * stk->nelems, GFP_KERNEL);
+	if (!stk->elem) {
+		pci_free_consistent(h->pdev, size, stk->pool,
+		stk->cmd_pool_handle);
+		return -1;
+	}
+	for (i = 0; i < stk->nelems; i++) {
 		stk->elem[i] = &stk->pool[i];
 		stk->elem[i]->busaddr = (__u32) (stk->cmd_pool_handle + 
 			(sizeof(struct cciss_scsi_cmd_stack_elem_t) * i));
 		stk->elem[i]->cmdindex = i;
 	}
-	stk->top = CMD_STACK_SIZE-1;
+	stk->top = stk->nelems-1;
 	return 0;
 }
 
@@ -245,16 +247,18 @@ scsi_cmd_stack_free(ctlr_info_t *h)
 
 	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
-	if (stk->top != CMD_STACK_SIZE-1) {
+	if (stk->top != stk->nelems-1) {
 		dev_warn(&h->pdev->dev,
 			"bug: %d scsi commands are still outstanding.\n",
-			CMD_STACK_SIZE - stk->top);
+			stk->nelems - stk->top);
 	}
-	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
+	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * stk->nelems;
 
 	pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
 	stk->pool = NULL;
-	cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
+	cciss_free_sg_chain_blocks(sa->cmd_sg_list, stk->nelems);
+	kfree(stk->elem);
+	stk->elem = NULL;
 }
 
 #if 0
@@ -859,6 +863,7 @@ cciss_scsi_detect(ctlr_info_t *h)
 	sh->io_port = 0;	// good enough?  FIXME, 
 	sh->n_io_port = 0;	// I don't think we use these two...
 	sh->this_id = SELF_SCSI_ID;  
+	sh->can_queue = cciss_tape_cmds;
 	sh->sg_tablesize = h->maxsgentries;
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
 
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index 6d5822fe851a..e71d986727ca 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h
@@ -36,13 +36,9 @@
 		   addressible natively, and may in fact turn
 		   out to be not scsi at all. */
 
-#define SCSI_CCISS_CAN_QUEUE 2
 
 /* 
 
-Note, cmd_per_lun could give us some trouble, so I'm setting it very low.
-Likewise, SCSI_CCISS_CAN_QUEUE is set very conservatively.
-
 If the upper scsi layer tries to track how many commands we have 
 outstanding, it will be operating under the misapprehension that it is
 the only one sending us requests.  We also have the block interface,
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 2f2ccf686251..a0aabd904a51 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -320,6 +320,7 @@ static void pcd_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
+		disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 514dd8efaf73..75fb965b8f72 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -986,6 +986,9 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t
 
 	cdinfo(CD_OPEN, "entering cdrom_open\n"); 
 
+	/* open is event synchronization point, check events first */
+	check_disk_change(bdev);
+
 	/* if this was a O_NONBLOCK open and we should honor the flags,
 	 * do a quick open without drive/disc integrity checks. */
 	cdi->use_count++;
@@ -1012,9 +1015,6 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t
 
 	cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n",
 			cdi->name, cdi->use_count);
-	/* Do this on open.  Don't wait for mount, because they might
-	    not be mounting, but opening with O_NONBLOCK */
-	check_disk_change(bdev);
 	return 0;
 err_release:
 	if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) {
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 4e874c5fa605..ae15a4ddaa9b 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -625,7 +625,8 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
-	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+	gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
+			 GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a5ec5a7cb381..6e5123b1d341 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1781,7 +1781,8 @@ static int ide_cd_probe(ide_drive_t *drive)
 
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
-	g->flags |= GENHD_FL_REMOVABLE;
+	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 95019c747cc1..4778e2707168 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -636,7 +636,7 @@ static int sr_probe(struct device *dev)
 	disk->first_minor = minor;
 	sprintf(disk->disk_name, "sr%d", minor);
 	disk->fops = &sr_bdops;
-	disk->flags = GENHD_FL_CD;
+	disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 
 	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5147bdd3b8e1..d7c2e0fddc6f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (!bdev->bd_part)
 				goto out_clear;
 
+			ret = 0;
 			if (disk->fops->open) {
 				ret = disk->fops->open(bdev, mode);
 				if (ret == -ERESTARTSYS) {
@@ -1118,9 +1119,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					put_disk(disk);
 					goto restart;
 				}
-				if (ret)
-					goto out_clear;
 			}
+			/*
+			 * If the device is invalidated, rescan partition
+			 * if open succeeded or failed with -ENOMEDIUM.
+			 * The latter is necessary to prevent ghost
+			 * partitions on a removed medium.
+			 */
+			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
+				rescan_partitions(disk, bdev);
+			if (ret)
+				goto out_clear;
+
 			if (!bdev->bd_openers) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
 				bdi = blk_get_backing_dev_info(bdev);
@@ -1128,8 +1138,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 					bdi = &default_backing_dev_info;
 				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
 			}
-			if (bdev->bd_invalidated)
-				rescan_partitions(disk, bdev);
 		} else {
 			struct block_device *whole;
 			whole = bdget_disk(disk, 0);
@@ -1153,13 +1161,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		}
 	} else {
 		if (bdev->bd_contains == bdev) {
-			if (bdev->bd_disk->fops->open) {
+			ret = 0;
+			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
-				if (ret)
-					goto out_unlock_bdev;
-			}
-			if (bdev->bd_invalidated)
+			/* the same as first opener case, read comment there */
+			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
 				rescan_partitions(bdev->bd_disk, bdev);
+			if (ret)
+				goto out_unlock_bdev;
 		}
 		/* only one opener holds refs to the module and disk */
 		module_put(disk->fops->owner);
@@ -1228,6 +1237,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1254,15 +1265,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims.  Any write
-		 * holder makes the write_holder state stick until all
-		 * are released.  This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested.  Any
+		 * write holder makes the write_holder state stick until
+		 * all are released.  This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..b7e16bccd5e5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,12 @@ ssize_t part_discard_alignment_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->discard_alignment);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n",
+		       (unsigned long long)queue_limit_discard_alignment(
+							&disk->queue->limits,
+							p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +454,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->start_sect = start;
 	p->alignment_offset =
 		queue_limit_alignment_offset(&disk->queue->limits, start);
-	p->discard_alignment =
-		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2ad95fa1d130..75851eb18ff7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -364,6 +364,8 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +845,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1111,6 +1114,11 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index d764a426e9fd..b78956b3c2e7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -100,7 +100,6 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
-	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
@@ -127,6 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE			= 1 << 0, /* media changed */
author	Stephen Rothwell <sfr@canb.auug.org.au>	2011-05-11 11:57:44 +1000
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2011-05-11 11:57:44 +1000
commit	cdbf7656dc8e4a98d46d37705486d9851a28546c (patch)
tree	dbfeefc61a3b81e40aaf909cdb1109163d64c832
parent	b11bdc28ea5f972a5196f2c624f89446e536a5b0 (diff)
parent	6c359be347a37bcf004f623a9f2d178e7c4c0f64 (diff)