From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: scsi: libiscsi: add lock around task lists to fix list corruption regression There's a rather long-standing regression from the commit "libiscsi: Reduce locking contention in fast path". Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modifications occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists' integrity. Major enterprise supported kernels have been backing out the lock split for a while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Cc: # v3.15+ Signed-off-by: Martin K. 
Petersen --- include/scsi/libiscsi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0e275de6dec..583875ea136a 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ -- cgit v1.2.3 From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Mar 2017 09:00:36 -0800 Subject: scsi: mpt3sas: Avoid sleeping in interrupt context Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code to SCSI core") can make scsi_internal_device_block() sleep. However, the mpt3sas driver can call this function from an interrupt handler. Hence add a second argument to scsi_internal_device_block() that restores the old behavior of this function for the mpt3sas handler. The call chain that triggered an "IRQ handler enabled interrupts" complaint is as follows: _base_interrupt() -> _base_async_event() -> mpt3sas_scsih_event_callback() -> _scsih_check_topo_delete_events() -> _scsih_block_io_to_children_attached_directly() -> _scsih_block_io_device() -> _scsih_internal_device_block() -> scsi_internal_device_block() Reported-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: # v4.10+ Tested-by: Omar Sandoval Signed-off-by: Martin K. 
Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/scsi_lib.c | 14 ++++++++++---- drivers/scsi/scsi_priv.h | 3 --- include/scsi/scsi_device.h | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 7fe7e6ed595b..8981806fb13f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, u16 handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt3sas_transport_functions; extern struct scsi_transport_template *mpt3sas_transport_template; -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); /* trigger data externs */ void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc, struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 69c29c560575..919ba2bb15f1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r == -EINVAL) sdev_printk(KERN_WARNING, sdev, "device_block failed with return(%d) for handle(0x%04x)\n", @@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev, "performing a block followed by an unblock\n", r, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r) sdev_printk(KERN_WARNING, sdev, "retried 
device_block " "failed with return(%d) for handle(0x%04x)\n", diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5e45a252485..f41e6b84a1bd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume); /** * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block + * @wait: Whether or not to wait until ongoing .queuecommand() / + * .queue_rq() calls have finished. * * Block request made by scsi lld's to temporarily stop all * scsi commands on the specified device. May sleep. @@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume); * remove the rport mutex lock and unlock calls from srp_queuecommand(). */ int -scsi_internal_device_block(struct scsi_device *sdev) +scsi_internal_device_block(struct scsi_device *sdev, bool wait) { struct request_queue *q = sdev->request_queue; unsigned long flags; @@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. 
*/ if (q->mq_ops) { - blk_mq_quiesce_queue(q); + if (wait) + blk_mq_quiesce_queue(q); + else + blk_mq_stop_hw_queues(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_wait_for_queuecommand(sdev); + if (wait) + scsi_wait_for_queuecommand(sdev); } return 0; @@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); static void device_block(struct scsi_device *sdev, void *data) { - scsi_internal_device_block(sdev); + scsi_internal_device_block(sdev, true); } static int diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99bfc985e190..f11bd102d6d5 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } */ #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */ -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); #endif /* _SCSI_PRIV_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6f22b39f1b0c..080c7ce9bae8 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev) sdev->sdev_state == SDEV_CREATED_BLOCK; } +int scsi_internal_device_block(struct scsi_device *sdev, bool wait); +int scsi_internal_device_unblock(struct scsi_device *sdev, + enum scsi_device_state new_state); + /* accessor functions for the SCSI parameters */ static inline int scsi_device_sync(struct scsi_device *sdev) { -- cgit v1.2.3