From e62cca19a9bbfc72c62632d95a7c01cd6476708c Mon Sep 17 00:00:00 2001 From: "kashyap.desai@lsi.com" Date: Thu, 4 Aug 2011 16:42:15 +0530 Subject: [SCSI] mptfusion: Better handling of DEAD IOC PCI-E Link down error condition Find non-operational IOC and remove it from OS: A dead (non-functional) IOC is detected by reading the doorbell register value from the fault reset thread, which is called from work thread context at regular intervals. If the doorbell value is 0xFFFFFFFF, the IOC is considered non-operational and is marked as a dead IOC. Once a dead IOC has been detected, it is removed at the PCI layer using the "pci_remove_bus_device" API. Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/mptbase.c | 63 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) (limited to 'drivers/message/fusion/mptbase.c') diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 7956a10f9488..517621fa8bca 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -63,6 +63,8 @@ #ifdef CONFIG_MTRR #include <asm/mtrr.h> #endif +#include <linux/kthread.h> +#include <scsi/scsi_host.h> #include "mptbase.h" #include "lsi/mpi_log_fc.h" @@ -323,6 +325,32 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc) return rc; } + +/** + * mpt_remove_dead_ioc_func - kthread context to remove dead ioc + * @arg: input argument, used to derive ioc + * + * Return 0 if controller is removed from pci subsystem. + * Return -1 for other case. 
+ */ +static int mpt_remove_dead_ioc_func(void *arg) +{ + MPT_ADAPTER *ioc = (MPT_ADAPTER *)arg; + struct pci_dev *pdev; + + if ((ioc == NULL)) + return -1; + + pdev = ioc->pcidev; + if ((pdev == NULL)) + return -1; + + pci_remove_bus_device(pdev); + return 0; +} + + + /** * mpt_fault_reset_work - work performed on workq after ioc fault * @work: input argument, used to derive ioc @@ -336,12 +364,45 @@ mpt_fault_reset_work(struct work_struct *work) u32 ioc_raw_state; int rc; unsigned long flags; + MPT_SCSI_HOST *hd; + struct task_struct *p; if (ioc->ioc_reset_in_progress || !ioc->active) goto out; + ioc_raw_state = mpt_GetIocState(ioc, 0); - if ((ioc_raw_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_FAULT) { + if ((ioc_raw_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_MASK) { + printk(MYIOC_s_INFO_FMT "%s: IOC is non-operational !!!!\n", + ioc->name, __func__); + + /* + * Call mptscsih_flush_pending_cmds callback so that we + * flush all pending commands back to OS. + * This call is required to avoid deadlock at block layer. + * Dead IOC will fail to do diag reset, and this call is safe + * since dead ioc will never return any command back from HW. 
+ */ + hd = shost_priv(ioc->sh); + ioc->schedule_dead_ioc_flush_running_cmds(hd); + + /*Remove the Dead Host */ + p = kthread_run(mpt_remove_dead_ioc_func, ioc, + "mpt_dead_ioc_%d", ioc->id); + if (IS_ERR(p)) { + printk(MYIOC_s_ERR_FMT + "%s: Running mpt_dead_ioc thread failed !\n", + ioc->name, __func__); + } else { + printk(MYIOC_s_WARN_FMT + "%s: Running mpt_dead_ioc thread success !\n", + ioc->name, __func__); + } + return; /* don't rearm timer */ + } + + if ((ioc_raw_state & MPI_IOC_STATE_MASK) + == MPI_IOC_STATE_FAULT) { printk(MYIOC_s_WARN_FMT "IOC is in FAULT state (%04xh)!!!\n", ioc->name, ioc_raw_state & MPI_DOORBELL_DATA_MASK); printk(MYIOC_s_WARN_FMT "Issuing HardReset from %s!!\n", -- cgit v1.2.3 From 98cbe371fd373f13806595835b79da07f3a2f934 Mon Sep 17 00:00:00 2001 From: "kashyap.desai@lsi.com" Date: Fri, 5 Aug 2011 11:04:37 +0530 Subject: [SCSI] mptfusion: Fix for device offline while doing aggressive HBA reset [Resend patch as per Bernd Schubert's comment] Issue: Device goes offline while doing aggressive HBA reset along with IO using some utility. Root cause: FW goes into a bad state due to aggressive reset, and soft reset does not help to recover the FW. Aggressive reset also opens up a window for the error handling thread to be kicked off while the HBA is in a constant RESET loop as part of the aggressive reset test case, which can lead the device to go offline. Changes: 1. Added an extra check inside the eh_timed_out callback, as below: if (ioc->ioc_reset_in_progress) rc = BLK_EH_RESET_TIMER 2. Removed the "DOORBELL_ACTIVE" check for SAS controllers from task management context. Since SAS controllers use a high priority queue for task management, this check is not required for them. 3. Replaced the SoftReset call with HardReset in Task Mgmt context for SAS controllers. 
Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/mptbase.c | 29 ++++++++++++++++++++++++++--- drivers/message/fusion/mptbase.h | 2 ++ drivers/message/fusion/mptsas.c | 9 +++++++++ drivers/message/fusion/mptscsih.c | 12 ++++++++++-- 4 files changed, 47 insertions(+), 5 deletions(-) (limited to 'drivers/message/fusion/mptbase.c') diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 517621fa8bca..e9c6a6047a00 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -6474,8 +6474,19 @@ mpt_config(MPT_ADAPTER *ioc, CONFIGPARMS *pCfg) pReq->Action, ioc->mptbase_cmds.status, timeleft)); if (ioc->mptbase_cmds.status & MPT_MGMT_STATUS_DID_IOCRESET) goto out; - if (!timeleft) + if (!timeleft) { + spin_lock_irqsave(&ioc->taskmgmt_lock, flags); + if (ioc->ioc_reset_in_progress) { + spin_unlock_irqrestore(&ioc->taskmgmt_lock, + flags); + printk(MYIOC_s_INFO_FMT "%s: host reset in" + " progress mpt_config timed out.!!\n", + __func__, ioc->name); + return -EFAULT; + } + spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags); issue_hard_reset = 1; + } goto out; } @@ -7189,7 +7200,18 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag) spin_lock_irqsave(&ioc->taskmgmt_lock, flags); if (ioc->ioc_reset_in_progress) { spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags); - return 0; + ioc->wait_on_reset_completion = 1; + do { + ssleep(1); + } while (ioc->ioc_reset_in_progress == 1); + ioc->wait_on_reset_completion = 0; + return ioc->reset_status; + } + if (ioc->wait_on_reset_completion) { + spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags); + rc = 0; + time_count = jiffies; + goto exit; } ioc->ioc_reset_in_progress = 1; if (ioc->alt_ioc) @@ -7226,6 +7248,7 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag) ioc->ioc_reset_in_progress = 0; ioc->taskmgmt_quiesce_io = 0; ioc->taskmgmt_in_progress = 0; + ioc->reset_status = rc; if (ioc->alt_ioc) { ioc->alt_ioc->ioc_reset_in_progress 
= 0; ioc->alt_ioc->taskmgmt_quiesce_io = 0; @@ -7241,7 +7264,7 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag) ioc->alt_ioc, MPT_IOC_POST_RESET); } } - +exit: dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "HardResetHandler: completed (%d seconds): %s\n", ioc->name, diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h index a4048ea45c92..b4d24dc081ae 100644 --- a/drivers/message/fusion/mptbase.h +++ b/drivers/message/fusion/mptbase.h @@ -753,6 +753,8 @@ typedef struct _MPT_ADAPTER int taskmgmt_in_progress; u8 taskmgmt_quiesce_io; u8 ioc_reset_in_progress; + u8 reset_status; + u8 wait_on_reset_completion; MPT_SCHEDULE_TARGET_RESET schedule_target_reset; MPT_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; struct work_struct sas_persist_task; diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 074e52254fcd..9d9504298549 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1950,6 +1950,15 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc) goto done; } + /* If the IOC is in reset from internal context, + * do not execute EEH for the same IOC. SML should reset the timer. 
+ */ + if (ioc->ioc_reset_in_progress) { + dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: ioc is in reset," + "SML need to reset the timer (sc=%p)\n", + ioc->name, __func__, sc)); + rc = BLK_EH_RESET_TIMER; + } vdevice = sc->device->hostdata; if (vdevice && vdevice->vtarget && (vdevice->vtarget->inDMD || vdevice->vtarget->deleted)) { diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index de8cf92d8614..ced6e4dc0847 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1630,7 +1630,13 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, return 0; } - if (ioc_raw_state & MPI_DOORBELL_ACTIVE) { + /* DOORBELL ACTIVE check is not required if + * MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q is supported. + */ + + if (!((ioc->facts.IOCCapabilities & MPI_IOCFACTS_CAPABILITY_HIGH_PRI_Q) + && (ioc->facts.MsgVersion >= MPI_VERSION_01_05)) && + (ioc_raw_state & MPI_DOORBELL_ACTIVE)) { printk(MYIOC_s_WARN_FMT "TaskMgmt type=%x: ioc_state: " "DOORBELL_ACTIVE (0x%x)!\n", @@ -1729,7 +1735,9 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, printk(MYIOC_s_WARN_FMT "Issuing Reset from %s!! doorbell=0x%08x\n", ioc->name, __func__, mpt_GetIocState(ioc, 0)); - retval = mpt_Soft_Hard_ResetHandler(ioc, CAN_SLEEP); + retval = (ioc->bus_type == SAS) ? + mpt_HardResetHandler(ioc, CAN_SLEEP) : + mpt_Soft_Hard_ResetHandler(ioc, CAN_SLEEP); mpt_free_msg_frame(ioc, mf); } -- cgit v1.2.3