From d224fe0d609734888af63656ddaf3a8352f0a7b5 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 13 Aug 2015 18:48:09 -0700 Subject: mpt2sas: Refcount sas_device objects and fix unsafe list usage These objects can be referenced concurrently throughout the driver, we need a way to make sure threads can't delete them out from under each other. This patch adds the refcount, and refactors the code to use it. Additionally, we cannot iterate over the sas_device_list without holding the lock, or we risk corrupting random memory if items are added or deleted as we iterate. This patch refactors _scsih_probe_sas() to use the sas_device_list in a safe way. Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Tested-by: Chaitra Basappa Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 480 ++++++++++++++++++++++++----------- 1 file changed, 331 insertions(+), 149 deletions(-) (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c') diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 3f26147bbc64..5eca3a4a9a7a 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -526,8 +526,61 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, } } +static struct _sas_device * +__mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc, + struct MPT2SAS_TARGET *tgt_priv) +{ + struct _sas_device *ret; + + assert_spin_locked(&ioc->sas_device_lock); + + ret = tgt_priv->sdev; + if (ret) + sas_device_get(ret); + + return ret; +} + +static struct _sas_device * +mpt2sas_get_sdev_from_target(struct MPT2SAS_ADAPTER *ioc, + struct MPT2SAS_TARGET *tgt_priv) +{ + struct _sas_device *ret; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + ret = __mpt2sas_get_sdev_from_target(ioc, tgt_priv); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return ret; +} + + +struct _sas_device * 
+__mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, + u64 sas_address) +{ + struct _sas_device *sas_device; + + assert_spin_locked(&ioc->sas_device_lock); + + list_for_each_entry(sas_device, &ioc->sas_device_list, list) + if (sas_device->sas_address == sas_address) + goto found_device; + + list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) + if (sas_device->sas_address == sas_address) + goto found_device; + + return NULL; + +found_device: + sas_device_get(sas_device); + return sas_device; +} + /** - * mpt2sas_scsih_sas_device_find_by_sas_address - sas device search + * mpt2sas_get_sdev_by_addr - sas device search * @ioc: per adapter object * @sas_address: sas address * Context: Calling function should acquire ioc->sas_device_lock @@ -536,24 +589,44 @@ _scsih_determine_boot_device(struct MPT2SAS_ADAPTER *ioc, * object. */ struct _sas_device * -mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, +mpt2sas_get_sdev_by_addr(struct MPT2SAS_ADAPTER *ioc, u64 sas_address) +{ + struct _sas_device *sas_device; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = __mpt2sas_get_sdev_by_addr(ioc, + sas_address); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return sas_device; +} + +static struct _sas_device * +__mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) { struct _sas_device *sas_device; + assert_spin_locked(&ioc->sas_device_lock); + list_for_each_entry(sas_device, &ioc->sas_device_list, list) - if (sas_device->sas_address == sas_address) - return sas_device; + if (sas_device->handle == handle) + goto found_device; list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) - if (sas_device->sas_address == sas_address) - return sas_device; + if (sas_device->handle == handle) + goto found_device; return NULL; + +found_device: + sas_device_get(sas_device); + return sas_device; } /** - * _scsih_sas_device_find_by_handle - sas device search + * 
mpt2sas_get_sdev_by_handle - sas device search * @ioc: per adapter object * @handle: sas device handle (assigned by firmware) * Context: Calling function should acquire ioc->sas_device_lock @@ -562,19 +635,16 @@ mpt2sas_scsih_sas_device_find_by_sas_address(struct MPT2SAS_ADAPTER *ioc, * object. */ static struct _sas_device * -_scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) +mpt2sas_get_sdev_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) { struct _sas_device *sas_device; + unsigned long flags; - list_for_each_entry(sas_device, &ioc->sas_device_list, list) - if (sas_device->handle == handle) - return sas_device; - - list_for_each_entry(sas_device, &ioc->sas_device_init_list, list) - if (sas_device->handle == handle) - return sas_device; + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return NULL; + return sas_device; } /** @@ -583,7 +653,7 @@ _scsih_sas_device_find_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) * @sas_device: the sas_device object * Context: This function will acquire ioc->sas_device_lock. * - * Removing object and freeing associated memory from the ioc->sas_device_list. + * If sas_device is on the list, remove it and decrement its reference count. */ static void _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc, @@ -594,9 +664,15 @@ _scsih_sas_device_remove(struct MPT2SAS_ADAPTER *ioc, if (!sas_device) return; + /* + * The lock serializes access to the list, but we still need to verify + * that nobody removed the entry while we were waiting on the lock. 
+ */ spin_lock_irqsave(&ioc->sas_device_lock, flags); - list_del(&sas_device->list); - kfree(sas_device); + if (!list_empty(&sas_device->list)) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -620,6 +696,7 @@ _scsih_sas_device_add(struct MPT2SAS_ADAPTER *ioc, sas_device->handle, (unsigned long long)sas_device->sas_address)); spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device_get(sas_device); list_add_tail(&sas_device->list, &ioc->sas_device_list); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -659,6 +736,7 @@ _scsih_sas_device_init_add(struct MPT2SAS_ADAPTER *ioc, sas_device->handle, (unsigned long long)sas_device->sas_address)); spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device_get(sas_device); list_add_tail(&sas_device->list, &ioc->sas_device_init_list); _scsih_determine_boot_device(ioc, sas_device, 0); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -1208,12 +1286,15 @@ _scsih_change_queue_depth(struct scsi_device *sdev, int qdepth) goto not_sata; if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) goto not_sata; + spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_device_priv_data->sas_target->sas_address); - if (sas_device && sas_device->device_info & - MPI2_SAS_DEVICE_INFO_SATA_DEVICE) - max_depth = MPT2SAS_SATA_QUEUE_DEPTH; + sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data); + if (sas_device) { + if (sas_device->device_info & MPI2_SAS_DEVICE_INFO_SATA_DEVICE) + max_depth = MPT2SAS_SATA_QUEUE_DEPTH; + + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); not_sata: @@ -1271,18 +1352,20 @@ _scsih_target_alloc(struct scsi_target *starget) /* sas/sata devices */ spin_lock_irqsave(&ioc->sas_device_lock, flags); rphy = dev_to_rphy(starget->dev.parent); - sas_device = 
mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, rphy->identify.sas_address); if (sas_device) { sas_target_priv_data->handle = sas_device->handle; sas_target_priv_data->sas_address = sas_device->sas_address; + sas_target_priv_data->sdev = sas_device; sas_device->starget = starget; sas_device->id = starget->id; sas_device->channel = starget->channel; if (test_bit(sas_device->handle, ioc->pd_handles)) sas_target_priv_data->flags |= MPT_TARGET_FLAGS_RAID_COMPONENT; + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -1324,13 +1407,21 @@ _scsih_target_destroy(struct scsi_target *starget) spin_lock_irqsave(&ioc->sas_device_lock, flags); rphy = dev_to_rphy(starget->dev.parent); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - rphy->identify.sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, sas_target_priv_data); if (sas_device && (sas_device->starget == starget) && (sas_device->id == starget->id) && (sas_device->channel == starget->channel)) sas_device->starget = NULL; + if (sas_device) { + /* + * Corresponding get() is in _scsih_target_alloc() + */ + sas_target_priv_data->sdev = NULL; + sas_device_put(sas_device); + + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); out: @@ -1386,7 +1477,7 @@ _scsih_slave_alloc(struct scsi_device *sdev) if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_target_priv_data->sas_address); if (sas_device && (sas_device->starget == NULL)) { sdev_printk(KERN_INFO, sdev, @@ -1394,6 +1485,10 @@ _scsih_slave_alloc(struct scsi_device *sdev) __func__, __LINE__); sas_device->starget = starget; } + + if (sas_device) + sas_device_put(sas_device); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -1428,10 +1523,13 @@ 
_scsih_slave_destroy(struct scsi_device *sdev) if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_target_priv_data->sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, + sas_target_priv_data); if (sas_device && !sas_target_priv_data->num_luns) sas_device->starget = NULL; + + if (sas_device) + sas_device_put(sas_device); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -2078,7 +2176,7 @@ _scsih_slave_configure(struct scsi_device *sdev) } spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_device_priv_data->sas_target->sas_address); if (!sas_device) { spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -2112,17 +2210,18 @@ _scsih_slave_configure(struct scsi_device *sdev) (unsigned long long) sas_device->enclosure_logical_id, sas_device->slot); + sas_device_put(sas_device); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); if (!ssp_target) _scsih_display_sata_capabilities(ioc, handle, sdev); - _scsih_change_queue_depth(sdev, qdepth); if (ssp_target) { sas_read_port_mode_page(sdev); _scsih_enable_tlr(ioc, sdev); } + return 0; } @@ -2509,8 +2608,7 @@ _scsih_tm_display_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd) device_str, (unsigned long long)priv_target->sas_address); } else { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - priv_target->sas_address); + sas_device = __mpt2sas_get_sdev_from_target(ioc, priv_target); if (sas_device) { if (priv_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { @@ -2529,6 +2627,8 @@ _scsih_tm_display_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd) "enclosure_logical_id(0x%016llx), slot(%d)\n", (unsigned long long)sas_device->enclosure_logical_id, sas_device->slot); + + 
sas_device_put(sas_device); } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } @@ -2604,12 +2704,12 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) { struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT2SAS_DEVICE *sas_device_priv_data; - struct _sas_device *sas_device; - unsigned long flags; + struct _sas_device *sas_device = NULL; u16 handle; int r; struct scsi_target *starget = scmd->device->sdev_target; + struct MPT2SAS_TARGET *target_priv_data = starget->hostdata; starget_printk(KERN_INFO, starget, "attempting device reset! " "scmd(%p)\n", scmd); @@ -2629,12 +2729,10 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) handle = 0; if (sas_device_priv_data->sas_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, - sas_device_priv_data->sas_target->handle); + sas_device = mpt2sas_get_sdev_from_target(ioc, + target_priv_data); if (sas_device) handle = sas_device->volume_handle; - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } else handle = sas_device_priv_data->sas_target->handle; @@ -2651,6 +2749,10 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) out: sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n", ((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd); + + if (sas_device) + sas_device_put(sas_device); + return r; } @@ -2665,11 +2767,11 @@ _scsih_target_reset(struct scsi_cmnd *scmd) { struct MPT2SAS_ADAPTER *ioc = shost_priv(scmd->device->host); struct MPT2SAS_DEVICE *sas_device_priv_data; - struct _sas_device *sas_device; - unsigned long flags; + struct _sas_device *sas_device = NULL; u16 handle; int r; struct scsi_target *starget = scmd->device->sdev_target; + struct MPT2SAS_TARGET *target_priv_data = starget->hostdata; starget_printk(KERN_INFO, starget, "attempting target reset! 
" "scmd(%p)\n", scmd); @@ -2689,12 +2791,10 @@ _scsih_target_reset(struct scsi_cmnd *scmd) handle = 0; if (sas_device_priv_data->sas_target->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, - sas_device_priv_data->sas_target->handle); + sas_device = mpt2sas_get_sdev_from_target(ioc, + target_priv_data); if (sas_device) handle = sas_device->volume_handle; - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } else handle = sas_device_priv_data->sas_target->handle; @@ -2711,6 +2811,10 @@ _scsih_target_reset(struct scsi_cmnd *scmd) out: starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n", ((r == SUCCESS) ? "SUCCESS" : "FAILED"), scmd); + + if (sas_device) + sas_device_put(sas_device); + return r; } @@ -3002,15 +3106,15 @@ _scsih_block_io_to_children_attached_to_ex(struct MPT2SAS_ADAPTER *ioc, list_for_each_entry(mpt2sas_port, &sas_expander->sas_port_list, port_list) { - if (mpt2sas_port->remote_identify.device_type == - SAS_END_DEVICE) { + if (mpt2sas_port->remote_identify.device_type == SAS_END_DEVICE) { spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = - mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - mpt2sas_port->remote_identify.sas_address); - if (sas_device) + sas_device = __mpt2sas_get_sdev_by_addr(ioc, + mpt2sas_port->remote_identify.sas_address); + if (sas_device) { set_bit(sas_device->handle, - ioc->blocking_handles); + ioc->blocking_handles); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } } @@ -3080,7 +3184,7 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) { Mpi2SCSITaskManagementRequest_t *mpi_request; u16 smid; - struct _sas_device *sas_device; + struct _sas_device *sas_device = NULL; struct MPT2SAS_TARGET *sas_target_priv_data = NULL; u64 sas_address = 0; unsigned long flags; @@ -3110,7 +3214,7 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) return; 
spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device && sas_device->starget && sas_device->starget->hostdata) { sas_target_priv_data = sas_device->starget->hostdata; @@ -3131,14 +3235,14 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) if (!smid) { delayed_tr = kzalloc(sizeof(*delayed_tr), GFP_ATOMIC); if (!delayed_tr) - return; + goto out; INIT_LIST_HEAD(&delayed_tr->list); delayed_tr->handle = handle; list_add_tail(&delayed_tr->list, &ioc->delayed_tr_list); dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "DELAYED:tr:handle(0x%04x), (open)\n", ioc->name, handle)); - return; + goto out; } dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "tr_send:handle(0x%04x), " @@ -3150,6 +3254,9 @@ _scsih_tm_tr_send(struct MPT2SAS_ADAPTER *ioc, u16 handle) mpi_request->DevHandle = cpu_to_le16(handle); mpi_request->TaskType = MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET; mpt2sas_base_put_smid_hi_priority(ioc, smid); +out: + if (sas_device) + sas_device_put(sas_device); } @@ -4068,7 +4175,6 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, char *desc_scsi_state = ioc->tmp_string; u32 log_info = le32_to_cpu(mpi_reply->IOCLogInfo); struct _sas_device *sas_device = NULL; - unsigned long flags; struct scsi_target *starget = scmd->device->sdev_target; struct MPT2SAS_TARGET *priv_target = starget->hostdata; char *device_str = NULL; @@ -4200,9 +4306,7 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, printk(MPT2SAS_WARN_FMT "\t%s wwid(0x%016llx)\n", ioc->name, device_str, (unsigned long long)priv_target->sas_address); } else { - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - priv_target->sas_address); + sas_device = mpt2sas_get_sdev_from_target(ioc, priv_target); if (sas_device) { printk(MPT2SAS_WARN_FMT "\tsas_address(0x%016llx), " "phy(%d)\n", 
ioc->name, sas_device->sas_address, @@ -4211,8 +4315,9 @@ _scsih_scsi_ioc_info(struct MPT2SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, "\tenclosure_logical_id(0x%016llx), slot(%d)\n", ioc->name, sas_device->enclosure_logical_id, sas_device->slot); + + sas_device_put(sas_device); } - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); } printk(MPT2SAS_WARN_FMT "\thandle(0x%04x), ioc_status(%s)(0x%04x), " @@ -4259,7 +4364,7 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) Mpi2SepRequest_t mpi_request; struct _sas_device *sas_device; - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); if (!sas_device) return; @@ -4274,7 +4379,7 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) &mpi_request)) != 0) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); - return; + goto out; } sas_device->pfa_led_on = 1; @@ -4284,8 +4389,10 @@ _scsih_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) "enclosure_processor: ioc_status (0x%04x), loginfo(0x%08x)\n", ioc->name, le16_to_cpu(mpi_reply.IOCStatus), le32_to_cpu(mpi_reply.IOCLogInfo))); - return; + goto out; } +out: + sas_device_put(sas_device); } /** @@ -4370,19 +4477,17 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) /* only handle non-raid devices */ spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (!sas_device) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } starget = sas_device->starget; sas_target_priv_data = starget->hostdata; if ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_RAID_COMPONENT) || - ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + ((sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME))) + goto 
out_unlock; + starget_printk(KERN_WARNING, starget, "predicted fault\n"); spin_unlock_irqrestore(&ioc->sas_device_lock, flags); @@ -4396,7 +4501,7 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) if (!event_reply) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); - return; + goto out; } event_reply->Function = MPI2_FUNCTION_EVENT_NOTIFICATION; @@ -4413,6 +4518,14 @@ _scsih_smart_predicted_fault(struct MPT2SAS_ADAPTER *ioc, u16 handle) event_data->SASAddress = cpu_to_le64(sas_target_priv_data->sas_address); mpt2sas_ctl_add_to_event_log(ioc, event_reply); kfree(event_reply); +out: + if (sas_device) + sas_device_put(sas_device); + return; + +out_unlock: + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + goto out; } /** @@ -5148,14 +5261,13 @@ _scsih_check_device(struct MPT2SAS_ADAPTER *ioc, u16 handle) spin_lock_irqsave(&ioc->sas_device_lock, flags); sas_address = le64_to_cpu(sas_device_pg0.SASAddress); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); if (!sas_device) { printk(MPT2SAS_ERR_FMT "device is not present " "handle(0x%04x), no sas_device!!!\n", ioc->name, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } if (unlikely(sas_device->handle != handle)) { @@ -5172,19 +5284,24 @@ _scsih_check_device(struct MPT2SAS_ADAPTER *ioc, u16 handle) MPI2_SAS_DEVICE0_FLAGS_DEVICE_PRESENT)) { printk(MPT2SAS_ERR_FMT "device is not present " "handle(0x%04x), flags!!!\n", ioc->name, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; + goto out_unlock; } /* check if there were any issues with discovery */ if (_scsih_check_access_status(ioc, sas_address, handle, - sas_device_pg0.AccessStatus)) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + sas_device_pg0.AccessStatus)) + goto out_unlock; + spin_unlock_irqrestore(&ioc->sas_device_lock, 
flags); _scsih_ublock_io_device(ioc, sas_address); + if (sas_device) + sas_device_put(sas_device); + return; +out_unlock: + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + if (sas_device) + sas_device_put(sas_device); } /** @@ -5208,7 +5325,6 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) u32 ioc_status; __le64 sas_address; u32 device_info; - unsigned long flags; if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) { @@ -5250,14 +5366,13 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) return -1; } - - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = mpt2sas_get_sdev_by_addr(ioc, sas_address); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + if (sas_device) { + sas_device_put(sas_device); return 0; + } sas_device = kzalloc(sizeof(struct _sas_device), GFP_KERNEL); @@ -5267,6 +5382,7 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) return -1; } + kref_init(&sas_device->refcount); sas_device->handle = handle; if (_scsih_get_sas_address(ioc, le16_to_cpu (sas_device_pg0.ParentDevHandle), @@ -5296,6 +5412,7 @@ _scsih_add_device(struct MPT2SAS_ADAPTER *ioc, u16 handle, u8 phy_num, u8 is_pd) else _scsih_sas_device_add(ioc, sas_device); + sas_device_put(sas_device); return 0; } @@ -5344,7 +5461,6 @@ _scsih_remove_device(struct MPT2SAS_ADAPTER *ioc, "handle(0x%04x), sas_addr(0x%016llx)\n", ioc->name, __func__, sas_device->handle, (unsigned long long) sas_device->sas_address)); - kfree(sas_device); } /** * _scsih_device_remove_by_handle - removing device object by handle @@ -5363,12 +5479,17 @@ _scsih_device_remove_by_handle(struct MPT2SAS_ADAPTER *ioc, u16 handle) return; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - if 
(sas_device) - list_del(&sas_device->list); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + + if (sas_device) { _scsih_remove_device(ioc, sas_device); + sas_device_put(sas_device); + } } /** @@ -5389,13 +5510,17 @@ mpt2sas_device_remove_by_sas_address(struct MPT2SAS_ADAPTER *ioc, return; spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, - sas_address); - if (sas_device) - list_del(&sas_device->list); + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); + if (sas_device) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + + if (sas_device) { _scsih_remove_device(ioc, sas_device); + sas_device_put(sas_device); + } } #ifdef CONFIG_SCSI_MPT2SAS_LOGGING /** @@ -5716,26 +5841,28 @@ _scsih_sas_device_status_change_event(struct MPT2SAS_ADAPTER *ioc, spin_lock_irqsave(&ioc->sas_device_lock, flags); sas_address = le64_to_cpu(event_data->SASAddress); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = __mpt2sas_get_sdev_by_addr(ioc, sas_address); - if (!sas_device || !sas_device->starget) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + if (!sas_device || !sas_device->starget) + goto out; target_priv_data = sas_device->starget->hostdata; - if (!target_priv_data) { - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - return; - } + if (!target_priv_data) + goto out; if (event_data->ReasonCode == MPI2_EVENT_SAS_DEV_STAT_RC_INTERNAL_DEVICE_RESET) target_priv_data->tm_busy = 1; else target_priv_data->tm_busy = 0; + +out: + if (sas_device) + sas_device_put(sas_device); + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + } #ifdef CONFIG_SCSI_MPT2SAS_LOGGING @@ -6123,7 +6250,7 @@ 
_scsih_sas_pd_expose(struct MPT2SAS_ADAPTER *ioc, u16 handle = le16_to_cpu(element->PhysDiskDevHandle); spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device) { sas_device->volume_handle = 0; sas_device->volume_wwid = 0; @@ -6142,6 +6269,8 @@ _scsih_sas_pd_expose(struct MPT2SAS_ADAPTER *ioc, /* exposing raid component */ if (starget) starget_for_each_device(starget, NULL, _scsih_reprobe_lun); + + sas_device_put(sas_device); } /** @@ -6170,7 +6299,7 @@ _scsih_sas_pd_hide(struct MPT2SAS_ADAPTER *ioc, &volume_wwid); spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); + sas_device = __mpt2sas_get_sdev_by_handle(ioc, handle); if (sas_device) { set_bit(handle, ioc->pd_handles); if (sas_device->starget && sas_device->starget->hostdata) { @@ -6189,6 +6318,8 @@ _scsih_sas_pd_hide(struct MPT2SAS_ADAPTER *ioc, /* hiding raid component */ if (starget) starget_for_each_device(starget, (void *)1, _scsih_reprobe_lun); + + sas_device_put(sas_device); } /** @@ -6221,7 +6352,6 @@ _scsih_sas_pd_add(struct MPT2SAS_ADAPTER *ioc, Mpi2EventIrConfigElement_t *element) { struct _sas_device *sas_device; - unsigned long flags; u16 handle = le16_to_cpu(element->PhysDiskDevHandle); Mpi2ConfigReply_t mpi_reply; Mpi2SasDevicePage0_t sas_device_pg0; @@ -6231,11 +6361,11 @@ _scsih_sas_pd_add(struct MPT2SAS_ADAPTER *ioc, set_bit(handle, ioc->pd_handles); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); return; + } if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle))) { @@ -6509,7 +6639,6 @@ 
_scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc, u16 handle, parent_handle; u32 state; struct _sas_device *sas_device; - unsigned long flags; Mpi2ConfigReply_t mpi_reply; Mpi2SasDevicePage0_t sas_device_pg0; u32 ioc_status; @@ -6542,12 +6671,11 @@ _scsih_sas_ir_physical_disk_event(struct MPT2SAS_ADAPTER *ioc, if (!ioc->is_warpdrive) set_bit(handle, ioc->pd_handles); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); return; + } if ((mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, @@ -7015,6 +7143,7 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc) struct _raid_device *raid_device, *raid_device_next; struct list_head tmp_list; unsigned long flags; + LIST_HEAD(head); printk(MPT2SAS_INFO_FMT "removing unresponding devices: start\n", ioc->name); @@ -7022,14 +7151,29 @@ _scsih_remove_unresponding_sas_devices(struct MPT2SAS_ADAPTER *ioc) /* removing unresponding end devices */ printk(MPT2SAS_INFO_FMT "removing unresponding devices: end-devices\n", ioc->name); + + /* + * Iterate, pulling off devices marked as non-responding. We become the + * owner for the reference the list had on any object we prune. + */ + spin_lock_irqsave(&ioc->sas_device_lock, flags); list_for_each_entry_safe(sas_device, sas_device_next, - &ioc->sas_device_list, list) { + &ioc->sas_device_list, list) { if (!sas_device->responding) - mpt2sas_device_remove_by_sas_address(ioc, - sas_device->sas_address); + list_move_tail(&sas_device->list, &head); else sas_device->responding = 0; } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + /* + * Now, uninitialize and remove the unresponding devices we pruned. 
+ */ + list_for_each_entry_safe(sas_device, sas_device_next, &head, list) { + _scsih_remove_device(ioc, sas_device); + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } /* removing unresponding volumes */ if (ioc->ir_firmware) { @@ -7179,11 +7323,11 @@ _scsih_scan_for_devices_after_reset(struct MPT2SAS_ADAPTER *ioc) } phys_disk_num = pd_pg0.PhysDiskNum; handle = le16_to_cpu(pd_pg0.DevHandle); - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = _scsih_sas_device_find_by_handle(ioc, handle); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + sas_device = mpt2sas_get_sdev_by_handle(ioc, handle); + if (sas_device) { + sas_device_put(sas_device); continue; + } if (mpt2sas_config_get_sas_device_pg0(ioc, &mpi_reply, &sas_device_pg0, MPI2_SAS_DEVICE_PGAD_FORM_HANDLE, handle) != 0) @@ -7302,12 +7446,12 @@ _scsih_scan_for_devices_after_reset(struct MPT2SAS_ADAPTER *ioc) if (!(_scsih_is_end_device( le32_to_cpu(sas_device_pg0.DeviceInfo)))) continue; - spin_lock_irqsave(&ioc->sas_device_lock, flags); - sas_device = mpt2sas_scsih_sas_device_find_by_sas_address(ioc, + sas_device = mpt2sas_get_sdev_by_addr(ioc, le64_to_cpu(sas_device_pg0.SASAddress)); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); - if (sas_device) + if (sas_device) { + sas_device_put(sas_device); continue; + } parent_handle = le16_to_cpu(sas_device_pg0.ParentDevHandle); if (!_scsih_get_sas_address(ioc, parent_handle, &sas_address)) { printk(MPT2SAS_INFO_FMT "\tBEFORE adding end device: " @@ -7966,6 +8110,48 @@ _scsih_probe_raid(struct MPT2SAS_ADAPTER *ioc) } } +static struct _sas_device *get_next_sas_device(struct MPT2SAS_ADAPTER *ioc) +{ + struct _sas_device *sas_device = NULL; + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + if (!list_empty(&ioc->sas_device_init_list)) { + sas_device = list_first_entry(&ioc->sas_device_init_list, + struct _sas_device, list); + sas_device_get(sas_device); + } + 
spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + return sas_device; +} + +static void sas_device_make_active(struct MPT2SAS_ADAPTER *ioc, + struct _sas_device *sas_device) +{ + unsigned long flags; + + spin_lock_irqsave(&ioc->sas_device_lock, flags); + + /* + * Since we dropped the lock during the call to port_add(), we need to + * be careful here that somebody else didn't move or delete this item + * while we were busy with other things. + * + * If it was on the list, we need a put() for the reference the list + * had. Either way, we need a get() for the destination list. + */ + if (!list_empty(&sas_device->list)) { + list_del_init(&sas_device->list); + sas_device_put(sas_device); + } + + sas_device_get(sas_device); + list_add_tail(&sas_device->list, &ioc->sas_device_list); + + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); +} + /** * _scsih_probe_sas - reporting sas devices to sas transport * @ioc: per adapter object @@ -7975,34 +8161,30 @@ _scsih_probe_raid(struct MPT2SAS_ADAPTER *ioc) static void _scsih_probe_sas(struct MPT2SAS_ADAPTER *ioc) { - struct _sas_device *sas_device, *next; - unsigned long flags; - - /* SAS Device List */ - list_for_each_entry_safe(sas_device, next, &ioc->sas_device_init_list, - list) { + struct _sas_device *sas_device; - if (ioc->hide_drives) - continue; + if (ioc->hide_drives) + return; + while ((sas_device = get_next_sas_device(ioc))) { if (!mpt2sas_transport_port_add(ioc, sas_device->handle, - sas_device->sas_address_parent)) { - list_del(&sas_device->list); - kfree(sas_device); + sas_device->sas_address_parent)) { + _scsih_sas_device_remove(ioc, sas_device); + sas_device_put(sas_device); continue; } else if (!sas_device->starget) { if (!ioc->is_driver_loading) { mpt2sas_transport_port_remove(ioc, - sas_device->sas_address, - sas_device->sas_address_parent); - list_del(&sas_device->list); - kfree(sas_device); + sas_device->sas_address, + sas_device->sas_address_parent); + _scsih_sas_device_remove(ioc, sas_device); 
+ sas_device_put(sas_device); continue; } } - spin_lock_irqsave(&ioc->sas_device_lock, flags); - list_move_tail(&sas_device->list, &ioc->sas_device_list); - spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + + sas_device_make_active(ioc, sas_device); + sas_device_put(sas_device); } } -- cgit v1.2.3 From 008549f6e8a1dc4aeea4a8d64184909786b27713 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 13 Aug 2015 18:48:10 -0700 Subject: mpt2sas: Refcount fw_events and fix unsafe list usage The fw_event_work struct is concurrently referenced at shutdown, so add a refcount to protect it, and refactor the code to use it. Additionally, refactor _scsih_fw_event_cleanup_queue() such that it no longer iterates over the list without holding the lock, since _firmware_event_work() concurrently deletes items from the list. Signed-off-by: Calvin Owens Reviewed-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Tested-by: Chaitra Basappa Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 112 ++++++++++++++++++++++++++++------- 1 file changed, 91 insertions(+), 21 deletions(-) (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c') diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index 5eca3a4a9a7a..c0ff55b0d3cc 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -176,9 +176,37 @@ struct fw_event_work { u8 VP_ID; u8 ignore; u16 event; + struct kref refcount; char event_data[0] __aligned(4); }; +static void fw_event_work_free(struct kref *r) +{ + kfree(container_of(r, struct fw_event_work, refcount)); +} + +static void fw_event_work_get(struct fw_event_work *fw_work) +{ + kref_get(&fw_work->refcount); +} + +static void fw_event_work_put(struct fw_event_work *fw_work) +{ + kref_put(&fw_work->refcount, fw_event_work_free); +} + +static struct fw_event_work *alloc_fw_event_work(int len) +{ + struct fw_event_work *fw_event; + + fw_event = 
kzalloc(sizeof(*fw_event) + len, GFP_ATOMIC); + if (!fw_event) + return NULL; + + kref_init(&fw_event->refcount); + return fw_event; +} + /* raid transport support */ static struct raid_template *mpt2sas_raid_template; @@ -2872,36 +2900,39 @@ _scsih_fw_event_add(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) return; spin_lock_irqsave(&ioc->fw_event_lock, flags); + fw_event_work_get(fw_event); list_add_tail(&fw_event->list, &ioc->fw_event_list); INIT_DELAYED_WORK(&fw_event->delayed_work, _firmware_event_work); + fw_event_work_get(fw_event); queue_delayed_work(ioc->firmware_event_thread, &fw_event->delayed_work, 0); spin_unlock_irqrestore(&ioc->fw_event_lock, flags); } /** - * _scsih_fw_event_free - delete fw_event + * _scsih_fw_event_del_from_list - delete fw_event from the list * @ioc: per adapter object * @fw_event: object describing the event * Context: This function will acquire ioc->fw_event_lock. * - * This removes firmware event object from link list, frees associated memory. + * If the fw_event is on the fw_event_list, remove it and do a put. * * Return nothing. 
*/ static void -_scsih_fw_event_free(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work +_scsih_fw_event_del_from_list(struct MPT2SAS_ADAPTER *ioc, struct fw_event_work *fw_event) { unsigned long flags; spin_lock_irqsave(&ioc->fw_event_lock, flags); - list_del(&fw_event->list); - kfree(fw_event); + if (!list_empty(&fw_event->list)) { + list_del_init(&fw_event->list); + fw_event_work_put(fw_event); + } spin_unlock_irqrestore(&ioc->fw_event_lock, flags); } - /** * _scsih_error_recovery_delete_devices - remove devices not responding * @ioc: per adapter object @@ -2916,13 +2947,14 @@ _scsih_error_recovery_delete_devices(struct MPT2SAS_ADAPTER *ioc) if (ioc->is_driver_loading) return; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_REMOVE_UNRESPONDING_DEVICES; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); } /** @@ -2936,12 +2968,29 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) { struct fw_event_work *fw_event; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_PORT_ENABLE_COMPLETE; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); +} + +static struct fw_event_work *dequeue_next_fw_event(struct MPT2SAS_ADAPTER *ioc) +{ + unsigned long flags; + struct fw_event_work *fw_event = NULL; + + spin_lock_irqsave(&ioc->fw_event_lock, flags); + if (!list_empty(&ioc->fw_event_list)) { + fw_event = list_first_entry(&ioc->fw_event_list, + struct fw_event_work, list); + list_del_init(&fw_event->list); + } + spin_unlock_irqrestore(&ioc->fw_event_lock, flags); + + return fw_event; } /** @@ -2956,17 +3005,25 @@ mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc) static void _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc) { - struct fw_event_work *fw_event, *next; + struct 
fw_event_work *fw_event; if (list_empty(&ioc->fw_event_list) || !ioc->firmware_event_thread || in_interrupt()) return; - list_for_each_entry_safe(fw_event, next, &ioc->fw_event_list, list) { - if (cancel_delayed_work_sync(&fw_event->delayed_work)) { - _scsih_fw_event_free(ioc, fw_event); - continue; - } + while ((fw_event = dequeue_next_fw_event(ioc))) { + /* + * Wait on the fw_event to complete. If this returns 1, then + * the event was never executed, and we need a put for the + * reference the delayed_work had on the fw_event. + * + * If it did execute, we wait for it to finish, and the put will + * happen from _firmware_event_work() + */ + if (cancel_delayed_work_sync(&fw_event->delayed_work)) + fw_event_work_put(fw_event); + + fw_event_work_put(fw_event); } } @@ -4447,13 +4504,14 @@ _scsih_send_event_to_turn_on_pfa_led(struct MPT2SAS_ADAPTER *ioc, u16 handle) { struct fw_event_work *fw_event; - fw_event = kzalloc(sizeof(struct fw_event_work), GFP_ATOMIC); + fw_event = alloc_fw_event_work(0); if (!fw_event) return; fw_event->event = MPT2SAS_TURN_ON_PFA_LED; fw_event->device_handle = handle; fw_event->ioc = ioc; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); } /** @@ -7554,17 +7612,27 @@ _firmware_event_work(struct work_struct *work) struct fw_event_work, delayed_work.work); struct MPT2SAS_ADAPTER *ioc = fw_event->ioc; + _scsih_fw_event_del_from_list(ioc, fw_event); + /* the queue is being flushed so ignore this event */ - if (ioc->remove_host || - ioc->pci_error_recovery) { - _scsih_fw_event_free(ioc, fw_event); + if (ioc->remove_host || ioc->pci_error_recovery) { + fw_event_work_put(fw_event); return; } switch (fw_event->event) { case MPT2SAS_REMOVE_UNRESPONDING_DEVICES: - while (scsi_host_in_recovery(ioc->shost) || ioc->shost_recovery) + while (scsi_host_in_recovery(ioc->shost) || + ioc->shost_recovery) { + /* + * If we're unloading, bail. Otherwise, this can become + * an infinite loop. 
+ */ + if (ioc->remove_host) + goto out; + ssleep(1); + } _scsih_remove_unresponding_sas_devices(ioc); _scsih_scan_for_devices_after_reset(ioc); break; @@ -7613,7 +7681,8 @@ _firmware_event_work(struct work_struct *work) _scsih_sas_ir_operation_status_event(ioc, fw_event); break; } - _scsih_fw_event_free(ioc, fw_event); +out: + fw_event_work_put(fw_event); } /** @@ -7751,7 +7820,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index, } sz = le16_to_cpu(mpi_reply->EventDataLength) * 4; - fw_event = kzalloc(sizeof(*fw_event) + sz, GFP_ATOMIC); + fw_event = alloc_fw_event_work(sz); if (!fw_event) { printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n", ioc->name, __FILE__, __LINE__, __func__); @@ -7764,6 +7833,7 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index, fw_event->VP_ID = mpi_reply->VP_ID; fw_event->event = event; _scsih_fw_event_add(ioc, fw_event); + fw_event_work_put(fw_event); return; } -- cgit v1.2.3 From 6229b414b3adb3aac0b54e67d72d6462fc230c0d Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan Date: Tue, 18 Aug 2015 13:27:10 +0530 Subject: mpt2sas: setpci reset kernel oops fix mpt2sas: setpci reset on nytro warpdrive card along with sysfs access and cli ioctl access resulted in kernel oops 1. pci_access_mutex lock added to provide synchronization between IOCTL, sysfs, PCI resource handling path 2. gioc_lock spinlock to protect list operations over multiple controllers >From c53a1cff4c07528b8b9ec7f6716e94950283e8f9 Mon Sep 17 00:00:00 2001 From: Nagarajkumar Narayanan Date: Tue, 18 Aug 2015 11:58:13 +0530 Subject: [PATCH] mpt2sas setpci reset oops fix In the mpt2sas driver, due to lack of synchronization between ioctl, BRM status access through sysfs, and pci resource removal, a kernel oops happens as the ioctl path and the BRM status sysfs access path still try to access the removed resources. Two locks are added to provide synchronization: 1. pci_access_mutex: Mutex to synchronize ioctl, sysfs show path and pci resource handling. 
PCI resource freeing will free vital hardware/memory resources, which might be in use by cli/sysfs path functions, resulting in a NULL pointer dereference followed by a kernel crash. To avoid the above race condition we use mutex synchronization, which ensures the synchronization between the cli/sysfs_show paths. Note: pci_access_mutex is used only if nytro warpdrive cards (ioc->is_warpdrive based on device id) are used, as we could not test this case with other SAS2 HBA cards. We can remove this check if this behaviour is confirmed on other cards. 2. spinlock on list operations over IOCs Case: when multiple warpdrive cards (IOCs) are in use. Each IOC will be added to the ioc list structure on initialization. Watchdog threads run at regular intervals to check the IOC for any fault conditions, which will trigger the dead_ioc thread to deallocate pci resources, resulting in deletion of the IOC entry from the list. This deletion needs to be protected by a spinlock to ensure that ioc removal is synchronized; if not synchronized, it might lead to list_del corruption as the ioc list is traversed in the cli path. Signed-off-by: Nagarajkumar Narayanan Reviewed-by: Johannes Thumshirn Acked-by: Sreekanth Reddy Signed-off-by: James Bottomley --- drivers/scsi/mpt2sas/mpt2sas_base.c | 6 ++++++ drivers/scsi/mpt2sas/mpt2sas_base.h | 19 +++++++++++++++++- drivers/scsi/mpt2sas/mpt2sas_ctl.c | 38 +++++++++++++++++++++++++++++------- drivers/scsi/mpt2sas/mpt2sas_scsih.c | 13 +++++++++++- 4 files changed, 67 insertions(+), 9 deletions(-) (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c') diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 6dec7cff316f..c167911221e9 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -112,9 +112,12 @@ _scsih_set_fwfault_debug(const char *val, struct kernel_param *kp) if (ret) return ret; + /* global ioc spinlock to protect controller list on list operations */ printk(KERN_INFO "setting fwfault_debug(%d)\n", 
mpt2sas_fwfault_debug); + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) ioc->fwfault_debug = mpt2sas_fwfault_debug; + spin_unlock(&gioc_lock); return 0; } @@ -4437,6 +4440,8 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) dexitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, __func__)); + /* synchronizing freeing resource with pci_access_mutex lock */ + mutex_lock(&ioc->pci_access_mutex); if (ioc->chip_phys && ioc->chip) { _base_mask_interrupts(ioc); ioc->shost_recovery = 1; @@ -4456,6 +4461,7 @@ mpt2sas_base_free_resources(struct MPT2SAS_ADAPTER *ioc) pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); } + mutex_unlock(&ioc->pci_access_mutex); return; } diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 78f41aca9598..97ea360c6920 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -817,6 +817,12 @@ typedef void (*MPT2SAS_FLUSH_RUNNING_CMDS)(struct MPT2SAS_ADAPTER *ioc); * @delayed_tr_list: target reset link list * @delayed_tr_volume_list: volume target reset link list * @@temp_sensors_count: flag to carry the number of temperature sensors + * @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and + * pci resource handling. PCI resource freeing will lead to free + * vital hardware/memory resource, which might be in use by cli/sysfs + * path functions resulting in Null pointer reference followed by kernel + * crash. 
To avoid the above race condition we use mutex synchronization + * which ensures the synchronization between cli/sysfs_show path */ struct MPT2SAS_ADAPTER { struct list_head list; @@ -1033,6 +1039,7 @@ struct MPT2SAS_ADAPTER { u8 mfg_pg10_hide_flag; u8 hide_drives; + struct mutex pci_access_mutex; }; typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, @@ -1041,6 +1048,17 @@ typedef u8 (*MPT_CALLBACK)(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index, /* base shared API */ extern struct list_head mpt2sas_ioc_list; +/* spinlock on list operations over IOCs + * Case: when multiple warpdrive cards (IOCs) are in use + * Each IOC will be added to the ioc list structure on initialization. + * Watchdog threads run at regular intervals to check IOC for any + * fault conditions which will trigger the dead_ioc thread to + * deallocate pci resource, resulting in deletion of the IOC entry from the list, + * this deletion needs to be protected by spinlock to ensure that + * ioc removal is synchronized, if not synchronized it might lead to + * list_del corruption as the ioc list is traversed in cli path + */ +extern spinlock_t gioc_lock; void mpt2sas_base_start_watchdog(struct MPT2SAS_ADAPTER *ioc); void mpt2sas_base_stop_watchdog(struct MPT2SAS_ADAPTER *ioc); @@ -1119,7 +1137,6 @@ struct _sas_device *__mpt2sas_get_sdev_by_addr( struct MPT2SAS_ADAPTER *ioc, u64 sas_address); void mpt2sas_port_enable_complete(struct MPT2SAS_ADAPTER *ioc); - void mpt2sas_scsih_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase); /* config shared API */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c index 4e509604b571..3694b63bd993 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c +++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c @@ -427,13 +427,16 @@ static int _ctl_verify_adapter(int ioc_number, struct MPT2SAS_ADAPTER **iocpp) { struct MPT2SAS_ADAPTER *ioc; - + /* global ioc lock to protect controller on list operations */ + spin_lock(&gioc_lock); 
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { if (ioc->id != ioc_number) continue; + spin_unlock(&gioc_lock); *iocpp = ioc; return ioc_number; } + spin_unlock(&gioc_lock); *iocpp = NULL; return -1; } @@ -522,10 +525,15 @@ _ctl_poll(struct file *filep, poll_table *wait) poll_wait(filep, &ctl_poll_wait, wait); + /* global ioc lock to protect controller on list operations */ + spin_lock(&gioc_lock); list_for_each_entry(ioc, &mpt2sas_ioc_list, list) { - if (ioc->aen_event_read_flag) + if (ioc->aen_event_read_flag) { + spin_unlock(&gioc_lock); return POLLIN | POLLRDNORM; + } } + spin_unlock(&gioc_lock); return 0; } @@ -2168,16 +2176,23 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, if (_ctl_verify_adapter(ioctl_header.ioc_number, &ioc) == -1 || !ioc) return -ENODEV; + /* pci_access_mutex lock acquired by ioctl path */ + mutex_lock(&ioc->pci_access_mutex); if (ioc->shost_recovery || ioc->pci_error_recovery || - ioc->is_driver_loading) - return -EAGAIN; + ioc->is_driver_loading || ioc->remove_host) { + ret = -EAGAIN; + goto out_unlock_pciaccess; + } state = (file->f_flags & O_NONBLOCK) ? 
NON_BLOCKING : BLOCKING; if (state == NON_BLOCKING) { - if (!mutex_trylock(&ioc->ctl_cmds.mutex)) - return -EAGAIN; + if (!mutex_trylock(&ioc->ctl_cmds.mutex)) { + ret = -EAGAIN; + goto out_unlock_pciaccess; + } } else if (mutex_lock_interruptible(&ioc->ctl_cmds.mutex)) { - return -ERESTARTSYS; + ret = -ERESTARTSYS; + goto out_unlock_pciaccess; } switch (cmd) { @@ -2258,6 +2273,8 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg, } mutex_unlock(&ioc->ctl_cmds.mutex); +out_unlock_pciaccess: + mutex_unlock(&ioc->pci_access_mutex); return ret; } @@ -2711,6 +2728,12 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr, "warpdrive\n", ioc->name, __func__); goto out; } + /* pci_access_mutex lock acquired by sysfs show path */ + mutex_lock(&ioc->pci_access_mutex); + if (ioc->pci_error_recovery || ioc->remove_host) { + mutex_unlock(&ioc->pci_access_mutex); + return 0; + } /* allocate upto GPIOVal 36 entries */ sz = offsetof(Mpi2IOUnitPage3_t, GPIOVal) + (sizeof(u16) * 36); @@ -2749,6 +2772,7 @@ _ctl_BRM_status_show(struct device *cdev, struct device_attribute *attr, out: kfree(io_unit_pg3); + mutex_unlock(&ioc->pci_access_mutex); return rc; } static DEVICE_ATTR(BRM_status, S_IRUGO, _ctl_BRM_status_show, NULL); diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index c0ff55b0d3cc..0ad09b2bff9c 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -79,7 +79,8 @@ static int _scsih_scan_finished(struct Scsi_Host *shost, unsigned long time); /* global parameters */ LIST_HEAD(mpt2sas_ioc_list); - +/* global ioc lock for list operations */ +DEFINE_SPINLOCK(gioc_lock); /* local parameters */ static u8 scsi_io_cb_idx = -1; static u8 tm_cb_idx = -1; @@ -321,8 +322,10 @@ _scsih_set_debug_level(const char *val, struct kernel_param *kp) return ret; printk(KERN_INFO "setting logging_level(0x%08x)\n", logging_level); + spin_lock(&gioc_lock); 
list_for_each_entry(ioc, &mpt2sas_ioc_list, list) ioc->logging_level = logging_level; + spin_unlock(&gioc_lock); return 0; } module_param_call(logging_level, _scsih_set_debug_level, param_get_int, @@ -8081,7 +8084,9 @@ _scsih_remove(struct pci_dev *pdev) sas_remove_host(shost); scsi_remove_host(shost); mpt2sas_base_detach(ioc); + spin_lock(&gioc_lock); list_del(&ioc->list); + spin_unlock(&gioc_lock); scsi_host_put(shost); } @@ -8394,7 +8399,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) ioc = shost_priv(shost); memset(ioc, 0, sizeof(struct MPT2SAS_ADAPTER)); INIT_LIST_HEAD(&ioc->list); + spin_lock(&gioc_lock); list_add_tail(&ioc->list, &mpt2sas_ioc_list); + spin_unlock(&gioc_lock); ioc->shost = shost; ioc->id = mpt_ids++; sprintf(ioc->name, "%s%d", MPT2SAS_DRIVER_NAME, ioc->id); @@ -8419,6 +8426,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) ioc->schedule_dead_ioc_flush_running_cmds = &_scsih_flush_running_cmds; /* misc semaphores and spin locks */ mutex_init(&ioc->reset_in_progress_mutex); + /* initializing pci_access_mutex lock */ + mutex_init(&ioc->pci_access_mutex); spin_lock_init(&ioc->ioc_reset_in_progress_lock); spin_lock_init(&ioc->scsi_lookup_lock); spin_lock_init(&ioc->sas_device_lock); @@ -8521,7 +8530,9 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id) out_attach_fail: destroy_workqueue(ioc->firmware_event_thread); out_thread_fail: + spin_lock(&gioc_lock); list_del(&ioc->list); + spin_unlock(&gioc_lock); scsi_host_put(shost); return rv; } -- cgit v1.2.3