summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-aer (renamed from Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats)44
-rw-r--r--Documentation/PCI/pcieaer-howto.rst17
-rw-r--r--Documentation/driver-api/driver-model/devres.rst3
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c7
-rw-r--r--drivers/pci/devres.c215
-rw-r--r--drivers/pci/hotplug/pciehp_ctrl.c2
-rw-r--r--drivers/pci/iomap.c16
-rw-r--r--drivers/pci/pci-sysfs.c1
-rw-r--r--drivers/pci/pci.c61
-rw-r--r--drivers/pci/pci.h28
-rw-r--r--drivers/pci/pcie/aer.c442
-rw-r--r--drivers/pci/pcie/bwctrl.c86
-rw-r--r--drivers/pci/pcie/dpc.c75
-rw-r--r--drivers/pci/pcie/err.c1
-rw-r--r--drivers/pci/pcie/tlp.c6
-rw-r--r--drivers/pci/quirks.c10
-rw-r--r--include/linux/pci.h3
17 files changed, 503 insertions, 514 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
index d1f67bb81d5d..5ed284523956 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer
@@ -117,3 +117,47 @@ Date: July 2018
KernelVersion: 4.19.0
Contact: linux-pci@vger.kernel.org, rajatja@google.com
Description: Total number of ERR_NONFATAL messages reported to rootport.
+
+PCIe AER ratelimits
+-------------------
+
+These attributes show up under all the devices that are AER capable.
+They represent configurable ratelimits of logs per error type.
+
+See Documentation/PCI/pcieaer-howto.rst for more info on ratelimits.
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER correctable error log ratelimiting.
+ Writing a positive value sets the ratelimit interval in ms.
+ Default is DEFAULT_RATELIMIT_INTERVAL (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for correctable error logs. Writing a value
+ changes the number of errors (burst) allowed per interval
+ before ratelimiting. Reading gets the current ratelimit
+ burst. Default is DEFAULT_RATELIMIT_BURST (10).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_interval_ms
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Writing 0 disables AER non-fatal uncorrectable error log
+ ratelimiting. Writing a positive value sets the ratelimit
+ interval in ms. Default is DEFAULT_RATELIMIT_INTERVAL
+ (5000 ms).
+
+What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_burst
+Date: May 2025
+KernelVersion: 6.16.0
+Contact: linux-pci@vger.kernel.org
+Description: Ratelimit burst for non-fatal uncorrectable error logs.
+ Writing a value changes the number of errors (burst)
+ allowed per interval before ratelimiting. Reading gets the
+ current ratelimit burst. Default is DEFAULT_RATELIMIT_BURST
+ (10).
diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst
index f013f3b27c82..4b71e2f43ca7 100644
--- a/Documentation/PCI/pcieaer-howto.rst
+++ b/Documentation/PCI/pcieaer-howto.rst
@@ -85,12 +85,27 @@ In the example, 'Requester ID' means the ID of the device that sent
the error message to the Root Port. Please refer to PCIe specs for other
fields.
+AER Ratelimits
+--------------
+
+Since error messages can be generated for each transaction, we may see
+large volumes of errors reported. To prevent spammy devices from flooding
+the console/stalling execution, messages are throttled by device and error
+type (correctable vs. non-fatal uncorrectable). Fatal errors, including
+DPC errors, are not ratelimited.
+
+AER uses the default ratelimit of DEFAULT_RATELIMIT_BURST (10 events) over
+DEFAULT_RATELIMIT_INTERVAL (5 seconds).
+
+Ratelimits are exposed in the form of sysfs attributes and configurable.
+See Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
+
AER Statistics / Counters
-------------------------
When PCIe AER errors are captured, the counters / statistics are also exposed
in the form of sysfs attributes which are documented at
-Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats
+Documentation/ABI/testing/sysfs-bus-pci-devices-aer.
Developer Guide
===============
diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index d75728eb05f8..3d56f94ac2ee 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -391,12 +391,11 @@ PCI
devm_pci_remap_cfgspace() : ioremap PCI configuration space
devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource
- pcim_enable_device() : after success, some PCI ops become managed
+ pcim_enable_device() : after success, the PCI device gets disabled automatically on driver detach
pcim_iomap() : do iomap() on a single BAR
pcim_iomap_regions() : do request_region() and iomap() on multiple BARs
pcim_iomap_table() : array of mapped addresses indexed by BAR
pcim_iounmap() : do iounmap() on a single BAR
- pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs
pcim_pin_device() : keep PCI device enabled after release
pcim_set_mwi() : enable Memory-Write-Invalidate PCI transaction
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0d619df03fa9..66ce6b81c7d9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3717,7 +3717,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (rv) {
dev_warn(&pdev->dev, "64-bit DMA enable failed\n");
- goto setmask_err;
+ goto iomap_err;
}
/* Copy the info we may need later into the private data structure. */
@@ -3733,7 +3733,7 @@ static int mtip_pci_probe(struct pci_dev *pdev,
if (!dd->isr_workq) {
dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
rv = -ENOMEM;
- goto setmask_err;
+ goto iomap_err;
}
memset(cpu_list, 0, sizeof(cpu_list));
@@ -3830,8 +3830,6 @@ msi_initialize_err:
drop_cpu(dd->work[1].cpu_binding);
drop_cpu(dd->work[2].cpu_binding);
}
-setmask_err:
- pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
iomap_err:
kfree(dd);
@@ -3907,7 +3905,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
pci_disable_msi(pdev);
- pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
put_disk(dd->disk);
diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c
index 73047316889e..9f4190501395 100644
--- a/drivers/pci/devres.c
+++ b/drivers/pci/devres.c
@@ -6,30 +6,13 @@
/*
* On the state of PCI's devres implementation:
*
- * The older devres API for PCI has two significant problems:
+ * The older PCI devres API has one significant problem:
*
- * 1. It is very strongly tied to the statically allocated mapping table in
- * struct pcim_iomap_devres below. This is mostly solved in the sense of the
- * pcim_ functions in this file providing things like ranged mapping by
- * bypassing this table, whereas the functions that were present in the old
- * API still enter the mapping addresses into the table for users of the old
- * API.
- *
- * 2. The region-request-functions in pci.c do become managed IF the device has
- * been enabled with pcim_enable_device() instead of pci_enable_device().
- * This resulted in the API becoming inconsistent: Some functions have an
- * obviously managed counter-part (e.g., pci_iomap() <-> pcim_iomap()),
- * whereas some don't and are never managed, while others don't and are
- * _sometimes_ managed (e.g. pci_request_region()).
- *
- * Consequently, in the new API, region requests performed by the pcim_
- * functions are automatically cleaned up through the devres callback
- * pcim_addr_resource_release().
- *
- * Users of pcim_enable_device() + pci_*region*() are redirected in
- * pci.c to the managed functions here in this file. This isn't exactly
- * perfect, but the only alternative way would be to port ALL drivers
- * using said combination to pcim_ functions.
+ * It is very strongly tied to the statically allocated mapping table in struct
+ * pcim_iomap_devres below. This is mostly solved in the sense of the pcim_
+ * functions in this file providing things like ranged mapping by bypassing
+ * this table, whereas the functions that were present in the old API still
+ * enter the mapping addresses into the table for users of the old API.
*
* TODO:
* Remove the legacy table entirely once all calls to pcim_iomap_table() in
@@ -87,104 +70,6 @@ static inline void pcim_addr_devres_clear(struct pcim_addr_devres *res)
res->bar = -1;
}
-/*
- * The following functions, __pcim_*_region*, exist as counterparts to the
- * versions from pci.c - which, unfortunately, can be in "hybrid mode", i.e.,
- * sometimes managed, sometimes not.
- *
- * To separate the APIs cleanly, we define our own, simplified versions here.
- */
-
-/**
- * __pcim_request_region_range - Request a ranged region
- * @pdev: PCI device the region belongs to
- * @bar: BAR the range is within
- * @offset: offset from the BAR's start address
- * @maxlen: length in bytes, beginning at @offset
- * @name: name of the driver requesting the resource
- * @req_flags: flags for the request, e.g., for kernel-exclusive requests
- *
- * Returns: 0 on success, a negative error code on failure.
- *
- * Request a range within a device's PCI BAR. Sanity check the input.
- */
-static int __pcim_request_region_range(struct pci_dev *pdev, int bar,
- unsigned long offset,
- unsigned long maxlen,
- const char *name, int req_flags)
-{
- resource_size_t start = pci_resource_start(pdev, bar);
- resource_size_t len = pci_resource_len(pdev, bar);
- unsigned long dev_flags = pci_resource_flags(pdev, bar);
-
- if (start == 0 || len == 0) /* Unused BAR. */
- return 0;
- if (len <= offset)
- return -EINVAL;
-
- start += offset;
- len -= offset;
-
- if (len > maxlen && maxlen != 0)
- len = maxlen;
-
- if (dev_flags & IORESOURCE_IO) {
- if (!request_region(start, len, name))
- return -EBUSY;
- } else if (dev_flags & IORESOURCE_MEM) {
- if (!__request_mem_region(start, len, name, req_flags))
- return -EBUSY;
- } else {
- /* That's not a device we can request anything on. */
- return -ENODEV;
- }
-
- return 0;
-}
-
-static void __pcim_release_region_range(struct pci_dev *pdev, int bar,
- unsigned long offset,
- unsigned long maxlen)
-{
- resource_size_t start = pci_resource_start(pdev, bar);
- resource_size_t len = pci_resource_len(pdev, bar);
- unsigned long flags = pci_resource_flags(pdev, bar);
-
- if (len <= offset || start == 0)
- return;
-
- if (len == 0 || maxlen == 0) /* This an unused BAR. Do nothing. */
- return;
-
- start += offset;
- len -= offset;
-
- if (len > maxlen)
- len = maxlen;
-
- if (flags & IORESOURCE_IO)
- release_region(start, len);
- else if (flags & IORESOURCE_MEM)
- release_mem_region(start, len);
-}
-
-static int __pcim_request_region(struct pci_dev *pdev, int bar,
- const char *name, int flags)
-{
- unsigned long offset = 0;
- unsigned long len = pci_resource_len(pdev, bar);
-
- return __pcim_request_region_range(pdev, bar, offset, len, name, flags);
-}
-
-static void __pcim_release_region(struct pci_dev *pdev, int bar)
-{
- unsigned long offset = 0;
- unsigned long len = pci_resource_len(pdev, bar);
-
- __pcim_release_region_range(pdev, bar, offset, len);
-}
-
static void pcim_addr_resource_release(struct device *dev, void *resource_raw)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -192,11 +77,11 @@ static void pcim_addr_resource_release(struct device *dev, void *resource_raw)
switch (res->type) {
case PCIM_ADDR_DEVRES_TYPE_REGION:
- __pcim_release_region(pdev, res->bar);
+ pci_release_region(pdev, res->bar);
break;
case PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING:
pci_iounmap(pdev, res->baseaddr);
- __pcim_release_region(pdev, res->bar);
+ pci_release_region(pdev, res->bar);
break;
case PCIM_ADDR_DEVRES_TYPE_MAPPING:
pci_iounmap(pdev, res->baseaddr);
@@ -735,7 +620,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar,
res->type = PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING;
res->bar = bar;
- ret = __pcim_request_region(pdev, bar, name, 0);
+ ret = pci_request_region(pdev, bar, name);
if (ret != 0)
goto err_region;
@@ -749,7 +634,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar,
return res->baseaddr;
err_iomap:
- __pcim_release_region(pdev, bar);
+ pci_release_region(pdev, bar);
err_region:
pcim_addr_devres_free(res);
@@ -823,8 +708,20 @@ err:
}
EXPORT_SYMBOL(pcim_iomap_regions);
-static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name,
- int request_flags)
+/**
+ * pcim_request_region - Request a PCI BAR
+ * @pdev: PCI device to request region for
+ * @bar: Index of BAR to request
+ * @name: Name of the driver requesting the resource
+ *
+ * Returns: 0 on success, a negative error code on failure.
+ *
+ * Request region specified by @bar.
+ *
+ * The region will automatically be released on driver detach. If desired,
+ * release manually only with pcim_release_region().
+ */
+int pcim_request_region(struct pci_dev *pdev, int bar, const char *name)
{
int ret;
struct pcim_addr_devres *res;
@@ -838,7 +735,7 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name,
res->type = PCIM_ADDR_DEVRES_TYPE_REGION;
res->bar = bar;
- ret = __pcim_request_region(pdev, bar, name, request_flags);
+ ret = pci_request_region(pdev, bar, name);
if (ret != 0) {
pcim_addr_devres_free(res);
return ret;
@@ -847,45 +744,9 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name,
devres_add(&pdev->dev, res);
return 0;
}
-
-/**
- * pcim_request_region - Request a PCI BAR
- * @pdev: PCI device to request region for
- * @bar: Index of BAR to request
- * @name: Name of the driver requesting the resource
- *
- * Returns: 0 on success, a negative error code on failure.
- *
- * Request region specified by @bar.
- *
- * The region will automatically be released on driver detach. If desired,
- * release manually only with pcim_release_region().
- */
-int pcim_request_region(struct pci_dev *pdev, int bar, const char *name)
-{
- return _pcim_request_region(pdev, bar, name, 0);
-}
EXPORT_SYMBOL(pcim_request_region);
/**
- * pcim_request_region_exclusive - Request a PCI BAR exclusively
- * @pdev: PCI device to request region for
- * @bar: Index of BAR to request
- * @name: Name of the driver requesting the resource
- *
- * Returns: 0 on success, a negative error code on failure.
- *
- * Request region specified by @bar exclusively.
- *
- * The region will automatically be released on driver detach. If desired,
- * release manually only with pcim_release_region().
- */
-int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name)
-{
- return _pcim_request_region(pdev, bar, name, IORESOURCE_EXCLUSIVE);
-}
-
-/**
* pcim_release_region - Release a PCI BAR
* @pdev: PCI device to operate on
* @bar: Index of BAR to release
@@ -893,7 +754,7 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *nam
* Release a region manually that was previously requested by
* pcim_request_region().
*/
-void pcim_release_region(struct pci_dev *pdev, int bar)
+static void pcim_release_region(struct pci_dev *pdev, int bar)
{
struct pcim_addr_devres res_searched;
@@ -956,30 +817,6 @@ err:
EXPORT_SYMBOL(pcim_request_all_regions);
/**
- * pcim_iounmap_regions - Unmap and release PCI BARs (DEPRECATED)
- * @pdev: PCI device to map IO resources for
- * @mask: Mask of BARs to unmap and release
- *
- * Unmap and release regions specified by @mask.
- *
- * This function is DEPRECATED. Do not use it in new code.
- * Use pcim_iounmap_region() instead.
- */
-void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
-{
- int i;
-
- for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (!mask_contains_bar(mask, i))
- continue;
-
- pcim_iounmap_region(pdev, i);
- pcim_remove_bar_from_legacy_table(pdev, i);
- }
-}
-EXPORT_SYMBOL(pcim_iounmap_regions);
-
-/**
* pcim_iomap_range - Create a ranged __iomap mapping within a PCI BAR
* @pdev: PCI device to map IO resources for
* @bar: Index of the BAR
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index d603a7aa7483..bcc938d4420f 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -131,7 +131,7 @@ static void remove_board(struct controller *ctrl, bool safe_removal)
INDICATOR_NOOP);
/* Don't carry LBMS indications across */
- pcie_reset_lbms_count(ctrl->pcie->port);
+ pcie_reset_lbms(ctrl->pcie->port);
}
static int pciehp_enable_slot(struct controller *ctrl);
diff --git a/drivers/pci/iomap.c b/drivers/pci/iomap.c
index fe706ed946df..ea86c282a386 100644
--- a/drivers/pci/iomap.c
+++ b/drivers/pci/iomap.c
@@ -25,10 +25,6 @@
*
* @maxlen specifies the maximum length to map. If you want to get access to
* the complete BAR from offset to the end, pass %0 here.
- *
- * NOTE:
- * This function is never managed, even if you initialized with
- * pcim_enable_device().
* */
void __iomem *pci_iomap_range(struct pci_dev *dev,
int bar,
@@ -76,10 +72,6 @@ EXPORT_SYMBOL(pci_iomap_range);
*
* @maxlen specifies the maximum length to map. If you want to get access to
* the complete BAR from offset to the end, pass %0 here.
- *
- * NOTE:
- * This function is never managed, even if you initialized with
- * pcim_enable_device().
* */
void __iomem *pci_iomap_wc_range(struct pci_dev *dev,
int bar,
@@ -127,10 +119,6 @@ EXPORT_SYMBOL_GPL(pci_iomap_wc_range);
*
* @maxlen specifies the maximum length to map. If you want to get access to
* the complete BAR without checking for its length first, pass %0 here.
- *
- * NOTE:
- * This function is never managed, even if you initialized with
- * pcim_enable_device(). If you need automatic cleanup, use pcim_iomap().
* */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
@@ -152,10 +140,6 @@ EXPORT_SYMBOL(pci_iomap);
*
* @maxlen specifies the maximum length to map. If you want to get access to
* the complete BAR without checking for its length first, pass %0 here.
- *
- * NOTE:
- * This function is never managed, even if you initialized with
- * pcim_enable_device().
* */
void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
{
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index c6cda56ca52c..278de99b00ce 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1805,6 +1805,7 @@ const struct attribute_group *pci_dev_attr_groups[] = {
&pcie_dev_attr_group,
#ifdef CONFIG_PCIEAER
&aer_stats_attr_group,
+ &aer_attr_group,
#endif
#ifdef CONFIG_PCIEASPM
&aspm_ctrl_attr_group,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 53a070394739..da382ddaafeb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3937,16 +3937,6 @@ void pci_release_region(struct pci_dev *pdev, int bar)
if (!pci_bar_index_is_valid(bar))
return;
- /*
- * This is done for backwards compatibility, because the old PCI devres
- * API had a mode in which the function became managed if it had been
- * enabled with pcim_enable_device() instead of pci_enable_device().
- */
- if (pci_is_managed(pdev)) {
- pcim_release_region(pdev, bar);
- return;
- }
-
if (pci_resource_len(pdev, bar) == 0)
return;
if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
@@ -3984,13 +3974,6 @@ static int __pci_request_region(struct pci_dev *pdev, int bar,
if (!pci_bar_index_is_valid(bar))
return -EINVAL;
- if (pci_is_managed(pdev)) {
- if (exclusive == IORESOURCE_EXCLUSIVE)
- return pcim_request_region_exclusive(pdev, bar, name);
-
- return pcim_request_region(pdev, bar, name);
- }
-
if (pci_resource_len(pdev, bar) == 0)
return 0;
@@ -4027,11 +4010,6 @@ err_out:
*
* Returns 0 on success, or %EBUSY on error. A warning
* message is also printed on failure.
- *
- * NOTE:
- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
- * when pcim_enable_device() has been called in advance. This hybrid feature is
- * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead.
*/
int pci_request_region(struct pci_dev *pdev, int bar, const char *name)
{
@@ -4084,11 +4062,6 @@ err_out:
* @name: Name of the driver requesting the resources
*
* Returns: 0 on success, negative error code on failure.
- *
- * NOTE:
- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
- * when pcim_enable_device() has been called in advance. This hybrid feature is
- * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead.
*/
int pci_request_selected_regions(struct pci_dev *pdev, int bars,
const char *name)
@@ -4104,11 +4077,6 @@ EXPORT_SYMBOL(pci_request_selected_regions);
* @name: name of the driver requesting the resources
*
* Returns: 0 on success, negative error code on failure.
- *
- * NOTE:
- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
- * when pcim_enable_device() has been called in advance. This hybrid feature is
- * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead.
*/
int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars,
const char *name)
@@ -4144,11 +4112,6 @@ EXPORT_SYMBOL(pci_release_regions);
*
* Returns 0 on success, or %EBUSY on error. A warning
* message is also printed on failure.
- *
- * NOTE:
- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
- * when pcim_enable_device() has been called in advance. This hybrid feature is
- * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead.
*/
int pci_request_regions(struct pci_dev *pdev, const char *name)
{
@@ -4173,11 +4136,6 @@ EXPORT_SYMBOL(pci_request_regions);
*
* Returns 0 on success, or %EBUSY on error. A warning message is also
* printed on failure.
- *
- * NOTE:
- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
- * when pcim_enable_device() has been called in advance. This hybrid feature is
- * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead.
*/
int pci_request_regions_exclusive(struct pci_dev *pdev, const char *name)
{
@@ -4718,6 +4676,11 @@ static int pcie_wait_for_link_status(struct pci_dev *pdev,
* @pdev: Device whose link to retrain.
* @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status.
*
+ * Trigger retraining of the PCIe Link and wait for the completion of the
+ * retraining. As link retraining is known to asserts LBMS and may change
+ * the Link Speed, LBMS is cleared after the retraining and the Link Speed
+ * of the subordinate bus is updated.
+ *
* Retrain completion status is retrieved from the Link Status Register
* according to @use_lt. It is not verified whether the use of the DLLLA
* bit is valid.
@@ -4757,7 +4720,19 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt)
* to track link speed or width changes made by hardware itself
* in attempt to correct unreliable link operation.
*/
- pcie_reset_lbms_count(pdev);
+ pcie_reset_lbms(pdev);
+
+ /*
+ * Ensure the Link Speed updates after retraining in case the Link
+ * Speed was changed because of the retraining. While the bwctrl's
+ * IRQ handler normally picks up the new Link Speed, clearing LBMS
+ * races with the IRQ handler reading the Link Status register and
+ * can result in the handler returning early without updating the
+ * Link Speed.
+ */
+ if (pdev->subordinate)
+ pcie_update_link_speed(pdev->subordinate);
+
return rc;
}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index de5d4ef943b2..aed7affd4b8c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -557,6 +557,7 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused)
#define PCI_DPC_RECOVERED 1
#define PCI_DPC_RECOVERING 2
#define PCI_DEV_REMOVED 3
+#define PCI_LINK_LBMS_SEEN 6
#define PCI_DEV_ALLOW_BINDING 7
static inline void pci_dev_assign_added(struct pci_dev *dev)
@@ -598,12 +599,15 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev)
struct aer_err_info {
struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
+ int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES];
int error_dev_num;
+ const char *level; /* printk level */
unsigned int id:16;
unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */
- unsigned int __pad1:5;
+ unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */
+ unsigned int __pad1:4;
unsigned int multi_error_valid:1;
unsigned int first_error:5;
@@ -615,15 +619,16 @@ struct aer_err_info {
struct pcie_tlp_log tlp; /* TLP Header */
};
-int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
-void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
+int aer_get_device_error_info(struct aer_err_info *info, int i);
+void aer_print_error(struct aer_err_info *info, int i);
int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
unsigned int tlp_len, bool flit,
struct pcie_tlp_log *log);
unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc);
void pcie_print_tlp_log(const struct pci_dev *dev,
- const struct pcie_tlp_log *log, const char *pfx);
+ const struct pcie_tlp_log *log, const char *level,
+ const char *pfx);
#endif /* CONFIG_PCIEAER */
#ifdef CONFIG_PCIEPORTBUS
@@ -835,14 +840,9 @@ static inline void pcie_ecrc_get_policy(char *str) { }
#endif
#ifdef CONFIG_PCIEPORTBUS
-void pcie_reset_lbms_count(struct pci_dev *port);
-int pcie_lbms_count(struct pci_dev *port, unsigned long *val);
+void pcie_reset_lbms(struct pci_dev *port);
#else
-static inline void pcie_reset_lbms_count(struct pci_dev *port) {}
-static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
-{
- return -EOPNOTSUPP;
-}
+static inline void pcie_reset_lbms(struct pci_dev *port) {}
#endif
struct pci_dev_reset_methods {
@@ -972,6 +972,7 @@ void pci_no_aer(void);
void pci_aer_init(struct pci_dev *dev);
void pci_aer_exit(struct pci_dev *dev);
extern const struct attribute_group aer_stats_attr_group;
+extern const struct attribute_group aer_attr_group;
void pci_aer_clear_fatal_status(struct pci_dev *dev);
int pci_aer_clear_status(struct pci_dev *dev);
int pci_aer_raw_clear_status(struct pci_dev *dev);
@@ -1070,11 +1071,6 @@ static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
}
#endif
-int pcim_intx(struct pci_dev *dev, int enable);
-int pcim_request_region_exclusive(struct pci_dev *pdev, int bar,
- const char *name);
-void pcim_release_region(struct pci_dev *pdev, int bar);
-
/*
* Config Address for PCI Configuration Mechanism #1
*
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a1cf8c7ef628..70ac66188367 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -28,6 +28,7 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
+#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <acpi/apei.h>
#include <acpi/ghes.h>
@@ -54,8 +55,8 @@ struct aer_rpc {
DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX);
};
-/* AER stats for the device */
-struct aer_stats {
+/* AER info for the device */
+struct aer_info {
/*
* Fields for all AER capable devices. They indicate the errors
@@ -88,6 +89,10 @@ struct aer_stats {
u64 rootport_total_cor_errs;
u64 rootport_total_fatal_errs;
u64 rootport_total_nonfatal_errs;
+
+ /* Ratelimits for errors */
+ struct ratelimit_state correctable_ratelimit;
+ struct ratelimit_state nonfatal_ratelimit;
};
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
@@ -377,7 +382,12 @@ void pci_aer_init(struct pci_dev *dev)
if (!dev->aer_cap)
return;
- dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+ dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL);
+
+ ratelimit_state_init(&dev->aer_info->correctable_ratelimit,
+ DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
+ ratelimit_state_init(&dev->aer_info->nonfatal_ratelimit,
+ DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
/*
* We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER,
@@ -398,8 +408,8 @@ void pci_aer_init(struct pci_dev *dev)
void pci_aer_exit(struct pci_dev *dev)
{
- kfree(dev->aer_stats);
- dev->aer_stats = NULL;
+ kfree(dev->aer_info);
+ dev->aer_info = NULL;
}
#define AER_AGENT_RECEIVER 0
@@ -537,10 +547,10 @@ static const char *aer_agent_string[] = {
{ \
unsigned int i; \
struct pci_dev *pdev = to_pci_dev(dev); \
- u64 *stats = pdev->aer_stats->stats_array; \
+ u64 *stats = pdev->aer_info->stats_array; \
size_t len = 0; \
\
- for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+ for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) { \
if (strings_array[i]) \
len += sysfs_emit_at(buf, len, "%s %llu\n", \
strings_array[i], \
@@ -551,7 +561,7 @@ static const char *aer_agent_string[] = {
i, stats[i]); \
} \
len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \
- pdev->aer_stats->total_field); \
+ pdev->aer_info->total_field); \
return len; \
} \
static DEVICE_ATTR_RO(name)
@@ -572,7 +582,7 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
char *buf) \
{ \
struct pci_dev *pdev = to_pci_dev(dev); \
- return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \
+ return sysfs_emit(buf, "%llu\n", pdev->aer_info->field); \
} \
static DEVICE_ATTR_RO(name)
@@ -599,7 +609,7 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct pci_dev *pdev = to_pci_dev(dev);
- if (!pdev->aer_stats)
+ if (!pdev->aer_info)
return 0;
if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
@@ -617,31 +627,136 @@ const struct attribute_group aer_stats_attr_group = {
.is_visible = aer_stats_attrs_are_visible,
};
+/*
+ * Ratelimit interval
+ * <=0: disabled with ratelimit.interval = 0
+ * >0: enabled with ratelimit.interval in ms
+ */
+#define aer_ratelimit_interval_attr(name, ratelimit) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ \
+ return sysfs_emit(buf, "%d\n", \
+ pdev->aer_info->ratelimit.interval); \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ int interval; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+ if (kstrtoint(buf, 0, &interval) < 0) \
+ return -EINVAL; \
+ \
+ if (interval <= 0) \
+ interval = 0; \
+ else \
+ interval = msecs_to_jiffies(interval); \
+ \
+ pdev->aer_info->ratelimit.interval = interval; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR_RW(name);
+
+#define aer_ratelimit_burst_attr(name, ratelimit) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ \
+ return sysfs_emit(buf, "%d\n", \
+ pdev->aer_info->ratelimit.burst); \
+ } \
+ \
+ static ssize_t \
+ name##_store(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+ { \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ int burst; \
+ \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ \
+ if (kstrtoint(buf, 0, &burst) < 0) \
+ return -EINVAL; \
+ \
+ pdev->aer_info->ratelimit.burst = burst; \
+ \
+ return count; \
+ } \
+ static DEVICE_ATTR_RW(name);
+
+#define aer_ratelimit_attrs(name) \
+ aer_ratelimit_interval_attr(name##_ratelimit_interval_ms, \
+ name##_ratelimit) \
+ aer_ratelimit_burst_attr(name##_ratelimit_burst, \
+ name##_ratelimit)
+
+aer_ratelimit_attrs(correctable)
+aer_ratelimit_attrs(nonfatal)
+
+static struct attribute *aer_attrs[] = {
+ &dev_attr_correctable_ratelimit_interval_ms.attr,
+ &dev_attr_correctable_ratelimit_burst.attr,
+ &dev_attr_nonfatal_ratelimit_interval_ms.attr,
+ &dev_attr_nonfatal_ratelimit_burst.attr,
+ NULL
+};
+
+static umode_t aer_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pdev->aer_info)
+ return 0;
+
+ return a->mode;
+}
+
+const struct attribute_group aer_attr_group = {
+ .name = "aer",
+ .attrs = aer_attrs,
+ .is_visible = aer_attrs_are_visible,
+};
+
static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_info *info)
{
unsigned long status = info->status & ~info->mask;
int i, max = -1;
u64 *counter = NULL;
- struct aer_stats *aer_stats = pdev->aer_stats;
+ struct aer_info *aer_info = pdev->aer_info;
- if (!aer_stats)
+ if (!aer_info)
return;
switch (info->severity) {
case AER_CORRECTABLE:
- aer_stats->dev_total_cor_errs++;
- counter = &aer_stats->dev_cor_errs[0];
+ aer_info->dev_total_cor_errs++;
+ counter = &aer_info->dev_cor_errs[0];
max = AER_MAX_TYPEOF_COR_ERRS;
break;
case AER_NONFATAL:
- aer_stats->dev_total_nonfatal_errs++;
- counter = &aer_stats->dev_nonfatal_errs[0];
+ aer_info->dev_total_nonfatal_errs++;
+ counter = &aer_info->dev_nonfatal_errs[0];
max = AER_MAX_TYPEOF_UNCOR_ERRS;
break;
case AER_FATAL:
- aer_stats->dev_total_fatal_errs++;
- counter = &aer_stats->dev_fatal_errs[0];
+ aer_info->dev_total_fatal_errs++;
+ counter = &aer_info->dev_fatal_errs[0];
max = AER_MAX_TYPEOF_UNCOR_ERRS;
break;
}
@@ -653,37 +768,46 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_source *e_src)
{
- struct aer_stats *aer_stats = pdev->aer_stats;
+ struct aer_info *aer_info = pdev->aer_info;
- if (!aer_stats)
+ if (!aer_info)
return;
if (e_src->status & PCI_ERR_ROOT_COR_RCV)
- aer_stats->rootport_total_cor_errs++;
+ aer_info->rootport_total_cor_errs++;
if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
- aer_stats->rootport_total_fatal_errs++;
+ aer_info->rootport_total_fatal_errs++;
else
- aer_stats->rootport_total_nonfatal_errs++;
+ aer_info->rootport_total_nonfatal_errs++;
+ }
+}
+
+static int aer_ratelimit(struct pci_dev *dev, unsigned int severity)
+{
+ switch (severity) {
+ case AER_NONFATAL:
+ return __ratelimit(&dev->aer_info->nonfatal_ratelimit);
+ case AER_CORRECTABLE:
+ return __ratelimit(&dev->aer_info->correctable_ratelimit);
+ default:
+ return 1; /* Don't ratelimit fatal errors */
}
}
-static void __aer_print_error(struct pci_dev *dev,
- struct aer_err_info *info)
+static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
const char **strings;
unsigned long status = info->status & ~info->mask;
- const char *level, *errmsg;
+ const char *level = info->level;
+ const char *errmsg;
int i;
- if (info->severity == AER_CORRECTABLE) {
+ if (info->severity == AER_CORRECTABLE)
strings = aer_correctable_error_string;
- level = KERN_WARNING;
- } else {
+ else
strings = aer_uncorrectable_error_string;
- level = KERN_ERR;
- }
for_each_set_bit(i, &status, 32) {
errmsg = strings[i];
@@ -693,14 +817,39 @@ static void __aer_print_error(struct pci_dev *dev,
aer_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
- pci_dev_aer_stats_incr(dev, info);
}
-void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+static void aer_print_source(struct pci_dev *dev, struct aer_err_info *info,
+ bool found)
+{
+ u16 source = info->id;
+
+ pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d%s\n",
+ info->multi_error_valid ? "Multiple " : "",
+ aer_error_severity_string[info->severity],
+ pci_domain_nr(dev->bus), PCI_BUS_NUM(source),
+ PCI_SLOT(source), PCI_FUNC(source),
+ found ? "" : " (no details found");
+}
+
+void aer_print_error(struct aer_err_info *info, int i)
{
- int layer, agent;
- int id = pci_dev_id(dev);
- const char *level;
+ struct pci_dev *dev;
+ int layer, agent, id;
+ const char *level = info->level;
+
+ if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
+ return;
+
+ dev = info->dev[i];
+ id = pci_dev_id(dev);
+
+ pci_dev_aer_stats_incr(dev, info);
+ trace_aer_event(pci_name(dev), (info->status & ~info->mask),
+ info->severity, info->tlp_header_valid, &info->tlp);
+
+ if (!info->ratelimit_print[i])
+ return;
if (!info->status) {
pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
@@ -711,8 +860,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
layer = AER_GET_LAYER_ERROR(info->severity, info->status);
agent = AER_GET_AGENT(info->severity, info->status);
- level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
-
aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
aer_error_severity_string[info->severity],
aer_error_layer[layer], aer_agent_string[agent]);
@@ -723,26 +870,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
__aer_print_error(dev, info);
if (info->tlp_header_valid)
- pcie_print_tlp_log(dev, &info->tlp, dev_fmt(" "));
+ pcie_print_tlp_log(dev, &info->tlp, level, dev_fmt(" "));
out:
if (info->id && info->error_dev_num > 1 && info->id == id)
pci_err(dev, " Error of this Agent is reported first\n");
-
- trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
- info->severity, info->tlp_header_valid, &info->tlp);
-}
-
-static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
-{
- u8 bus = info->id >> 8;
- u8 devfn = info->id & 0xff;
-
- pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n",
- info->multi_error_valid ? "Multiple " : "",
- aer_error_severity_string[info->severity],
- pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn),
- PCI_FUNC(devfn));
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
@@ -765,40 +897,48 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
{
int layer, agent, tlp_header_valid = 0;
u32 status, mask;
- struct aer_err_info info;
+ struct aer_err_info info = {
+ .severity = aer_severity,
+ .first_error = PCI_ERR_CAP_FEP(aer->cap_control),
+ };
if (aer_severity == AER_CORRECTABLE) {
status = aer->cor_status;
mask = aer->cor_mask;
+ info.level = KERN_WARNING;
} else {
status = aer->uncor_status;
mask = aer->uncor_mask;
+ info.level = KERN_ERR;
tlp_header_valid = status & AER_LOG_TLP_MASKS;
}
- layer = AER_GET_LAYER_ERROR(aer_severity, status);
- agent = AER_GET_AGENT(aer_severity, status);
-
- memset(&info, 0, sizeof(info));
- info.severity = aer_severity;
info.status = status;
info.mask = mask;
- info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
- pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
+ pci_dev_aer_stats_incr(dev, &info);
+ trace_aer_event(pci_name(dev), (status & ~mask),
+ aer_severity, tlp_header_valid, &aer->header_log);
+
+ if (!aer_ratelimit(dev, info.severity))
+ return;
+
+ layer = AER_GET_LAYER_ERROR(aer_severity, status);
+ agent = AER_GET_AGENT(aer_severity, status);
+
+ aer_printk(info.level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n",
+ status, mask);
__aer_print_error(dev, &info);
- pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
- aer_error_layer[layer], aer_agent_string[agent]);
+ aer_printk(info.level, dev, "aer_layer=%s, aer_agent=%s\n",
+ aer_error_layer[layer], aer_agent_string[agent]);
if (aer_severity != AER_CORRECTABLE)
- pci_err(dev, "aer_uncor_severity: 0x%08x\n",
- aer->uncor_severity);
+ aer_printk(info.level, dev, "aer_uncor_severity: 0x%08x\n",
+ aer->uncor_severity);
if (tlp_header_valid)
- pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" "));
-
- trace_aer_event(dev_name(&dev->dev), (status & ~mask),
- aer_severity, tlp_header_valid, &aer->header_log);
+ pcie_print_tlp_log(dev, &aer->header_log, info.level,
+ dev_fmt(" "));
}
EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
@@ -809,12 +949,27 @@ EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
*/
static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
{
- if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
- e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
- e_info->error_dev_num++;
- return 0;
+ int i = e_info->error_dev_num;
+
+ if (i >= AER_MAX_MULTI_ERR_DEVICES)
+ return -ENOSPC;
+
+ e_info->dev[i] = pci_dev_get(dev);
+ e_info->error_dev_num++;
+
+ /*
+ * Ratelimit AER log messages. "dev" is either the source
+ * identified by the root's Error Source ID or it has an unmasked
+ * error logged in its own AER Capability. Messages are emitted
+ * when "ratelimit_print[i]" is non-zero. If we will print detail
+ * for a downstream device, make sure we print the Error Source ID
+ * from the root as well.
+ */
+ if (aer_ratelimit(dev, e_info->severity)) {
+ e_info->ratelimit_print[i] = 1;
+ e_info->root_ratelimit_print = 1;
}
- return -ENOSPC;
+ return 0;
}
/**
@@ -908,7 +1063,7 @@ static int find_device_iter(struct pci_dev *dev, void *data)
* e_info->error_dev_num and e_info->dev[], based on the given information.
*/
static bool find_source_device(struct pci_dev *parent,
- struct aer_err_info *e_info)
+ struct aer_err_info *e_info)
{
struct pci_dev *dev = parent;
int result;
@@ -926,15 +1081,8 @@ static bool find_source_device(struct pci_dev *parent,
else
pci_walk_bus(parent->subordinate, find_device_iter, e_info);
- if (!e_info->error_dev_num) {
- u8 bus = e_info->id >> 8;
- u8 devfn = e_info->id & 0xff;
-
- pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n",
- pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn),
- PCI_FUNC(devfn));
+ if (!e_info->error_dev_num)
return false;
- }
return true;
}
@@ -1141,9 +1289,10 @@ static void aer_recover_work_func(struct work_struct *work)
pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus,
entry.devfn);
if (!pdev) {
- pr_err("no pci_dev for %04x:%02x:%02x.%x\n",
- entry.domain, entry.bus,
- PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn));
+ pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev found\n",
+ entry.domain, entry.bus,
+ PCI_SLOT(entry.devfn),
+ PCI_FUNC(entry.devfn));
continue;
}
pci_print_aer(pdev, entry.severity, entry.regs);
@@ -1199,19 +1348,26 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
/**
* aer_get_device_error_info - read error status from dev and store it to info
- * @dev: pointer to the device expected to have an error record
* @info: pointer to structure to store the error record
+ * @i: index into info->dev[]
*
* Return: 1 on success, 0 on error.
*
* Note that @info is reused among all error devices. Clear fields properly.
*/
-int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+int aer_get_device_error_info(struct aer_err_info *info, int i)
{
- int type = pci_pcie_type(dev);
- int aer = dev->aer_cap;
+ struct pci_dev *dev;
+ int type, aer;
u32 aercc;
+ if (i >= AER_MAX_MULTI_ERR_DEVICES)
+ return 0;
+
+ dev = info->dev[i];
+ aer = dev->aer_cap;
+ type = pci_pcie_type(dev);
+
/* Must reset in this function */
info->status = 0;
info->tlp_header_valid = 0;
@@ -1263,63 +1419,87 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info)
/* Report all before handling them, to not lose records by reset etc. */
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
- aer_print_error(e_info->dev[i], e_info);
+ if (aer_get_device_error_info(e_info, i))
+ aer_print_error(e_info, i);
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (aer_get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info, i))
handle_error_source(e_info->dev[i], e_info);
}
}
/**
- * aer_isr_one_error - consume an error detected by Root Port
- * @rpc: pointer to the Root Port which holds an error
- * @e_src: pointer to an error source
+ * aer_isr_one_error_type - consume a Correctable or Uncorrectable Error
+ * detected by Root Port or RCEC
+ * @root: pointer to Root Port or RCEC that signaled AER interrupt
+ * @info: pointer to AER error info
*/
-static void aer_isr_one_error(struct aer_rpc *rpc,
- struct aer_err_source *e_src)
+static void aer_isr_one_error_type(struct pci_dev *root,
+ struct aer_err_info *info)
{
- struct pci_dev *pdev = rpc->rpd;
- struct aer_err_info e_info;
+ bool found;
- pci_rootport_aer_stats_incr(pdev, e_src);
+ found = find_source_device(root, info);
/*
- * There is a possibility that both correctable error and
- * uncorrectable error being logged. Report correctable error first.
+ * If we're going to log error messages, we've already set
+ * "info->root_ratelimit_print" and "info->ratelimit_print[i]" to
+ * non-zero (which enables printing) because this is either an
+ * ERR_FATAL or we found a device with an error logged in its AER
+ * Capability.
+ *
+ * If we didn't find the Error Source device, at least log the
+ * Requester ID from the ERR_* Message received by the Root Port or
+ * RCEC, ratelimited by the RP or RCEC.
*/
- if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
- e_info.id = ERR_COR_ID(e_src->id);
- e_info.severity = AER_CORRECTABLE;
-
- if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
- e_info.multi_error_valid = 1;
- else
- e_info.multi_error_valid = 0;
- aer_print_port_info(pdev, &e_info);
+ if (info->root_ratelimit_print ||
+ (!found && aer_ratelimit(root, info->severity)))
+ aer_print_source(root, info, found);
- if (find_source_device(pdev, &e_info))
- aer_process_err_devices(&e_info);
- }
-
- if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
- e_info.id = ERR_UNCOR_ID(e_src->id);
+ if (found)
+ aer_process_err_devices(info);
+}
- if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
- e_info.severity = AER_FATAL;
- else
- e_info.severity = AER_NONFATAL;
+/**
+ * aer_isr_one_error - consume error(s) signaled by an AER interrupt from
+ * Root Port or RCEC
+ * @root: pointer to Root Port or RCEC that signaled AER interrupt
+ * @e_src: pointer to an error source
+ */
+static void aer_isr_one_error(struct pci_dev *root,
+ struct aer_err_source *e_src)
+{
+ u32 status = e_src->status;
- if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV)
- e_info.multi_error_valid = 1;
- else
- e_info.multi_error_valid = 0;
+ pci_rootport_aer_stats_incr(root, e_src);
- aer_print_port_info(pdev, &e_info);
+ /*
+ * There is a possibility that both correctable error and
+ * uncorrectable error being logged. Report correctable error first.
+ */
+ if (status & PCI_ERR_ROOT_COR_RCV) {
+ int multi = status & PCI_ERR_ROOT_MULTI_COR_RCV;
+ struct aer_err_info e_info = {
+ .id = ERR_COR_ID(e_src->id),
+ .severity = AER_CORRECTABLE,
+ .level = KERN_WARNING,
+ .multi_error_valid = multi ? 1 : 0,
+ };
+
+ aer_isr_one_error_type(root, &e_info);
+ }
- if (find_source_device(pdev, &e_info))
- aer_process_err_devices(&e_info);
+ if (status & PCI_ERR_ROOT_UNCOR_RCV) {
+ int fatal = status & PCI_ERR_ROOT_FATAL_RCV;
+ int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV;
+ struct aer_err_info e_info = {
+ .id = ERR_UNCOR_ID(e_src->id),
+ .severity = fatal ? AER_FATAL : AER_NONFATAL,
+ .level = KERN_ERR,
+ .multi_error_valid = multi ? 1 : 0,
+ };
+
+ aer_isr_one_error_type(root, &e_info);
}
}
@@ -1340,7 +1520,7 @@ static irqreturn_t aer_isr(int irq, void *context)
return IRQ_NONE;
while (kfifo_get(&rpc->aer_fifo, &e_src))
- aer_isr_one_error(rpc, &e_src);
+ aer_isr_one_error(rpc->rpd, &e_src);
return IRQ_HANDLED;
}
diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
index d8d2aa85a229..36f939f23d34 100644
--- a/drivers/pci/pcie/bwctrl.c
+++ b/drivers/pci/pcie/bwctrl.c
@@ -38,24 +38,14 @@
/**
* struct pcie_bwctrl_data - PCIe bandwidth controller
* @set_speed_mutex: Serializes link speed changes
- * @lbms_count: Count for LBMS (since last reset)
* @cdev: Thermal cooling device associated with the port
*/
struct pcie_bwctrl_data {
struct mutex set_speed_mutex;
- atomic_t lbms_count;
struct thermal_cooling_device *cdev;
};
-/*
- * Prevent port removal during LBMS count accessors and Link Speed changes.
- *
- * These have to be differentiated because pcie_bwctrl_change_speed() calls
- * pcie_retrain_link() which uses LBMS count reset accessor on success
- * (using just one rwsem triggers "possible recursive locking detected"
- * warning).
- */
-static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
+/* Prevent port removal during Link Speed changes. */
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
static bool pcie_valid_speed(enum pci_bus_speed speed)
@@ -127,18 +117,7 @@ static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool
if (ret != PCIBIOS_SUCCESSFUL)
return pcibios_err_to_errno(ret);
- ret = pcie_retrain_link(port, use_lt);
- if (ret < 0)
- return ret;
-
- /*
- * Ensure link speed updates also with platforms that have problems
- * with notifications.
- */
- if (port->subordinate)
- pcie_update_link_speed(port->subordinate);
-
- return 0;
+ return pcie_retrain_link(port, use_lt);
}
/**
@@ -202,15 +181,14 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
static void pcie_bwnotif_enable(struct pcie_device *srv)
{
- struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
struct pci_dev *port = srv->port;
u16 link_status;
int ret;
- /* Count LBMS seen so far as one */
+ /* Note if LBMS has been seen so far */
ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
- atomic_inc(&data->lbms_count);
+ set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
pcie_capability_set_word(port, PCI_EXP_LNKCTL,
PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
@@ -233,7 +211,6 @@ static void pcie_bwnotif_disable(struct pci_dev *port)
static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
{
struct pcie_device *srv = context;
- struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
struct pci_dev *port = srv->port;
u16 link_status, events;
int ret;
@@ -247,7 +224,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
return IRQ_NONE;
if (events & PCI_EXP_LNKSTA_LBMS)
- atomic_inc(&data->lbms_count);
+ set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
@@ -262,31 +239,10 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
return IRQ_HANDLED;
}
-void pcie_reset_lbms_count(struct pci_dev *port)
+void pcie_reset_lbms(struct pci_dev *port)
{
- struct pcie_bwctrl_data *data;
-
- guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
- data = port->link_bwctrl;
- if (data)
- atomic_set(&data->lbms_count, 0);
- else
- pcie_capability_write_word(port, PCI_EXP_LNKSTA,
- PCI_EXP_LNKSTA_LBMS);
-}
-
-int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
-{
- struct pcie_bwctrl_data *data;
-
- guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
- data = port->link_bwctrl;
- if (!data)
- return -ENOTTY;
-
- *val = atomic_read(&data->lbms_count);
-
- return 0;
+ clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags);
+ pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
}
static int pcie_bwnotif_probe(struct pcie_device *srv)
@@ -308,18 +264,16 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
return ret;
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
- scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
- port->link_bwctrl = data;
-
- ret = request_irq(srv->irq, pcie_bwnotif_irq,
- IRQF_SHARED, "PCIe bwctrl", srv);
- if (ret) {
- port->link_bwctrl = NULL;
- return ret;
- }
+ port->link_bwctrl = data;
- pcie_bwnotif_enable(srv);
+ ret = request_irq(srv->irq, pcie_bwnotif_irq,
+ IRQF_SHARED, "PCIe bwctrl", srv);
+ if (ret) {
+ port->link_bwctrl = NULL;
+ return ret;
}
+
+ pcie_bwnotif_enable(srv);
}
pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
@@ -339,13 +293,11 @@ static void pcie_bwnotif_remove(struct pcie_device *srv)
pcie_cooling_device_unregister(data->cdev);
scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
- scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
- pcie_bwnotif_disable(srv->port);
+ pcie_bwnotif_disable(srv->port);
- free_irq(srv->irq, srv);
+ free_irq(srv->irq, srv);
- srv->port->link_bwctrl = NULL;
- }
+ srv->port->link_bwctrl = NULL;
}
}
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index df42f15c9829..fc18349614d7 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -222,7 +222,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
dpc_tlp_log_len(pdev),
pdev->subordinate->flit_mode,
&tlp_log);
- pcie_print_tlp_log(pdev, &tlp_log, dev_fmt(""));
+ pcie_print_tlp_log(pdev, &tlp_log, KERN_ERR, dev_fmt(""));
if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1)
goto clear_status;
@@ -252,46 +252,59 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
else
info->severity = AER_NONFATAL;
+ info->level = KERN_ERR;
+
+ info->dev[0] = dev;
+ info->error_dev_num = 1;
+
return 1;
}
void dpc_process_error(struct pci_dev *pdev)
{
u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
- struct aer_err_info info;
+ struct aer_err_info info = {};
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
- pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
-
- pci_info(pdev, "containment event, status:%#06x source:%#06x\n",
- status, source);
reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN;
- ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
- pci_warn(pdev, "%s detected\n",
- (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ?
- "unmasked uncorrectable error" :
- (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ?
- "ERR_NONFATAL" :
- (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
- "ERR_FATAL" :
- (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
- "RP PIO error" :
- (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
- "software trigger" :
- "reserved error");
-
- /* show RP PIO error detail information */
- if (pdev->dpc_rp_extensions &&
- reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT &&
- ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO)
- dpc_process_rp_pio_error(pdev);
- else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
- dpc_get_aer_uncorrect_severity(pdev, &info) &&
- aer_get_device_error_info(pdev, &info)) {
- aer_print_error(pdev, &info);
- pci_aer_clear_nonfatal_status(pdev);
- pci_aer_clear_fatal_status(pdev);
+
+ switch (reason) {
+ case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR:
+ pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n",
+ status);
+ if (dpc_get_aer_uncorrect_severity(pdev, &info) &&
+ aer_get_device_error_info(&info, 0)) {
+ aer_print_error(&info, 0);
+ pci_aer_clear_nonfatal_status(pdev);
+ pci_aer_clear_fatal_status(pdev);
+ }
+ break;
+ case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE:
+ case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE:
+ pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID,
+ &source);
+ pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n",
+ status,
+ (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
+ "ERR_FATAL" : "ERR_NONFATAL",
+ pci_domain_nr(pdev->bus), PCI_BUS_NUM(source),
+ PCI_SLOT(source), PCI_FUNC(source));
+ break;
+ case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT:
+ ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
+ pci_warn(pdev, "containment event, status:%#06x: %s detected\n",
+ status,
+ (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
+ "RP PIO error" :
+ (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
+ "software trigger" :
+ "reserved error");
+ /* show RP PIO error detail information */
+ if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO &&
+ pdev->dpc_rp_extensions)
+ dpc_process_rp_pio_error(pdev);
+ break;
}
}
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 31090770fffc..de6381c690f5 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -271,7 +271,6 @@ failed:
pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT);
- /* TODO: Should kernel panic here? */
pci_info(bridge, "device recovery failed\n");
return status;
diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index 890d5391d7f5..71f8fc9ea2ed 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -98,12 +98,14 @@ int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
* pcie_print_tlp_log - Print TLP Header / Prefix Log contents
* @dev: PCIe device
* @log: TLP Log structure
+ * @level: Printk log level
* @pfx: String prefix
*
* Prints TLP Header and Prefix Log information held by @log.
*/
void pcie_print_tlp_log(const struct pci_dev *dev,
- const struct pcie_tlp_log *log, const char *pfx)
+ const struct pcie_tlp_log *log, const char *level,
+ const char *pfx)
{
/* EE_PREFIX_STR fits the extended DW space needed for the Flit mode */
char buf[11 * PCIE_STD_MAX_TLP_HEADERLOG + 1];
@@ -130,6 +132,6 @@ void pcie_print_tlp_log(const struct pci_dev *dev,
}
}
- pci_err(dev, "%sTLP Header%s: %s\n", pfx,
+ dev_printk(level, &dev->dev, "%sTLP Header%s: %s\n", pfx,
log->flit ? " (Flit)" : "", buf);
}
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8d610c17e0f2..64ac1ee944d3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -38,14 +38,10 @@
static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta)
{
- unsigned long count;
- int ret;
-
- ret = pcie_lbms_count(dev, &count);
- if (ret < 0)
- return lnksta & PCI_EXP_LNKSTA_LBMS;
+ if (test_bit(PCI_LINK_LBMS_SEEN, &dev->priv_flags))
+ return true;
- return count > 0;
+ return lnksta & PCI_EXP_LNKSTA_LBMS;
}
/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 053d9dd494d4..d8a12aa896f7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -346,7 +346,7 @@ struct pci_dev {
u8 hdr_type; /* PCI header type (`multi' flag masked out) */
#ifdef CONFIG_PCIEAER
u16 aer_cap; /* AER capability offset */
- struct aer_stats *aer_stats; /* AER stats for this device */
+ struct aer_info *aer_info; /* AER info for this device */
#endif
#ifdef CONFIG_PCIEPORTBUS
struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */
@@ -2317,7 +2317,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr);
void __iomem * const *pcim_iomap_table(struct pci_dev *pdev);
int pcim_request_region(struct pci_dev *pdev, int bar, const char *name);
int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name);
-void pcim_iounmap_regions(struct pci_dev *pdev, int mask);
void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar,
unsigned long offset, unsigned long len);