From 50246dd41ccbcb47beb06d6c1d9355f6b7137a11 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 16 Jan 2009 08:14:51 -0800 Subject: Revert "PCI PM: Register power state of devices during initialization" This reverts commit 98e6e286d7b01deb7453b717aa38ebb69d6cefc0, as Yinghai Lu reports that it breaks kexec with at least the e1000 and e1000e drivers. The reason is that the shutdown sequence puts the hardware into D3 sleep, and the commit causes us to claim that it then is in D0 (running) state just because we don't understand the PM capabilities. Which then later makes "pci_set_power_state()" not do anything, and the device never wakes up properly and just returns 0xff to everything. Reported-by: Yinghai Lu Acked-by: From: Rafael J. Wysocki Cc: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c12f6c790698..e491fdedf705 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1260,15 +1260,14 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - goto Exit; - + return; /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - goto Exit; + return; } dev->pm_cap = pm; @@ -1307,9 +1306,6 @@ void pci_pm_init(struct pci_dev *dev) } else { dev->pme_support = 0; } - - Exit: - pci_update_current_state(dev, PCI_D0); } /** -- cgit v1.2.3 From aa8c6c93747f7b55fa11e1624fec8ca33763a805 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Jan 2009 21:54:43 +0100 Subject: PCI PM: Restore standard config registers of all devices early There is a problem in our handling of suspend-resume of PCI devices that many of them have their standard config registers restored with interrupts enabled and they are put into the full power state with interrupts enabled as well. This may lead to the following scenario: * an interrupt vector is shared between two or more devices * one device is resumed earlier and generates an interrupt * the interrupt handler of another device tries to handle it and attempts to access the device the config space of which hasn't been restored yet and/or which still is in a low power state * the system crashes as a result To prevent this from happening we should restore the standard configuration registers of all devices with interrupts disabled and we should put them into the D0 power state right after that. Unfortunately, this cannot be done using the existing pci_set_power_state(), because it can sleep. Also, to do it we have to make sure that the config spaces of all devices were actually saved during suspend. Signed-off-by: Rafael J. Wysocki Acked-by: Linus Torvalds Signed-off-by: Jesse Barnes --- drivers/pci/pci-driver.c | 91 ++++++++++++++---------------------------------- drivers/pci/pci.c | 63 +++++++++++++++++++++++++++++---- drivers/pci/pci.h | 6 ++++ include/linux/pci.h | 5 +++ 4 files changed, 95 insertions(+), 70 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index c697f2680856..9de07b75b993 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -355,17 +355,27 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) int i = 0; if (drv && drv->suspend) { + pci_dev->state_saved = false; + i = drv->suspend(pci_dev, state); suspend_report_result(drv->suspend, i); - } else { - pci_save_state(pci_dev); - /* - * This is for compatibility with existing code with legacy PM - * support. - */ - pci_pm_set_unknown_state(pci_dev); + if (i) + return i; + + if (pci_dev->state_saved) + goto Fixup; + + if (WARN_ON_ONCE(pci_dev->current_state != PCI_D0)) + goto Fixup; } + pci_save_state(pci_dev); + /* + * This is for compatibility with existing code with legacy PM support. + */ + pci_pm_set_unknown_state(pci_dev); + + Fixup: pci_fixup_device(pci_fixup_suspend, pci_dev); return i; @@ -386,81 +396,34 @@ static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) static int pci_legacy_resume_early(struct device *dev) { - int error = 0; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; - pci_fixup_device(pci_fixup_resume_early, pci_dev); - - if (drv && drv->resume_early) - error = drv->resume_early(pci_dev); - return error; + return drv && drv->resume_early ? + drv->resume_early(pci_dev) : 0; } static int pci_legacy_resume(struct device *dev) { - int error; struct pci_dev * pci_dev = to_pci_dev(dev); struct pci_driver * drv = pci_dev->driver; pci_fixup_device(pci_fixup_resume, pci_dev); - if (drv && drv->resume) { - error = drv->resume(pci_dev); - } else { - /* restore the PCI config space */ - pci_restore_state(pci_dev); - error = pci_pm_reenable_device(pci_dev); - } - return error; + return drv && drv->resume ? + drv->resume(pci_dev) : pci_pm_reenable_device(pci_dev); } /* Auxiliary functions used by the new power management framework */ -static int pci_restore_standard_config(struct pci_dev *pci_dev) -{ - struct pci_dev *parent = pci_dev->bus->self; - int error = 0; - - /* Check if the device's bus is operational */ - if (!parent || parent->current_state == PCI_D0) { - pci_restore_state(pci_dev); - pci_update_current_state(pci_dev, PCI_D0); - } else { - dev_warn(&pci_dev->dev, "unable to restore config, " - "bridge %s in low power state D%d\n", pci_name(parent), - parent->current_state); - pci_dev->current_state = PCI_UNKNOWN; - error = -EAGAIN; - } - - return error; -} - -static bool pci_is_bridge(struct pci_dev *pci_dev) -{ - return !!(pci_dev->subordinate); -} - static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) { - if (pci_restore_standard_config(pci_dev)) - pci_fixup_device(pci_fixup_resume_early, pci_dev); + pci_restore_standard_config(pci_dev); + pci_fixup_device(pci_fixup_resume_early, pci_dev); } static int pci_pm_default_resume(struct pci_dev *pci_dev) { - /* - * pci_restore_standard_config() should have been called once already, - * but it would have failed if the device's parent bridge had not been - * in power state D0 at that time. Check it and try again if necessary. - */ - if (pci_dev->current_state == PCI_UNKNOWN) { - int error = pci_restore_standard_config(pci_dev); - if (error) - return error; - } - pci_fixup_device(pci_fixup_resume, pci_dev); if (!pci_is_bridge(pci_dev)) @@ -575,11 +538,11 @@ static int pci_pm_resume_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + pci_pm_default_resume_noirq(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_pm_default_resume_noirq(pci_dev); - if (drv && drv->pm && drv->pm->resume_noirq) error = drv->pm->resume_noirq(dev); @@ -730,11 +693,11 @@ static int pci_pm_restore_noirq(struct device *dev) struct device_driver *drv = dev->driver; int error = 0; + pci_pm_default_resume_noirq(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume_early(dev); - pci_pm_default_resume_noirq(pci_dev); - if (drv && drv->pm && drv->pm->restore_noirq) error = drv->pm->restore_noirq(dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e491fdedf705..17bd9325a245 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -22,7 +22,7 @@ #include /* isa_dma_bridge_buggy */ #include "pci.h" -unsigned int pci_pm_d3_delay = 10; +unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; #ifdef CONFIG_PCI_DOMAINS int pci_domains_supported = 1; @@ -426,6 +426,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * given PCI device * @dev: PCI device to handle. * @state: PCI power state (D0, D1, D2, D3hot) to put the device into. + * @wait: If 'true', wait for the device to change its power state * * RETURN VALUE: * -EINVAL if the requested state is invalid. @@ -435,7 +436,7 @@ static inline int platform_pci_sleep_wake(struct pci_dev *dev, bool enable) * 0 if device's power state has been successfully changed. */ static int -pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) +pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state, bool wait) { u16 pmcsr; bool need_restore = false; @@ -480,8 +481,10 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) break; case PCI_UNKNOWN: /* Boot-up */ if ((pmcsr & PCI_PM_CTRL_STATE_MASK) == PCI_D3hot - && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) + && !(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) { need_restore = true; + wait = true; + } /* Fall-through: force to D0 */ default: pmcsr = 0; @@ -491,12 +494,15 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) /* enter specified state */ pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, pmcsr); + if (!wait) + return 0; + /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) msleep(pci_pm_d3_delay); else if (state == PCI_D2 || dev->current_state == PCI_D2) - udelay(200); + udelay(PCI_PM_D2_DELAY); dev->current_state = state; @@ -515,7 +521,7 @@ pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) if (need_restore) pci_restore_bars(dev); - if (dev->bus->self) + if (wait && dev->bus->self) pcie_aspm_pm_state_change(dev->bus->self); return 0; @@ -585,7 +591,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) return 0; - error = pci_raw_set_power_state(dev, state); + error = pci_raw_set_power_state(dev, state, true); if (state > PCI_D0 && platform_pci_power_manageable(dev)) { /* Allow the platform to finalize the transition */ @@ -730,6 +736,7 @@ pci_save_state(struct pci_dev *dev) /* XXX: 100% dword access ok here? */ for (i = 0; i < 16; i++) pci_read_config_dword(dev, i * 4,&dev->saved_config_space[i]); + dev->state_saved = true; if ((i = pci_save_pcie_state(dev)) != 0) return i; if ((i = pci_save_pcix_state(dev)) != 0) @@ -1373,6 +1380,50 @@ void pci_allocate_cap_save_buffers(struct pci_dev *dev) "unable to preallocate PCI-X save buffer\n"); } +/** + * pci_restore_standard_config - restore standard config registers of PCI device + * @dev: PCI device to handle + * + * This function assumes that the device's configuration space is accessible. + * If the device needs to be powered up, the function will wait for it to + * change the state. + */ +int pci_restore_standard_config(struct pci_dev *dev) +{ + pci_power_t prev_state; + int error; + + pci_restore_state(dev); + pci_update_current_state(dev, PCI_D0); + + prev_state = dev->current_state; + if (prev_state == PCI_D0) + return 0; + + error = pci_raw_set_power_state(dev, PCI_D0, false); + if (error) + return error; + + if (pci_is_bridge(dev)) { + if (prev_state > PCI_D1) + mdelay(PCI_PM_BUS_WAIT); + } else { + switch(prev_state) { + case PCI_D3cold: + case PCI_D3hot: + mdelay(pci_pm_d3_delay); + break; + case PCI_D2: + udelay(PCI_PM_D2_DELAY); + break; + } + } + + dev->current_state = PCI_D0; + + return 0; +} + /** * pci_enable_ari - enable ARI forwarding if hardware support it * @dev: the PCI device diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 1351bb4addde..26ddf78ac300 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -49,6 +49,12 @@ extern void pci_disable_enabled_device(struct pci_dev *dev); extern void pci_pm_init(struct pci_dev *dev); extern void platform_pci_wakeup_init(struct pci_dev *dev); extern void pci_allocate_cap_save_buffers(struct pci_dev *dev); +extern int pci_restore_standard_config(struct pci_dev *dev); + +static inline bool pci_is_bridge(struct pci_dev *pci_dev) +{ + return !!(pci_dev->subordinate); +} extern int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); extern int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); diff --git a/include/linux/pci.h b/include/linux/pci.h index 80f8b8b65fde..48890cf3f96e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -117,6 +117,10 @@ typedef int __bitwise pci_power_t; #define PCI_UNKNOWN ((pci_power_t __force) 5) #define PCI_POWER_ERROR ((pci_power_t __force) -1) +#define PCI_PM_D2_DELAY 200 +#define PCI_PM_D3_WAIT 10 +#define PCI_PM_BUS_WAIT 50 + /** The pci_channel state describes connectivity between the CPU and * the pci device. If some PCI bus between here and the pci device * has crashed or locked up, this info is reflected here. @@ -252,6 +256,7 @@ struct pci_dev { unsigned int ari_enabled:1; /* ARI forwarding */ unsigned int is_managed:1; unsigned int is_pcie:1; + unsigned int state_saved:1; pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */ -- cgit v1.2.3 From 48f67f54a53bb68619a63c3f38cf7f502ed74b1d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Jan 2009 23:38:31 +0100 Subject: PCI PM: Power up devices before restoring their state Devices that have MSI-X enabled before suspend to RAM or hibernation and that are in a low power state during resume will not be handled correctly by pci_restore_standard_config(). Namely, it first calls pci_restore_state() which calls pci_restore_msi_state(), which in turn executes __pci_restore_msix_state() that accesses the device's memory space to restore the contents of the MSI-X table. However, if the device is in a low power state at this point, it's memory space is not accessible. The easiest way to fix this potential problem is to make pci_restore_standard_config() call pci_restore_state() after it has put the device into the full power state, D0. Fortunately, all of this is done with interrupts off, so the change of ordering should not cause any trouble. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 17bd9325a245..f0aa3d533839 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1393,12 +1393,11 @@ int pci_restore_standard_config(struct pci_dev *dev) pci_power_t prev_state; int error; - pci_restore_state(dev); pci_update_current_state(dev, PCI_D0); prev_state = dev->current_state; if (prev_state == PCI_D0) - return 0; + goto Restore; error = pci_raw_set_power_state(dev, PCI_D0, false); if (error) @@ -1421,7 +1420,8 @@ int pci_restore_standard_config(struct pci_dev *dev) dev->current_state = PCI_D0; - return 0; + Restore: + return pci_restore_state(dev); } /** -- cgit v1.2.3 From 476e7faefc43f106a90b5c96166c59b75de19d30 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Jan 2009 23:39:57 +0100 Subject: PCI PM: Do not wait for buses in B2 or B3 during resume pci_restore_standard_config() adds extra delay for PCI buses in low power states (B2 or B3), but this is only correct for buses in B2, because the buses in B3 are reset when they are put back into B0. Thus we should wait for such buses to settle after the reset, but it's not a good idea to wait that long (1.1 s) with interrupts off. On the other hand, we have never waited for buses in B2 and B3 during resume and it seems reasonable to go back to this well tested behaviour. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index f0aa3d533839..48807556b47a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1403,19 +1403,19 @@ int pci_restore_standard_config(struct pci_dev *dev) if (error) return error; - if (pci_is_bridge(dev)) { - if (prev_state > PCI_D1) - mdelay(PCI_PM_BUS_WAIT); - } else { - switch(prev_state) { - case PCI_D3cold: - case PCI_D3hot: - mdelay(pci_pm_d3_delay); - break; - case PCI_D2: - udelay(PCI_PM_D2_DELAY); - break; - } + /* + * This assumes that we won't get a bus in B2 or B3 from the BIOS, but + * we've made this assumption forever and it appears to be universally + * satisfied. + */ + switch(prev_state) { + case PCI_D3cold: + case PCI_D3hot: + mdelay(pci_pm_d3_delay); + break; + case PCI_D2: + udelay(PCI_PM_D2_DELAY); + break; } dev->current_state = PCI_D0; -- cgit v1.2.3 From 144a76bc885ef4852601c66595326e59f12877f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Feb 2009 01:57:22 +0100 Subject: PCI PM: Check if the state has been saved before trying to restore it Check if the standard configuration registers of a PCI device have been saved during suspend before trying to restore them during resume. Signed-off-by: Rafael J. Wysocki Reported-By: Benjamin Herrenschmidt Acked-by: Linus Torvalds Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 48807556b47a..87c904233bf5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1421,7 +1421,7 @@ int pci_restore_standard_config(struct pci_dev *dev) dev->current_state = PCI_D0; Restore: - return pci_restore_state(dev); + return dev->state_saved ? pci_restore_state(dev) : 0; } /** -- cgit v1.2.3 From 49c968111aee4a463d3247937b63efa63a65f378 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 4 Feb 2009 02:02:15 +0100 Subject: PCI PM: Read power state from device after trying to change it on resume pci_restore_standard_config() unconditionally changes current_state to PCI_D0 after attempting to change the device's power state, but it should rather read the actual current power state from the device. Signed-off-by: Rafael J. Wysocki Acked-by: Linus Torvalds Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 87c904233bf5..e3efe6b19ee7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1418,7 +1418,7 @@ int pci_restore_standard_config(struct pci_dev *dev) break; } - dev->current_state = PCI_D0; + pci_update_current_state(dev, PCI_D0); Restore: return dev->state_saved ? pci_restore_state(dev) : 0; -- cgit v1.2.3 From f5ddcac435b6c6133a9c137c345abef53b93cf55 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 9 Jan 2009 17:03:20 -0800 Subject: PCI: fix missing kernel-doc and typos Fix pci kernel-doc parameter missing notation, correct function name, and fix typo: Warning(linux-2.6.28-git10//drivers/pci/pci.c:1511): No description found for parameter 'exclusive' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3efe6b19ee7..6d6120007af4 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1540,16 +1540,21 @@ void pci_release_region(struct pci_dev *pdev, int bar) } /** - * pci_request_region - Reserved PCI I/O and memory resource + * __pci_request_region - Reserved PCI I/O and memory resource * @pdev: PCI device whose resources are to be reserved * @bar: BAR to be reserved * @res_name: Name to be associated with resource. + * @exclusive: whether the region access is exclusive or not * * Mark the PCI region associated with PCI device @pdev BR @bar as * being reserved by owner @res_name. Do not access any * address inside the PCI regions unless this call returns * successfully. * + * If @exclusive is set, then the region is marked so that userspace + * is explicitly not allowed to map the resource via /dev/mem or + * sysfs MMIO access. + * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. */ @@ -1588,12 +1593,12 @@ err_out: } /** - * pci_request_region - Reserved PCI I/O and memory resource + * pci_request_region - Reserve PCI I/O and memory resource * @pdev: PCI device whose resources are to be reserved * @bar: BAR to be reserved - * @res_name: Name to be associated with resource. + * @res_name: Name to be associated with resource * - * Mark the PCI region associated with PCI device @pdev BR @bar as + * Mark the PCI region associated with PCI device @pdev BAR @bar as * being reserved by owner @res_name. Do not access any * address inside the PCI regions unless this call returns * successfully. -- cgit v1.2.3