From 68227c03cba84a24faf8a7277d2b1a03c8959c2c Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 7 Jun 2017 12:26:49 +0200 Subject: fuse: initialize the flock flag in fuse_file on allocation Before the patch, the flock flag could remain uninitialized for the lifespan of the fuse_file allocation. Unless set to true in fuse_file_flock(), it would remain in an indeterminate state until read in an if statement in fuse_release_common(). This could consequently lead to taking an unexpected branch in the code. The bug was discovered by a runtime instrumentation designed to detect use of uninitialized memory in the kernel. Signed-off-by: Mateusz Jurczyk Fixes: 37fb3a30b462 ("fuse: fix flock") Cc: # v3.1+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3ee4fdc3da9e..76eac2a554c4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; - ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); + ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL); if (unlikely(!ff)) return NULL; -- cgit v1.2.3 From a3507e48d3f99a93a3056a34a5365f310434570f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Wed, 21 Jun 2017 01:46:37 +0900 Subject: iio: light: tsl2563: use correct event code The TSL2563 driver provides three iio channels, two of which are raw ADC channels (channel 0 and channel 1) in the device and the remaining one is calculated by the two. The ADC channel 0 only supports programmable interrupt with threshold settings and this driver supports the event but the generated event code does not contain the corresponding iio channel type. This is going to change userspace ABI. Hopefully fixing this to be what it should always have been won't break any userspace code. Cc: Jonathan Cameron Signed-off-by: Akinobu Mita Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2563.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c index e7d4ea75e007..7599693f7fe9 100644 --- a/drivers/iio/light/tsl2563.c +++ b/drivers/iio/light/tsl2563.c @@ -626,7 +626,7 @@ static irqreturn_t tsl2563_event_handler(int irq, void *private) struct tsl2563_chip *chip = iio_priv(dev_info); iio_push_event(dev_info, - IIO_UNMOD_EVENT_CODE(IIO_LIGHT, + IIO_UNMOD_EVENT_CODE(IIO_INTENSITY, 0, IIO_EV_TYPE_THRESH, IIO_EV_DIR_EITHER), -- cgit v1.2.3 From add6e6ab3ee0e237822e0951476d3df039b49ec8 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 22 Jun 2017 19:46:43 +0200 Subject: iio: pressure: st_pressure_core: disable multiread by default for LPS22HB Set multiread variable to false for LPS22HB pressure sensor since it is already enabled in CTRL_REG2. Previous configuration does not cause any issue in I2C communication since SUB Msb has no meaning whereas it breaks register address in SPI communication Fixes: e039e2f5b4da (iio:st_pressure:initial lps22hb sensor support) Signed-off-by: Lorenzo Bianconi Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/st_pressure_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index fd0edca0e656..99448012b47f 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -456,7 +456,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { .mask_od = 0x40, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, - .multi_read_bit = true, + .multi_read_bit = false, .bootime = 2, }, }; -- cgit v1.2.3 From 469bcef53c546bb792aa66303933272991b7831d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:39 +0200 Subject: irqchip/atmel-aic: Fix unbalanced of_node_put() in aic_common_irq_fixup() aic_common_irq_fixup() is calling twice of_node_put() on the same node thus leading to an unbalanced refcount on the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: b2f579b58e93 ("irqchip: atmel-aic: Add irq fixup infrastructure") Cc: Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 28b26c80f4cf..7c5a43488d27 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -196,7 +196,6 @@ static void __init aic_common_irq_fixup(const struct of_device_id *matches) return; match = of_match_node(matches, root); - of_node_put(root); if (match) { void (*fixup)(struct device_node *) = match->data; -- cgit v1.2.3 From 277867ade8262583f4280cadbe90e0031a3706a7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:40 +0200 Subject: irqchip/atmel-aic: Fix unbalanced refcount in aic_common_rtc_irq_fixup() of_find_compatible_node() is calling of_node_put() on its first argument thus leading to an unbalanced of_node_get/put() issue if the node has not been retained before that. Instead of passing the root node, pass NULL, which does exactly the same: iterate over all DT nodes, starting from the root node. Signed-off-by: Boris Brezillon Reported-by: Alexandre Belloni Fixes: 3d61467f9bab ("irqchip: atmel-aic: Implement RTC irq fixup") Cc: Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 7c5a43488d27..056507099725 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -142,9 +142,9 @@ void __init aic_common_rtc_irq_fixup(struct device_node *root) struct device_node *np; void __iomem *regs; - np = of_find_compatible_node(root, NULL, "atmel,at91rm9200-rtc"); + np = of_find_compatible_node(NULL, NULL, "atmel,at91rm9200-rtc"); if (!np) - np = of_find_compatible_node(root, NULL, + np = of_find_compatible_node(NULL, NULL, "atmel,at91sam9x5-rtc"); if (!np) -- cgit v1.2.3 From 0a46230bf03549435156b36dee9e7489b8270be7 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 4 Jul 2017 11:10:41 +0200 Subject: irqchip/atmel-aic: Remove root argument from ->fixup() prototype We are no longer using the root argument passed to the ->fixup() hooks. Remove it. Signed-off-by: Boris Brezillon Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-atmel-aic-common.c | 8 ++++---- drivers/irqchip/irq-atmel-aic-common.h | 4 ++-- drivers/irqchip/irq-atmel-aic.c | 14 +++++++------- drivers/irqchip/irq-atmel-aic5.c | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index 056507099725..072bd227b6c6 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -137,7 +137,7 @@ static void __init aic_common_ext_irq_of_init(struct irq_domain *domain) #define AT91_RTC_IMR 0x28 #define AT91_RTC_IRQ_MASK 0x1f -void __init aic_common_rtc_irq_fixup(struct device_node *root) +void __init aic_common_rtc_irq_fixup(void) { struct device_node *np; void __iomem *regs; @@ -165,7 +165,7 @@ void __init aic_common_rtc_irq_fixup(struct device_node *root) #define AT91_RTT_ALMIEN (1 << 16) /* Alarm Interrupt Enable */ #define AT91_RTT_RTTINCIEN (1 << 17) /* Real Time Timer Increment Interrupt Enable */ -void __init aic_common_rtt_irq_fixup(struct device_node *root) +void __init aic_common_rtt_irq_fixup(void) { struct device_node *np; void __iomem *regs; @@ -198,8 +198,8 @@ static void __init aic_common_irq_fixup(const struct of_device_id *matches) match = of_match_node(matches, root); if (match) { - void (*fixup)(struct device_node *) = match->data; - fixup(root); + void (*fixup)(void) = match->data; + fixup(); } of_node_put(root); diff --git a/drivers/irqchip/irq-atmel-aic-common.h b/drivers/irqchip/irq-atmel-aic-common.h index af60376d50de..242e62c1851e 100644 --- a/drivers/irqchip/irq-atmel-aic-common.h +++ b/drivers/irqchip/irq-atmel-aic-common.h @@ -33,8 +33,8 @@ struct irq_domain *__init aic_common_of_init(struct device_node *node, const char *name, int nirqs, const struct of_device_id *matches); -void __init aic_common_rtc_irq_fixup(struct device_node *root); +void __init aic_common_rtc_irq_fixup(void); -void __init aic_common_rtt_irq_fixup(struct device_node *root); +void __init aic_common_rtt_irq_fixup(void); #endif /* __IRQ_ATMEL_AIC_COMMON_H */ diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 37f952dd9fc9..bb1ad451392f 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -209,20 +209,20 @@ static const struct irq_domain_ops aic_irq_ops = { .xlate = aic_irq_domain_xlate, }; -static void __init at91rm9200_aic_irq_fixup(struct device_node *root) +static void __init at91rm9200_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); + aic_common_rtc_irq_fixup(); } -static void __init at91sam9260_aic_irq_fixup(struct device_node *root) +static void __init at91sam9260_aic_irq_fixup(void) { - aic_common_rtt_irq_fixup(root); + aic_common_rtt_irq_fixup(); } -static void __init at91sam9g45_aic_irq_fixup(struct device_node *root) +static void __init at91sam9g45_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); - aic_common_rtt_irq_fixup(root); + aic_common_rtc_irq_fixup(); + aic_common_rtt_irq_fixup(); } static const struct of_device_id aic_irq_fixups[] __initconst = { diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index c04ee9a23d09..6acad2ea0fb3 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -305,9 +305,9 @@ static const struct irq_domain_ops aic5_irq_ops = { .xlate = aic5_irq_domain_xlate, }; -static void __init sama5d3_aic_irq_fixup(struct device_node *root) +static void __init sama5d3_aic_irq_fixup(void) { - aic_common_rtc_irq_fixup(root); + aic_common_rtc_irq_fixup(); } static const struct of_device_id aic5_irq_fixups[] __initconst = { -- cgit v1.2.3 From 456c59c31c5126fe31c64956c43670060ea9debd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:34 +0100 Subject: irqchip/gic-v2: Report failures in gic_irq_domain_alloc If the GIC cannot map an IRQ via irq_domain_ops->alloc(), it doesn't return an error code. This can cause a problem with drivers, where it thinks it has successfully got an IRQ for the device, but requesting the same ends up failure with -ENOSYS (as the IRQ's chip is not set). Fixes: commit 9a1091ef0017c ("irqchip: gic: Support hierarchy irq domain.") Cc: Yingjoe Chen Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 1b1df4f770bd..940c16278758 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1027,8 +1027,11 @@ static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - for (i = 0; i < nr_irqs; i++) - gic_irq_domain_map(domain, virq + i, hwirq + i); + for (i = 0; i < nr_irqs; i++) { + ret = gic_irq_domain_map(domain, virq + i, hwirq + i); + if (ret) + return ret; + } return 0; } -- cgit v1.2.3 From 63c16c6eacb69d0cbdaee5dea0dd56d238375fe6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:33 +0100 Subject: irqchip/gic-v3: Report failures in gic_irq_domain_alloc If the GIC cannot map an IRQ via irq_domain_ops->alloc(), it doesn't return an error code. This can cause a problem with drivers, where it thinks it has successfully got an IRQ for the device, but requesting the same ends up failure with -ENOSYS (as the IRQ's chip is not set). Fixes: commit 443acc4f37f6 ("irqchip: GICv3: Convert to domain hierarchy") Cc: Marc Zyngier Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index dbffb7ab6203..47630e9998b3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -831,8 +831,11 @@ static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, if (ret) return ret; - for (i = 0; i < nr_irqs; i++) - gic_irq_domain_map(domain, virq + i, hwirq + i); + for (i = 0; i < nr_irqs; i++) { + ret = gic_irq_domain_map(domain, virq + i, hwirq + i); + if (ret) + return ret; + } return 0; } -- cgit v1.2.3 From 65a30f8b300107266f316d550f060ccc186201a3 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 4 Jul 2017 10:56:35 +0100 Subject: irqchip/gic-v3: Honor forced affinity setting Honor the 'force' flag for set_affinity, by selecting a CPU from the given mask (which may not be reported "online" by the cpu_online_mask). Some drivers, like ARM PMU, rely on it. Cc: Marc Zyngier Reported-by: Mark Rutland Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 47630e9998b3..5ba64a7584a3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -640,11 +640,16 @@ static void gic_smp_init(void) static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { - unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); + unsigned int cpu; void __iomem *reg; int enabled; u64 val; + if (force) + cpu = cpumask_first(mask_val); + else + cpu = cpumask_any_and(mask_val, cpu_online_mask); + if (cpu >= nr_cpu_ids) return -EINVAL; -- cgit v1.2.3 From be2ea53330f1865ac5b00377f2074f8e255bf74c Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 3 Jul 2017 15:09:26 +0200 Subject: iio: adc: sun4i-gpadc-iio: fix unbalanced irq enable/disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When initializing interrupts, the devm_request_any_context_irq will enable them right away. An atomic flag was set in sun4i_irq_init and read in the interrupt handler to make sure no unwanted interrupts were handled. If an unwanted interrupt occurred, the handler would disable the irq and return IRQ_HANDLED. However, at the end of sun4i_irq_init, the irq would be disabled as well, resulting in an unbalanced enable (since there are more disables than enables, the code enabling the interrupt would never be called). When reading the ADC or the temperature, the respective irq would be enabled in the read function and disabled in the irq handler. In the read function, we would wait for a completion (with a timeout) that will be set in the irq handler. However, if the completion is never set or if the wait for completion times out, the irq would not be disabled in the read function resulting in an unbalanced enable once the read function is called again (since there are 2+ enables for no disable). Moving disable_irq from the irq handler to the read function get rid of these two cases of unbalanced enable. Fixes: d1caa9905538 ("iio: adc: add support for Allwinner SoCs ADC") Reported-by: Andreas Färber Signed-off-by: Quentin Schulz Acked-by: Maxime Ripard Signed-off-by: Jonathan Cameron --- drivers/iio/adc/sun4i-gpadc-iio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/adc/sun4i-gpadc-iio.c b/drivers/iio/adc/sun4i-gpadc-iio.c index 81d4c39e414a..137f577d9432 100644 --- a/drivers/iio/adc/sun4i-gpadc-iio.c +++ b/drivers/iio/adc/sun4i-gpadc-iio.c @@ -256,6 +256,7 @@ static int sun4i_gpadc_read(struct iio_dev *indio_dev, int channel, int *val, err: pm_runtime_put_autosuspend(indio_dev->dev.parent); + disable_irq(irq); mutex_unlock(&info->mutex); return ret; @@ -365,7 +366,6 @@ static irqreturn_t sun4i_gpadc_temp_data_irq_handler(int irq, void *dev_id) complete(&info->completion); out: - disable_irq_nosync(info->temp_data_irq); return IRQ_HANDLED; } @@ -380,7 +380,6 @@ static irqreturn_t sun4i_gpadc_fifo_data_irq_handler(int irq, void *dev_id) complete(&info->completion); out: - disable_irq_nosync(info->fifo_data_irq); return IRQ_HANDLED; } -- cgit v1.2.3 From 631b010abc5b57009c6a8328f51492665f6ef310 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 30 Jun 2017 19:42:54 +0200 Subject: iio: adc: Revert "axp288: Drop bogus AXP288_ADC_TS_PIN_CTRL register modifications" Inheriting the ADC BIAS current settings from the BIOS instead of hardcoding then causes the AXP288 to disable charging (I think it mis-detects an overheated battery) on at least one model tablet. So lets go back to hard coding the values, this reverts commit fa2849e9649b ("iio: adc: axp288: Drop bogus AXP288_ADC_TS_PIN_CTRL register modifications"), fixing charging not working on the model tablet in question. The exact cause is not fully understood, hence the revert to a known working state. Cc: stable@vger.kernel.org Reported-by: Umberto Ixxo Signed-off-by: Hans de Goede Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp288_adc.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 64799ad7ebad..7fd24949c0c1 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -28,6 +28,8 @@ #include #define AXP288_ADC_EN_MASK 0xF1 +#define AXP288_ADC_TS_PIN_GPADC 0xF2 +#define AXP288_ADC_TS_PIN_ON 0xF3 enum axp288_adc_id { AXP288_ADC_TS, @@ -121,6 +123,16 @@ static int axp288_adc_read_channel(int *val, unsigned long address, return IIO_VAL_INT; } +static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, + unsigned long address) +{ + /* channels other than GPADC do not need to switch TS pin */ + if (address != AXP288_GP_ADC_H) + return 0; + + return regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); +} + static int axp288_adc_read_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val, int *val2, long mask) @@ -131,7 +143,16 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, mutex_lock(&indio_dev->mlock); switch (mask) { case IIO_CHAN_INFO_RAW: + if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_GPADC, + chan->address)) { + dev_err(&indio_dev->dev, "GPADC mode\n"); + ret = -EINVAL; + break; + } ret = axp288_adc_read_channel(val, chan->address, info->regmap); + if (axp288_adc_set_ts(info->regmap, AXP288_ADC_TS_PIN_ON, + chan->address)) + dev_err(&indio_dev->dev, "TS pin restore\n"); break; default: ret = -EINVAL; @@ -141,6 +162,15 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev, return ret; } +static int axp288_adc_set_state(struct regmap *regmap) +{ + /* ADC should be always enabled for internal FG to function */ + if (regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, AXP288_ADC_TS_PIN_ON)) + return -EIO; + + return regmap_write(regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); +} + static const struct iio_info axp288_adc_iio_info = { .read_raw = &axp288_adc_read_raw, .driver_module = THIS_MODULE, @@ -169,7 +199,7 @@ static int axp288_adc_probe(struct platform_device *pdev) * Set ADC to enabled state at all time, including system suspend. * otherwise internal fuel gauge functionality may be affected. */ - ret = regmap_write(info->regmap, AXP20X_ADC_EN1, AXP288_ADC_EN_MASK); + ret = axp288_adc_set_state(axp20x->regmap); if (ret) { dev_err(&pdev->dev, "unable to enable ADC device\n"); return ret; -- cgit v1.2.3 From a7b8829d242b1a58107e9c02b09e93aec446d55c Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 5 Jul 2017 20:30:01 +0200 Subject: iio: accel: st_accel: add SPI-3wire support Add SPI Serial Interface Mode (SIM) register information in st_sensor_settings look up table to support devices (like LSM303AGR accel sensor) that allow just SPI-3wire communication mode. SIM mode has to be configured before any other operation since it is not enabled by default and the driver is not able to read without that configuration Whilst a fairly substantial patch, the actual logic is simple and it is better to have the generic fix than a band aid. Fixes: ddc05fa28606 (iio: st-accel: add support for lsm303agr accel) Signed-off-by: Lorenzo Bianconi Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 32 +++++++++++++++++++++++++ drivers/iio/common/st_sensors/st_sensors_core.c | 29 ++++++++++++++++++++++ include/linux/iio/common/st_sensors.h | 7 ++++++ include/linux/platform_data/st_sensors_pdata.h | 2 ++ 4 files changed, 70 insertions(+) diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 784670e2736b..2ee3ae11eb2a 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -166,6 +166,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_ihl = 0x02, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, .multi_read_bit = true, .bootime = 2, }, @@ -234,6 +238,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_od = 0x40, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, .multi_read_bit = true, .bootime = 2, }, @@ -316,6 +324,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .en_mask = 0x08, }, }, + .sim = { + .addr = 0x24, + .value = BIT(0), + }, .multi_read_bit = false, .bootime = 2, }, @@ -379,6 +391,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int1 = 0x04, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x21, + .value = BIT(1), + }, .multi_read_bit = true, .bootime = 2, /* guess */ }, @@ -437,6 +453,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_od = 0x40, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x21, + .value = BIT(7), + }, .multi_read_bit = false, .bootime = 2, /* guess */ }, @@ -499,6 +519,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .addr_ihl = 0x22, .mask_ihl = 0x80, }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, .multi_read_bit = true, .bootime = 2, }, @@ -547,6 +571,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_int1 = 0x04, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x21, + .value = BIT(1), + }, .multi_read_bit = false, .bootime = 2, }, @@ -614,6 +642,10 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { .mask_ihl = 0x02, .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, .multi_read_bit = true, .bootime = 2, }, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 79c8c7cd70d5..6e6a1ecc99dd 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -550,6 +550,31 @@ out: } EXPORT_SYMBOL(st_sensors_read_info_raw); +static int st_sensors_init_interface_mode(struct iio_dev *indio_dev, + const struct st_sensor_settings *sensor_settings) +{ + struct st_sensor_data *sdata = iio_priv(indio_dev); + struct device_node *np = sdata->dev->of_node; + struct st_sensors_platform_data *pdata; + + pdata = (struct st_sensors_platform_data *)sdata->dev->platform_data; + if (((np && of_property_read_bool(np, "spi-3wire")) || + (pdata && pdata->spi_3wire)) && sensor_settings->sim.addr) { + int err; + + err = sdata->tf->write_byte(&sdata->tb, sdata->dev, + sensor_settings->sim.addr, + sensor_settings->sim.value); + if (err < 0) { + dev_err(&indio_dev->dev, + "failed to init interface mode\n"); + return err; + } + } + + return 0; +} + int st_sensors_check_device_support(struct iio_dev *indio_dev, int num_sensors_list, const struct st_sensor_settings *sensor_settings) @@ -574,6 +599,10 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev, return -ENODEV; } + err = st_sensors_init_interface_mode(indio_dev, &sensor_settings[i]); + if (err < 0) + return err; + if (sensor_settings[i].wai_addr) { err = sdata->tf->read_byte(&sdata->tb, sdata->dev, sensor_settings[i].wai_addr, &wai); diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 497f2b3a5a62..97f1b465d04f 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -105,6 +105,11 @@ struct st_sensor_fullscale { struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX]; }; +struct st_sensor_sim { + u8 addr; + u8 value; +}; + /** * struct st_sensor_bdu - ST sensor device block data update * @addr: address of the register. @@ -197,6 +202,7 @@ struct st_sensor_transfer_function { * @bdu: Block data update register. * @das: Data Alignment Selection register. * @drdy_irq: Data ready register of the sensor. + * @sim: SPI serial interface mode register of the sensor. * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read. * @bootime: samples to discard when sensor passing from power-down to power-up. */ @@ -213,6 +219,7 @@ struct st_sensor_settings { struct st_sensor_bdu bdu; struct st_sensor_das das; struct st_sensor_data_ready_irq drdy_irq; + struct st_sensor_sim sim; bool multi_read_bit; unsigned int bootime; }; diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h index 79b0e4cdb814..f8274b0c6888 100644 --- a/include/linux/platform_data/st_sensors_pdata.h +++ b/include/linux/platform_data/st_sensors_pdata.h @@ -17,10 +17,12 @@ * Available only for accelerometer and pressure sensors. * Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet). * @open_drain: set the interrupt line to be open drain if possible. + * @spi_3wire: enable spi-3wire mode. */ struct st_sensors_platform_data { u8 drdy_int_pin; bool open_drain; + bool spi_3wire; }; #endif /* ST_SENSORS_PDATA_H */ -- cgit v1.2.3 From d466d3c1217406b14b834335b5b4b33c0d45bd09 Mon Sep 17 00:00:00 2001 From: Stefan-Gabriel Mirea Date: Thu, 6 Jul 2017 10:06:41 +0100 Subject: iio: adc: vf610_adc: Fix VALT selection value for REFSEL bits In order to select the alternate voltage reference pair (VALTH/VALTL), the right value for the REFSEL field in the ADCx_CFG register is "01", leading to 0x800 as register mask. See section 8.2.6.4 in the reference manual[1]. [1] http://www.nxp.com/docs/en/reference-manual/VFXXXRM.pdf Fixes: a775427632fd ("iio:adc:imx: add Freescale Vybrid vf610 adc driver") Signed-off-by: Stefan-Gabriel Mirea Signed-off-by: Jonathan Cameron --- drivers/iio/adc/vf610_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c index 01fc76f7d660..c168e0db329a 100644 --- a/drivers/iio/adc/vf610_adc.c +++ b/drivers/iio/adc/vf610_adc.c @@ -77,7 +77,7 @@ #define VF610_ADC_ADSTS_MASK 0x300 #define VF610_ADC_ADLPC_EN 0x80 #define VF610_ADC_ADHSC_EN 0x400 -#define VF610_ADC_REFSEL_VALT 0x100 +#define VF610_ADC_REFSEL_VALT 0x800 #define VF610_ADC_REFSEL_VBG 0x1000 #define VF610_ADC_ADTRG_HARD 0x2000 #define VF610_ADC_AVGS_8 0x4000 -- cgit v1.2.3 From 3091141d7803608950adc001518ffb544a4e3308 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 Jul 2017 15:11:57 +0200 Subject: iio: adc: axp288: Fix the GPADC pin reading often wrongly returning 0 I noticed in its DSDT that one of my tablets actually is using the GPADC pin for temperature monitoring. The whole axp288_adc_set_ts() function is a bit weird, in the past it was removed because it seems to make no sense, then this was reverted because of regressions. So I decided to test the special GPADC pin handling on this tablet. Conclusion: not only is axp288_adc_set_ts() necessary, we need to sleep a bit after making the AXP288_ADC_TS_PIN_CTRL changes before sampling the GPADC, otherwise it will often (about 80% of the time) read 0 instead of its actual value. It seems that there is only 1 bias current source and to be able to use it for the GPIO0 pin in GPADC mode it must be temporarily turned off for the TS pin, but the datasheet does not mention this. This commit adds a sleep after disabling the TS pin bias current, fixing the GPADC more often then not wrongly returning 0. Signed-off-by: Hans de Goede Signed-off-by: Jonathan Cameron --- drivers/iio/adc/axp288_adc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c index 7fd24949c0c1..462a99c13e7a 100644 --- a/drivers/iio/adc/axp288_adc.c +++ b/drivers/iio/adc/axp288_adc.c @@ -126,11 +126,21 @@ static int axp288_adc_read_channel(int *val, unsigned long address, static int axp288_adc_set_ts(struct regmap *regmap, unsigned int mode, unsigned long address) { + int ret; + /* channels other than GPADC do not need to switch TS pin */ if (address != AXP288_GP_ADC_H) return 0; - return regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); + ret = regmap_write(regmap, AXP288_ADC_TS_PIN_CTRL, mode); + if (ret) + return ret; + + /* When switching to the GPADC pin give things some time to settle */ + if (mode == AXP288_ADC_TS_PIN_GPADC) + usleep_range(6000, 10000); + + return 0; } static int axp288_adc_read_raw(struct iio_dev *indio_dev, -- cgit v1.2.3 From 105967ad68d2eb1a041bc041f9cf96af2a653b65 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jul 2017 11:31:03 +0200 Subject: staging:iio:resolver:ad2s1210 fix negative IIO_ANGL_VEL read gcc-7 points out an older regression: drivers/staging/iio/resolver/ad2s1210.c: In function 'ad2s1210_read_raw': drivers/staging/iio/resolver/ad2s1210.c:515:42: error: '<<' in boolean context, did you mean '<' ? [-Werror=int-in-bool-context] The original code had 'unsigned short' here, but incorrectly got converted to 'bool'. This reverts the regression and uses a normal type instead. Fixes: 29148543c521 ("staging:iio:resolver:ad2s1210 minimal chan spec conversion.") Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: Jonathan Cameron --- drivers/staging/iio/resolver/ad2s1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c index a6a8393d6664..3e00df74b18c 100644 --- a/drivers/staging/iio/resolver/ad2s1210.c +++ b/drivers/staging/iio/resolver/ad2s1210.c @@ -472,7 +472,7 @@ static int ad2s1210_read_raw(struct iio_dev *indio_dev, long m) { struct ad2s1210_state *st = iio_priv(indio_dev); - bool negative; + u16 negative; int ret = 0; u16 pos; s16 vel; -- cgit v1.2.3 From 05969566e6d64113a861adc6c17cbba685c640b3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 30 Jun 2017 17:43:02 -0300 Subject: ARM: dts: imx7d-sdb: Put pinctrl_spi4 in the correct location pinctrl_spi4 pin group is not part of the low power iomux controller, so move it under the normal iomuxc node. Fixes: 184f39b57cab6 ("ARM: dts: imx7d-sdb: Add GPIO expander node") Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-sdb.dts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 54c45402286b..0a24d1bf3c39 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -557,6 +557,14 @@ >; }; + pinctrl_spi4: spi4grp { + fsl,pins = < + MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59 + MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59 + MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59 + >; + }; + pinctrl_tsc2046_pendown: tsc2046_pendown { fsl,pins = < MX7D_PAD_EPDC_BDR1__GPIO2_IO29 0x59 @@ -697,13 +705,5 @@ fsl,pins = < MX7D_PAD_LPSR_GPIO1_IO01__PWM1_OUT 0x110b0 >; - - pinctrl_spi4: spi4grp { - fsl,pins = < - MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x59 - MX7D_PAD_GPIO1_IO12__GPIO1_IO12 0x59 - MX7D_PAD_GPIO1_IO13__GPIO1_IO13 0x59 - >; - }; }; }; -- cgit v1.2.3 From d1510a2e5ab6cb3a67f1c55ca5e7a6d2c6dec340 Mon Sep 17 00:00:00 2001 From: Chris Gorman Date: Wed, 12 Jul 2017 13:31:26 -0400 Subject: i2c: mux: pinctrl: mention correct module name in Kconfig help text Kconfig says the resulting module is pinctrl-i2cmux, but the module when built is i2c-mux-pinctrl. Fixes: ae58d1e40698 ("i2c: Add generic I2C multiplexer using pinctrl API") Signed-off-by: Chris Gorman Signed-off-by: Peter Rosin --- drivers/i2c/muxes/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index 2c64d0e0740f..17121329bb79 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -83,7 +83,7 @@ config I2C_MUX_PINCTRL different sets of pins at run-time. This driver can also be built as a module. If so, the module will be - called pinctrl-i2cmux. + called i2c-mux-pinctrl. config I2C_MUX_REG tristate "Register-based I2C multiplexer" -- cgit v1.2.3 From fa405fd9dd7b0a5367fb5a773e93ac59efb98f44 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 11 Jul 2017 09:40:15 +0200 Subject: ARM: dts: at91: sama5d2: use sama5d2 compatible string for SMC A new compatible string has been introduced for sama5d2 SMC to allow to manage the registers mapping change. Signed-off-by: Ludovic Desroches Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index cc06da394366..3e6e2dbc2595 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -1048,7 +1048,7 @@ }; hsmc: hsmc@f8014000 { - compatible = "atmel,sama5d3-smc", "syscon", "simple-mfd"; + compatible = "atmel,sama5d2-smc", "syscon", "simple-mfd"; reg = <0xf8014000 0x1000>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>; clocks = <&hsmc_clk>; -- cgit v1.2.3 From 8ff235fe7a8c4a5824c223b9996457174442e73a Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 7 Jul 2017 15:33:10 +0200 Subject: ARM: dts: at91: sama5d2: fix EBI/NAND controllers declaration Fix HSMC interrupt ID, PMECC registers and EBI ones. Fixes: d9c41bf30cf8 ("ARM: dts: at91: Declare EBI/NAND controllers") Signed-off-by: Ludovic Desroches Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 3e6e2dbc2595..60e69aeacbdb 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -303,7 +303,7 @@ #size-cells = <1>; atmel,smc = <&hsmc>; reg = <0x10000000 0x10000000 - 0x40000000 0x30000000>; + 0x60000000 0x30000000>; ranges = <0x0 0x0 0x10000000 0x10000000 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 @@ -1050,16 +1050,16 @@ hsmc: hsmc@f8014000 { compatible = "atmel,sama5d2-smc", "syscon", "simple-mfd"; reg = <0xf8014000 0x1000>; - interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH 6>; clocks = <&hsmc_clk>; #address-cells = <1>; #size-cells = <1>; ranges; - pmecc: ecc-engine@ffffc070 { + pmecc: ecc-engine@f8014070 { compatible = "atmel,sama5d2-pmecc"; - reg = <0xffffc070 0x490>, - <0xffffc500 0x100>; + reg = <0xf8014070 0x490>, + <0xf8014500 0x100>; }; }; -- cgit v1.2.3 From 9585e340db9f6cc1c0928d82c3a23cc4460f0a3f Mon Sep 17 00:00:00 2001 From: Stefan Triller Date: Fri, 30 Jun 2017 14:44:03 +0200 Subject: USB: serial: cp210x: add support for Qivicon USB ZigBee dongle The German Telekom offers a ZigBee USB Stick under the brand name Qivicon for their SmartHome Home Base in its 1. Generation. The productId is not known by the according kernel module, this patch adds support for it. Signed-off-by: Stefan Triller Reviewed-by: Frans Klaver Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f64e914a8985..2d945c9f975c 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -142,6 +142,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ + { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ -- cgit v1.2.3 From e59e18989c68a8d7941005f81ad6abc4ca682de0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Jul 2017 15:13:41 +0200 Subject: iio: accel: bmc150: Always restore device to normal mode after suspend-resume After probe we would put the device in normal mode, after a runtime suspend-resume we would put it back in normal mode. But for a regular suspend-resume we would only put it back in normal mode if triggers or events have been requested. This is not consistent and breaks reading raw values after a suspend-resume. This commit changes the regular resume path to also unconditionally put the device back in normal mode, fixing reading of raw values not working after a regular suspend-resume cycle. Signed-off-by: Hans de Goede Reviewed-by: Srinivas Pandruvada Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/bmc150-accel-core.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 6b5d3be283c4..807299dd45eb 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -193,7 +193,6 @@ struct bmc150_accel_data { struct regmap *regmap; int irq; struct bmc150_accel_interrupt interrupts[BMC150_ACCEL_INTERRUPTS]; - atomic_t active_intr; struct bmc150_accel_trigger triggers[BMC150_ACCEL_TRIGGERS]; struct mutex mutex; u8 fifo_mode, watermark; @@ -493,11 +492,6 @@ static int bmc150_accel_set_interrupt(struct bmc150_accel_data *data, int i, goto out_fix_power_state; } - if (state) - atomic_inc(&data->active_intr); - else - atomic_dec(&data->active_intr); - return 0; out_fix_power_state: @@ -1710,8 +1704,7 @@ static int bmc150_accel_resume(struct device *dev) struct bmc150_accel_data *data = iio_priv(indio_dev); mutex_lock(&data->mutex); - if (atomic_read(&data->active_intr)) - bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); + bmc150_accel_set_mode(data, BMC150_ACCEL_SLEEP_MODE_NORMAL, 0); bmc150_accel_fifo_set_mode(data); mutex_unlock(&data->mutex); -- cgit v1.2.3 From 293b915fd9bebf33cdc906516fb28d54649a25ac Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Tue, 18 Jul 2017 17:20:36 -0700 Subject: Input: trackpoint - assume 3 buttons when buttons detection fails Trackpoint buttons detection fails on ThinkPad 570 and 470 series, this makes the middle button of the trackpoint to not being recogized. As I don't believe there is any trackpoint with less than 3 buttons this patch just assumes three buttons when the extended button information read fails. Signed-off-by: Oscar Campos Acked-by: Peter Hutterer Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 922ea02edcc3..20b5b21c1bba 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -380,8 +380,8 @@ int trackpoint_detect(struct psmouse *psmouse, bool set_properties) return 0; if (trackpoint_read(ps2dev, TP_EXT_BTN, &button_info)) { - psmouse_warn(psmouse, "failed to get extended button data\n"); - button_info = 0; + psmouse_warn(psmouse, "failed to get extended button data, assuming 3 buttons\n"); + button_info = 0x33; } psmouse->private = kzalloc(sizeof(struct trackpoint_data), GFP_KERNEL); -- cgit v1.2.3 From 3aa0907675a38498d8f2d343e94207ad28a117cf Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 17 Jul 2017 20:20:08 +0200 Subject: mtd: nand: atmel: Fix DT backward compatibility in pmecc.c PMECC caps extraction from old DT bindings is broken, thus leading to erroneous EL registers offset, which in turn make HW ECC unusable on sama5d2 when old bindings are in use. Passing the NAND dev node instead of the NFC node to of_match_node() solves the problem. Signed-off-by: Boris Brezillon Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver") Cc: Tested-by: Romain Izard --- drivers/mtd/nand/atmel/pmecc.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c index 55a8ee5306ea..8c210a5776bc 100644 --- a/drivers/mtd/nand/atmel/pmecc.c +++ b/drivers/mtd/nand/atmel/pmecc.c @@ -945,6 +945,7 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev) */ struct platform_device *pdev = to_platform_device(userdev); const struct atmel_pmecc_caps *caps; + const struct of_device_id *match; /* No PMECC engine available. */ if (!of_property_read_bool(userdev->of_node, @@ -953,21 +954,11 @@ struct atmel_pmecc *devm_atmel_pmecc_get(struct device *userdev) caps = &at91sam9g45_caps; - /* - * Try to find the NFC subnode and extract the associated caps - * from there. - */ - np = of_find_compatible_node(userdev->of_node, NULL, - "atmel,sama5d3-nfc"); - if (np) { - const struct of_device_id *match; - - match = of_match_node(atmel_pmecc_legacy_match, np); - if (match && match->data) - caps = match->data; - - of_node_put(np); - } + /* Find the caps associated to the NAND dev node. */ + match = of_match_node(atmel_pmecc_legacy_match, + userdev->of_node); + if (match && match->data) + caps = match->data; pmecc = atmel_pmecc_create(pdev, caps, 1, 2); } -- cgit v1.2.3 From b82d6cb41eb54bf1c0e5d2ae73f49a4dff54c54b Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Jul 2017 10:35:31 +0200 Subject: xtensa: remove wrapper header for asm/device.h xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and does not add any arch specific extensions. Thus, use the asm-generic header directly. Acked-by: Max Filippov Signed-off-by: Tobias Klauser Signed-off-by: Max Filippov --- arch/xtensa/include/asm/Kbuild | 1 + arch/xtensa/include/asm/device.h | 15 --------------- 2 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 arch/xtensa/include/asm/device.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 2d716ebc5a5e..a2fdabfce43a 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += bug.h generic-y += clkdev.h +generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h deleted file mode 100644 index 1deeb8ebbb1b..000000000000 --- a/arch/xtensa/include/asm/device.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Arch specific extensions to struct device - * - * This file is released under the GPLv2 - */ -#ifndef _ASM_XTENSA_DEVICE_H -#define _ASM_XTENSA_DEVICE_H - -struct dev_archdata { -}; - -struct pdev_archdata { -}; - -#endif /* _ASM_XTENSA_DEVICE_H */ -- cgit v1.2.3 From 536dcc9c34035778892a7e3cbf45166ce73f8d34 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Jul 2017 10:44:07 +0200 Subject: xtensa: remove wrapper header for asm/param.h xtensa's asm/device.h is a verbatim copy of asm-generic/device.h and does not add any arch specific extensions. Thus, use the asm-generic header directly. Signed-off-by: Tobias Klauser Signed-off-by: Max Filippov --- arch/xtensa/include/asm/Kbuild | 1 + arch/xtensa/include/asm/param.h | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 arch/xtensa/include/asm/param.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index a2fdabfce43a..dff7cc39437c 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h +generic-y += param.h generic-y += percpu.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/xtensa/include/asm/param.h b/arch/xtensa/include/asm/param.h deleted file mode 100644 index 0a70e780ef2a..000000000000 --- a/arch/xtensa/include/asm/param.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * include/asm-xtensa/param.h - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2001 - 2005 Tensilica Inc. - */ -#ifndef _XTENSA_PARAM_H -#define _XTENSA_PARAM_H - -#include - -# define HZ CONFIG_HZ /* internal timer frequency */ -# define USER_HZ 100 /* for user interfaces in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* frequnzy at which times() counts */ -#endif /* _XTENSA_PARAM_H */ -- cgit v1.2.3 From edf3f301db7af7e784d06f7059dfc8a69359af13 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 10 Jul 2017 18:45:41 +0300 Subject: IB/ipoib: Fix race between light events and interface restart A potential race between light_event and interface restart may attach multicast group to an already attached QP. Scenario: light_event flow goes through ipoib_mcast_dev_flush function, if a context switch occurs before calling ipoib_mcast_remove_list, then we may face a situation where the broadcast of the priv is null and the corresponding QP is not detached yet. If an "interface restart" runs during the previous context switch, the following scenario occurs: When the device goes up, ipoib_ib_dev_up function will be called, it will send a new registration request to the broadcast group and then attach the group to the QP that was not detached before. IPOIB_FLUSH_LIGHT INTERFACE RESTART __ipoib_ib_dev_flush | | | | | | | ipoib_mcast_dev_flush | Move mcast list and broadcast to remove_list | | | | | Context Switch--> | | ipoib_ib_dev_down | | | | | ipoib_ib_dev_up | | | | | ipoib_mcast_join_task | allocate new broadcast | | | | | Attach QP to multicast group | | | | | <--Context Switch ipoib_mcast_leave Detach QP from multicast group Signed-off-by: Feras Daoud Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ff50a7bd66d8..7ac25059c40f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -336,6 +336,7 @@ struct ipoib_dev_priv { unsigned long flags; struct rw_semaphore vlan_rwsem; + struct mutex mcast_mutex; struct rb_root path_tree; struct list_head path_list; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4ce315c92b48..144187b407bd 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1877,6 +1877,7 @@ static void ipoib_build_priv(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); + mutex_init(&priv->mcast_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 057f58e6afca..0a0b2ce45cbc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -838,6 +838,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) struct ipoib_mcast *mcast, *tmcast; unsigned long flags; + mutex_lock(&priv->mcast_mutex); ipoib_dbg_mcast(priv, "flushing multicast list\n"); spin_lock_irqsave(&priv->lock, flags); @@ -865,6 +866,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev) wait_for_completion(&mcast->done); ipoib_mcast_remove_list(&remove_list); + mutex_unlock(&priv->mcast_mutex); } static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) -- cgit v1.2.3 From 6bdc8de2e86e717124a715ecc480892a2c331ff5 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 12 Jul 2017 10:40:25 +0300 Subject: IB/ipoib: Use cancel_delayed_work_sync when needed The work mcast_task can re-queue itself, so instead of doing cancel && flush_workqueue, that still can leave a queued task on the air, use cancel_delayed_work_sync. Also, no need to use lock over the cancel, the original lock was due to bit assignment setting (IPOIB_MCAST_RUN) that is not in use anymore. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0a0b2ce45cbc..f80bf0f5d7cf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -684,15 +684,10 @@ void ipoib_mcast_start_thread(struct net_device *dev) int ipoib_mcast_stop_thread(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - unsigned long flags; ipoib_dbg_mcast(priv, "stopping multicast thread\n"); - spin_lock_irqsave(&priv->lock, flags); - cancel_delayed_work(&priv->mcast_task); - spin_unlock_irqrestore(&priv->lock, flags); - - flush_workqueue(priv->wq); + cancel_delayed_work_sync(&priv->mcast_task); return 0; } -- cgit v1.2.3 From a08e1120627f72e9ed7c291e3b9f8dd29c1513ab Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Wed, 12 Jul 2017 13:11:54 +0300 Subject: IB/ipoib: Make sure no in-flight joins while leaving that mcast While cleaning neighs and there is a send-only mcast neigh, the driver should wait to finish its join process before trying to remove it. Without this patch, we will see messages like: "ipoib_mcast_leave on an in-flight join" and unexpected results in the join_complete. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index f80bf0f5d7cf..93e149efc1f5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -743,6 +743,14 @@ void ipoib_mcast_remove_list(struct list_head *remove_list) { struct ipoib_mcast *mcast, *tmcast; + /* + * make sure the in-flight joins have finished before we attempt + * to leave + */ + list_for_each_entry_safe(mcast, tmcast, remove_list, list) + if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) + wait_for_completion(&mcast->done); + list_for_each_entry_safe(mcast, tmcast, remove_list, list) { ipoib_mcast_leave(mcast->dev, mcast); ipoib_mcast_free(mcast); @@ -852,14 +860,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); - /* - * make sure the in-flight joins have finished before we attempt - * to leave - */ - list_for_each_entry_safe(mcast, tmcast, &remove_list, list) - if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) - wait_for_completion(&mcast->done); - ipoib_mcast_remove_list(&remove_list); mutex_unlock(&priv->mcast_mutex); } @@ -979,14 +979,6 @@ void ipoib_mcast_restart_task(struct work_struct *work) netif_addr_unlock(dev); local_irq_restore(flags); - /* - * make sure the in-flight joins have finished before we attempt - * to leave - */ - list_for_each_entry_safe(mcast, tmcast, &remove_list, list) - if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) - wait_for_completion(&mcast->done); - ipoib_mcast_remove_list(&remove_list); /* -- cgit v1.2.3 From 11f74b40359b19f760964e71d04882a6caf530cc Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Thu, 13 Jul 2017 11:27:12 +0300 Subject: IB/ipoib: Prevent setting negative values to max_nonsrq_conn_qp Don't allow negative values to max_nonsrq_conn_qp. There is no functional impact on a negative value but it is logicically incorrect. Fixes: 68e995a29572 ("IPoIB/cm: Add connected mode support for devices without SRQs") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 144187b407bd..8b7ec15a9d6e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2366,6 +2366,7 @@ static int __init ipoib_init_module(void) ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE); #ifdef CONFIG_INFINIBAND_IPOIB_CM ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP); + ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0); #endif /* -- cgit v1.2.3 From d2e46fccc3e3d73a741efe433f00960331280696 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Sun, 16 Jul 2017 11:33:01 +0300 Subject: IB/ipoib: Set IPOIB_NEIGH_TBL_FLUSH after flushed completion initialization Set IPOIB_NEIGH_TBL_FLUSH bit after initializing the neighbor flushed completion, otherwise the garbage collector may signal a completion while it is not initialized yet. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Feras Daoud Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 8b7ec15a9d6e..f4403c52cd67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1560,6 +1560,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv) int i, wait_flushed = 0; init_completion(&priv->ntbl.flushed); + set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); spin_lock_irqsave(&priv->lock, flags); @@ -1604,7 +1605,6 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); init_completion(&priv->ntbl.deleted); - set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags); /* Stop GC if called at init fail need to cancel work */ stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); -- cgit v1.2.3 From 4829d964dfb027558c732cfa0d13b716ab3f0838 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 10 Jul 2017 18:12:43 +0300 Subject: IB/ipoib: Add multicast packets statistics Update the multicast counter when multicast packets are received and provide this information through ethtool support. Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 3 ++- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 7871379342f4..184a22f48027 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -52,7 +52,8 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = { IPOIB_NETDEV_STAT(tx_bytes), IPOIB_NETDEV_STAT(tx_errors), IPOIB_NETDEV_STAT(rx_dropped), - IPOIB_NETDEV_STAT(tx_dropped) + IPOIB_NETDEV_STAT(tx_dropped), + IPOIB_NETDEV_STAT(multicast), }; #define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 57a9655e844d..02eda1f53a67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -256,6 +256,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; + if (skb->pkt_type == PACKET_MULTICAST) + dev->stats.multicast++; skb->dev = dev; if ((dev->features & NETIF_F_RXCSUM) && -- cgit v1.2.3 From eb54714ddcb2462d4d4b8aa78d028b61e217a835 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Sun, 2 Jul 2017 15:05:59 +0300 Subject: IB/ipoib: Add get statistics support to SRIOV VF Add SRIOV VF support to get traffic statistics. Signed-off-by: Feras Daoud Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f4403c52cd67..24fa87fe0952 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1847,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = { .ndo_tx_timeout = ipoib_timeout, .ndo_set_rx_mode = ipoib_set_mcast_list, .ndo_get_iflink = ipoib_get_iflink, + .ndo_get_stats64 = ipoib_get_stats, }; void ipoib_setup_common(struct net_device *dev) -- cgit v1.2.3 From dc892e17bbae670a3d7aa6ab8bd1033b15b24645 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 13 Jul 2017 13:34:19 +0300 Subject: IB/ipoib: Clean error paths in add port Refactor error paths in ipoib_add_port() function. The code flow ensures that the function terminates on every error flow and it makes redundant all "else" cases. The functions are called during the flow are returning "result < 0", in case of error, so there is no need to check it explicitly. Fixes: 58e9cc90cda7 ("IB/IPoIB: Fix bad error flow in ipoib_add_port()") Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 24fa87fe0952..6c77df34869d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2175,14 +2175,14 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->dev_id = port - 1; result = ib_query_port(hca, port, &attr); - if (!result) - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); - else { + if (result) { printk(KERN_WARNING "%s: ib_query_port %d failed\n", hca->name, port); goto device_init_failed; } + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + /* MTU will be reset when mcast join happens */ priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; @@ -2213,12 +2213,14 @@ static struct net_device *ipoib_add_port(const char *format, printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n", hca->name, port, result); goto device_init_failed; - } else - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); + } + + memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, + sizeof(union ib_gid)); set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); result = ipoib_dev_init(priv->dev, hca, port); - if (result < 0) { + if (result) { printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n", hca->name, port, result); goto device_init_failed; -- cgit v1.2.3 From 1b355094b308f3377c8f574ce86135ee159c6285 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 15 Jul 2017 16:26:55 +0300 Subject: IB/ipoib: Remove double pointer assigning There is no need to assign "p" pointer twice. This patch fixes the following smatch warning: drivers/infiniband/ulp/ipoib/ipoib_cm.c:517 ipoib_cm_rx_handler() warn: missing break? reassigning 'p->id' Fixes: 839fcaba355a ("IPoIB: Connected mode experimental support") Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index f87d104837dc..d69410c2ed97 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -511,7 +511,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, case IB_CM_REQ_RECEIVED: return ipoib_cm_req_handler(cm_id, event); case IB_CM_DREQ_RECEIVED: - p = cm_id->context; ib_send_cm_drep(cm_id, NULL, 0); /* Fall through */ case IB_CM_REJ_RECEIVED: -- cgit v1.2.3 From b287b76e89503ef1d403cc5cc8bd74b035d25bfa Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 23 Jul 2017 10:46:14 +0300 Subject: Revert "IB/core: Allow QP state transition from reset to error" The commit ebc9ca43e1d5 ("IB/core: Allow QP state transition from reset to error") allowed transition from Reset to Error state for the QPs. This behavior doesn't follow the IBTA specification 1.3, which in 10.3.1 QUEUE PAIR AND EE CONTEXT STATES section. The quote from the spec: "An error can be forced from any state, except Reset, with the Modify QP/EE Verb." Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index fb98ed67d5bc..7f8fe443df46 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -895,7 +895,6 @@ static const struct { } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, - [IB_QPS_ERR] = { .valid = 1 }, [IB_QPS_INIT] = { .valid = 1, .req_param = { -- cgit v1.2.3 From 5dc78ad1904db597bdb4427f3ead437aae86f54c Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 13 Jul 2017 14:29:08 +0300 Subject: IB/ipoib: Notify on modify QP failure only when relevant Modify QP can fail and it can be acceptable, like when moving from RST to ERR state, all the rest are not acceptable and a message to the log should be printed. The current code prints on all failures and many messages like: "Failed to modify QP to ERROR state" appear, even when supported by the state machine of the QP object. Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 02eda1f53a67..2e075377242e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -711,6 +711,27 @@ static int recvs_pending(struct net_device *dev) return pending; } +static void check_qp_movement_and_print(struct ipoib_dev_priv *priv, + struct ib_qp *qp, + enum ib_qp_state new_state) +{ + struct ib_qp_attr qp_attr; + struct ib_qp_init_attr query_init_attr; + int ret; + + ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr); + if (ret) { + ipoib_warn(priv, "%s: Failed to query QP\n", __func__); + return; + } + /* print according to the new-state and the previous state.*/ + if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET) + ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n"); + else + ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n", + new_state, qp_attr.qp_state); +} + int ipoib_ib_dev_stop_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -730,7 +751,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) */ qp_attr.qp_state = IB_QPS_ERR; if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE)) - ipoib_warn(priv, "Failed to modify QP to ERROR state\n"); + check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR); /* Wait for all sends and receives to complete */ begin = jiffies; -- cgit v1.2.3 From 737cc2a593782df6846b3cab7e0f64384f58364a Mon Sep 17 00:00:00 2001 From: Mykola Kostenok Date: Mon, 17 Jul 2017 12:00:35 +0300 Subject: iio: aspeed-adc: wait for initial sequence. This patch enables adc engine at initialization time and waits for the initial sequence completion before enabling adc channels. Without this code adc channels are not functional and shows zeros for all connected channels. Tested on mellanox msn platform. v1 -> v2: Pointed by Rick Altherr: - Wait init sequence code enabled by bool from OF match table. Signed-off-by: Mykola Kostenok Reviewed-by: Rick Altherr Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/aspeed_adc.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 62670cbfa2bb..87fd6e0ce5ee 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -22,6 +22,7 @@ #include #include +#include #define ASPEED_RESOLUTION_BITS 10 #define ASPEED_CLOCKS_PER_SAMPLE 12 @@ -38,11 +39,17 @@ #define ASPEED_ENGINE_ENABLE BIT(0) +#define ASPEED_ADC_CTRL_INIT_RDY BIT(8) + +#define ASPEED_ADC_INIT_POLLING_TIME 500 +#define ASPEED_ADC_INIT_TIMEOUT 500000 + struct aspeed_adc_model_data { const char *model_name; unsigned int min_sampling_rate; // Hz unsigned int max_sampling_rate; // Hz unsigned int vref_voltage; // mV + bool wait_init_sequence; }; struct aspeed_adc_data { @@ -211,6 +218,24 @@ static int aspeed_adc_probe(struct platform_device *pdev) goto scaler_error; } + model_data = of_device_get_match_data(&pdev->dev); + + if (model_data->wait_init_sequence) { + /* Enable engine in normal mode. */ + writel(ASPEED_OPERATION_MODE_NORMAL | ASPEED_ENGINE_ENABLE, + data->base + ASPEED_REG_ENGINE_CONTROL); + + /* Wait for initial sequence complete. */ + ret = readl_poll_timeout(data->base + ASPEED_REG_ENGINE_CONTROL, + adc_engine_control_reg_val, + adc_engine_control_reg_val & + ASPEED_ADC_CTRL_INIT_RDY, + ASPEED_ADC_INIT_POLLING_TIME, + ASPEED_ADC_INIT_TIMEOUT); + if (ret) + goto scaler_error; + } + /* Start all channels in normal mode. */ clk_prepare_enable(data->clk_scaler->clk); adc_engine_control_reg_val = GENMASK(31, 16) | @@ -270,6 +295,7 @@ static const struct aspeed_adc_model_data ast2500_model_data = { .vref_voltage = 1800, // mV .min_sampling_rate = 1, .max_sampling_rate = 1000000, + .wait_init_sequence = true, }; static const struct of_device_id aspeed_adc_matches[] = { -- cgit v1.2.3 From 8addebc14a322fa8ca67cd57c6038069acde8ddc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:56 +0200 Subject: scsi: bnx2fc: Plug CPU hotplug race bnx2fc_process_new_cqes() has protection against CPU hotplug, which relies on the per cpu thread pointer. This protection is racy because it happens only partially with the per cpu fp_work_lock held. If the CPU is unplugged after the lock is dropped, the wakeup code can dereference a NULL pointer or access freed and potentially reused memory. Restructure the code so the thread check and wakeup happens with the fp_work_lock held. Signed-off-by: Thomas Gleixner Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_hwi.c | 45 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index 913c750205ce..26de61d65a4d 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -1008,6 +1008,28 @@ static struct bnx2fc_work *bnx2fc_alloc_work(struct bnx2fc_rport *tgt, u16 wqe) return work; } +/* Pending work request completion */ +static void bnx2fc_pending_work(struct bnx2fc_rport *tgt, unsigned int wqe) +{ + unsigned int cpu = wqe % num_possible_cpus(); + struct bnx2fc_percpu_s *fps; + struct bnx2fc_work *work; + + fps = &per_cpu(bnx2fc_percpu, cpu); + spin_lock_bh(&fps->fp_work_lock); + if (fps->iothread) { + work = bnx2fc_alloc_work(tgt, wqe); + if (work) { + list_add_tail(&work->list, &fps->work_list); + wake_up_process(fps->iothread); + spin_unlock_bh(&fps->fp_work_lock); + return; + } + } + spin_unlock_bh(&fps->fp_work_lock); + bnx2fc_process_cq_compl(tgt, wqe); +} + int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt) { struct fcoe_cqe *cq; @@ -1042,28 +1064,7 @@ int bnx2fc_process_new_cqes(struct bnx2fc_rport *tgt) /* Unsolicited event notification */ bnx2fc_process_unsol_compl(tgt, wqe); } else { - /* Pending work request completion */ - struct bnx2fc_work *work = NULL; - struct bnx2fc_percpu_s *fps = NULL; - unsigned int cpu = wqe % num_possible_cpus(); - - fps = &per_cpu(bnx2fc_percpu, cpu); - spin_lock_bh(&fps->fp_work_lock); - if (unlikely(!fps->iothread)) - goto unlock; - - work = bnx2fc_alloc_work(tgt, wqe); - if (work) - list_add_tail(&work->list, - &fps->work_list); -unlock: - spin_unlock_bh(&fps->fp_work_lock); - - /* Pending work request completion */ - if (fps->iothread && work) - wake_up_process(fps->iothread); - else - bnx2fc_process_cq_compl(tgt, wqe); + bnx2fc_pending_work(tgt, wqe); num_free_sqes++; } cqe++; -- cgit v1.2.3 From 2c2b66ae9d11d99f5f6d7010f0d46401ed9031ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:57 +0200 Subject: scsi: bnx2fc: Prevent recursive cpuhotplug locking The BNX2FC module init/exit code installs/removes the hotplug callbacks with the cpu hotplug lock held. This worked with the old CPU locking implementation which allowed recursive locking, but with the new percpu rwsem based mechanism this is not longer allowed. Use the _cpuslocked() variants to fix this. Reported-by: kernel test robot Acked-by: Chad Dupuis Signed-off-by: Thomas Gleixner Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 7dfe709a7138..c4ebf8c5d884 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2766,15 +2766,16 @@ static int __init bnx2fc_mod_init(void) for_each_online_cpu(cpu) bnx2fc_percpu_thread_create(cpu); - rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2fc:online", - bnx2fc_cpu_online, NULL); + rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, + "scsi/bnx2fc:online", + bnx2fc_cpu_online, NULL); if (rc < 0) goto stop_threads; bnx2fc_online_state = rc; - cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD, "scsi/bnx2fc:dead", - NULL, bnx2fc_cpu_dead); + cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD, + "scsi/bnx2fc:dead", + NULL, bnx2fc_cpu_dead); put_online_cpus(); cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb); @@ -2850,8 +2851,8 @@ static void __exit bnx2fc_mod_exit(void) bnx2fc_percpu_thread_destroy(cpu); } - cpuhp_remove_state_nocalls(bnx2fc_online_state); - cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2FC_DEAD); + cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state); + cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD); put_online_cpus(); -- cgit v1.2.3 From 2fa2fa1ae6b42fc4c4995fdbf8fd7df96bb25ba4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:58 +0200 Subject: scsi: bnx2i: Prevent recursive cpuhotplug locking The BNX2I module init/exit code installs/removes the hotplug callbacks with the cpu hotplug lock held. This worked with the old CPU locking implementation which allowed recursive locking, but with the new percpu rwsem based mechanism this is not longer allowed. Use the _cpuslocked() variants to fix this. Reported-by: Steven Rostedt Acked-by: Chad Dupuis Signed-off-by: Thomas Gleixner Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_init.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c index 86afc002814c..7487b653e799 100644 --- a/drivers/scsi/bnx2i/bnx2i_init.c +++ b/drivers/scsi/bnx2i/bnx2i_init.c @@ -516,15 +516,16 @@ static int __init bnx2i_mod_init(void) for_each_online_cpu(cpu) bnx2i_percpu_thread_create(cpu); - err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2i:online", - bnx2i_cpu_online, NULL); + err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, + "scsi/bnx2i:online", + bnx2i_cpu_online, NULL); if (err < 0) goto remove_threads; bnx2i_online_state = err; - cpuhp_setup_state_nocalls(CPUHP_SCSI_BNX2I_DEAD, "scsi/bnx2i:dead", - NULL, bnx2i_cpu_dead); + cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD, + "scsi/bnx2i:dead", + NULL, bnx2i_cpu_dead); put_online_cpus(); return 0; @@ -574,8 +575,8 @@ static void __exit bnx2i_mod_exit(void) for_each_online_cpu(cpu) bnx2i_percpu_thread_destroy(cpu); - cpuhp_remove_state_nocalls(bnx2i_online_state); - cpuhp_remove_state_nocalls(CPUHP_SCSI_BNX2I_DEAD); + cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state); + cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD); put_online_cpus(); iscsi_unregister_transport(&bnx2i_iscsi_transport); -- cgit v1.2.3 From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:52:59 +0200 Subject: scsi: bnx2fc: Simplify CPU hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++++------------------------------ include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 55 deletions(-) diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index c4ebf8c5d884..6844ba361616 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = { }; /** - * bnx2fc_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2fc_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2fc_percpu_thread_create(unsigned int cpu) +static int bnx2fc_cpu_online(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2fc_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2fc_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } -static void bnx2fc_percpu_thread_destroy(unsigned int cpu) +static int bnx2fc_cpu_offline(unsigned int cpu) { struct bnx2fc_percpu_s *p; struct task_struct *thread; @@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) thread = p->iothread; p->iothread = NULL; - /* Free all work in the list */ list_for_each_entry_safe(work, tmp, &p->work_list, list) { list_del_init(&work->list); @@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu) if (thread) kthread_stop(thread); -} - - -static int bnx2fc_cpu_online(unsigned int cpu) -{ - printk(PFX "CPU %x online: Create Rx thread\n", cpu); - bnx2fc_percpu_thread_create(cpu); - return 0; -} - -static int bnx2fc_cpu_dead(unsigned int cpu) -{ - printk(PFX "CPU %x offline: Remove Rx thread\n", cpu); - bnx2fc_percpu_thread_destroy(cpu); return 0; } @@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void) spin_lock_init(&p->fp_work_lock); } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_create(cpu); - - rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2fc:online", - bnx2fc_cpu_online, NULL); + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online", + bnx2fc_cpu_online, bnx2fc_cpu_offline); if (rc < 0) - goto stop_threads; + goto stop_thread; bnx2fc_online_state = rc; - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD, - "scsi/bnx2fc:dead", - NULL, bnx2fc_cpu_dead); - put_online_cpus(); - cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb); - return 0; -stop_threads: - for_each_online_cpu(cpu) - bnx2fc_percpu_thread_destroy(cpu); - put_online_cpus(); +stop_thread: kthread_stop(l2_thread); free_wq: destroy_workqueue(bnx2fc_wq); @@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void) struct fcoe_percpu_s *bg; struct task_struct *l2_thread; struct sk_buff *skb; - unsigned int cpu = 0; /* * NOTE: Since cnic calls register_driver routine rtnl_lock, @@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void) if (l2_thread) kthread_stop(l2_thread); - get_online_cpus(); - /* Destroy per cpu threads */ - for_each_online_cpu(cpu) { - bnx2fc_percpu_thread_destroy(cpu); - } - - cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD); - - put_online_cpus(); + cpuhp_remove_state(bnx2fc_online_state); destroy_workqueue(bnx2fc_wq); /* diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index b56573bf440d..2e7b1731ad12 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2FC_DEAD, CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, -- cgit v1.2.3 From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Jul 2017 12:53:00 +0200 Subject: scsi: bnx2i: Simplify cpu hotplug code The CPU hotplug related code of this driver can be simplified by: 1) Consolidating the callbacks into a single state. The CPU thread can be torn down on the CPU which goes offline. There is no point in delaying that to the CPU dead state 2) Let the core code invoke the online/offline callbacks and remove the extra for_each_online_cpu() loops. Signed-off-by: Thomas Gleixner Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++++------------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 15 insertions(+), 51 deletions(-) diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c index 7487b653e799..4ebcda8d9500 100644 --- a/drivers/scsi/bnx2i/bnx2i_init.c +++ b/drivers/scsi/bnx2i/bnx2i_init.c @@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle) /** - * bnx2i_percpu_thread_create - Create a receive thread for an - * online CPU + * bnx2i_cpu_online - Create a receive thread for an online CPU * * @cpu: cpu index for the online cpu */ -static void bnx2i_percpu_thread_create(unsigned int cpu) +static int bnx2i_cpu_online(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu) thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p, cpu_to_node(cpu), "bnx2i_thread/%d", cpu); + if (IS_ERR(thread)) + return PTR_ERR(thread); + /* bind thread to the cpu */ - if (likely(!IS_ERR(thread))) { - kthread_bind(thread, cpu); - p->iothread = thread; - wake_up_process(thread); - } + kthread_bind(thread, cpu); + p->iothread = thread; + wake_up_process(thread); + return 0; } - -static void bnx2i_percpu_thread_destroy(unsigned int cpu) +static int bnx2i_cpu_offline(unsigned int cpu) { struct bnx2i_percpu_s *p; struct task_struct *thread; @@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu) spin_unlock_bh(&p->p_work_lock); if (thread) kthread_stop(thread); -} - -static int bnx2i_cpu_online(unsigned int cpu) -{ - pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu); - bnx2i_percpu_thread_create(cpu); - return 0; -} - -static int bnx2i_cpu_dead(unsigned int cpu) -{ - pr_info("CPU %x offline: Remove Rx thread\n", cpu); - bnx2i_percpu_thread_destroy(cpu); return 0; } @@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void) p->iothread = NULL; } - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_create(cpu); - - err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN, - "scsi/bnx2i:online", - bnx2i_cpu_online, NULL); + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online", + bnx2i_cpu_online, bnx2i_cpu_offline); if (err < 0) - goto remove_threads; + goto unreg_driver; bnx2i_online_state = err; - - cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD, - "scsi/bnx2i:dead", - NULL, bnx2i_cpu_dead); - put_online_cpus(); return 0; -remove_threads: - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - put_online_cpus(); +unreg_driver: cnic_unregister_driver(CNIC_ULP_ISCSI); unreg_xport: iscsi_unregister_transport(&bnx2i_iscsi_transport); @@ -552,7 +525,6 @@ out: static void __exit bnx2i_mod_exit(void) { struct bnx2i_hba *hba; - unsigned cpu = 0; mutex_lock(&bnx2i_dev_lock); while (!list_empty(&adapter_list)) { @@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void) } mutex_unlock(&bnx2i_dev_lock); - get_online_cpus(); - - for_each_online_cpu(cpu) - bnx2i_percpu_thread_destroy(cpu); - - cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state); - cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD); - put_online_cpus(); + cpuhp_remove_state(bnx2i_online_state); iscsi_unregister_transport(&bnx2i_iscsi_transport); cnic_unregister_driver(CNIC_ULP_ISCSI); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 2e7b1731ad12..82b30e638430 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -39,7 +39,6 @@ enum cpuhp_state { CPUHP_PCI_XGENE_DEAD, CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, - CPUHP_SCSI_BNX2I_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, -- cgit v1.2.3 From 722477c4f2c50bc8d800d293a3bc5aa843f16e8d Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Tue, 25 Jul 2017 11:19:21 +0200 Subject: scsi: qedf: Limit number of CQs FCOE offloading failed with: [qed_sp_fcoe_func_start:150(sp-0-3b:00.02)]Cannot satisfy CQ amount. CQs requested 8, CQs available 6. Aborting function start [qed_fcoe_start:821()]Failed to start fcoe [__qedf_probe:3041]:6: Cannot start FCoE function. The reason is a newly introduced check in the qed main part. This change also provides the information about how many CQs are available, so we simply limit the number of requested CQs.. Fixes: 3c5da9427802 ("qed: Share additional information with qedf") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Johannes Thumshirn Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf.h | 3 ++- drivers/scsi/qedf/qedf_main.c | 20 +++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h index 4d038926a455..351f06dfc5a0 100644 --- a/drivers/scsi/qedf/qedf.h +++ b/drivers/scsi/qedf/qedf.h @@ -528,7 +528,8 @@ struct fip_vlan { #define QEDF_WRITE (1 << 0) #define MAX_FIBRE_LUNS 0xffffffff -#define QEDF_MAX_NUM_CQS 8 +#define MIN_NUM_CPUS_MSIX(x) min_t(u32, x->dev_info.num_cqs, \ + num_online_cpus()) /* * PCI function probe defines diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 7786c97e033f..1d13c9ca517d 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2760,11 +2760,9 @@ static int qedf_set_fcoe_pf_param(struct qedf_ctx *qedf) * we allocation is the minimum off: * * Number of CPUs - * Number of MSI-X vectors - * Max number allocated in hardware (QEDF_MAX_NUM_CQS) + * Number allocated by qed for our PCI function */ - qedf->num_queues = min((unsigned int)QEDF_MAX_NUM_CQS, - num_online_cpus()); + qedf->num_queues = MIN_NUM_CPUS_MSIX(qedf); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, "Number of CQs is %d.\n", qedf->num_queues); @@ -2962,6 +2960,13 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) goto err1; } + /* Learn information crucial for qedf to progress */ + rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info); + if (rc) { + QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n"); + goto err1; + } + /* queue allocation code should come here * order should be * slowpath_start @@ -2977,13 +2982,6 @@ static int __qedf_probe(struct pci_dev *pdev, int mode) } qed_ops->common->update_pf_params(qedf->cdev, &qedf->pf_params); - /* Learn information crucial for qedf to progress */ - rc = qed_ops->fill_dev_info(qedf->cdev, &qedf->dev_info); - if (rc) { - QEDF_ERR(&(qedf->dbg_ctx), "Failed to dev info.\n"); - goto err1; - } - /* Record BDQ producer doorbell addresses */ qedf->bdq_primary_prod = qedf->dev_info.primary_dbq_rq_addr; qedf->bdq_secondary_prod = qedf->dev_info.secondary_bdq_rq_addr; -- cgit v1.2.3 From e6fd916a625110d75bd4d15b8488df44cf41c9fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Jul 2017 22:49:55 +0300 Subject: scsi: aacraid: reading out of bounds "qd.id" comes directly from the copy_from_user() on the line before so we should verify that it's within bounds. Signed-off-by: Dan Carpenter Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 707ee2f5954d..4591113c49de 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -3198,10 +3198,11 @@ static int query_disk(struct aac_dev *dev, void __user *arg) return -EBUSY; if (copy_from_user(&qd, arg, sizeof (struct aac_query_disk))) return -EFAULT; - if (qd.cnum == -1) + if (qd.cnum == -1) { + if (qd.id < 0 || qd.id >= dev->maximum_num_containers) + return -EINVAL; qd.cnum = qd.id; - else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1)) - { + } else if ((qd.bus == -1) && (qd.id == -1) && (qd.lun == -1)) { if (qd.cnum < 0 || qd.cnum >= dev->maximum_num_containers) return -EINVAL; qd.instance = dev->scsi_host_ptr->host_no; -- cgit v1.2.3 From f930c7043663188429cd9b254e9d761edfc101ce Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 27 Jul 2017 09:11:26 +0200 Subject: scsi: sg: only check for dxfer_len greater than 256M Don't make any assumptions on the sg_io_hdr_t::dxfer_direction or the sg_io_hdr_t::dxferp in order to determine if it is a valid request. The only way we can check for bad requests is by checking if the length exceeds 256M. Signed-off-by: Johannes Thumshirn Fixes: 28676d869bbb (scsi: sg: check for valid direction before starting the request) Reported-by: Jason L Tibbitts III Tested-by: Jason L Tibbitts III Suggested-by: Doug Gilbert Cc: Doug Gilbert Cc: Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4fe606b000b4..d7ff71e0c85c 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -751,35 +751,6 @@ sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, return count; } -static bool sg_is_valid_dxfer(sg_io_hdr_t *hp) -{ - switch (hp->dxfer_direction) { - case SG_DXFER_NONE: - if (hp->dxferp || hp->dxfer_len > 0) - return false; - return true; - case SG_DXFER_FROM_DEV: - /* - * for SG_DXFER_FROM_DEV we always set dxfer_len to > 0. dxferp - * can either be NULL or != NULL so there's no point in checking - * it either. So just return true. - */ - return true; - case SG_DXFER_TO_DEV: - case SG_DXFER_TO_FROM_DEV: - if (!hp->dxferp || hp->dxfer_len == 0) - return false; - return true; - case SG_DXFER_UNKNOWN: - if ((!hp->dxferp && hp->dxfer_len) || - (hp->dxferp && hp->dxfer_len == 0)) - return false; - return true; - default: - return false; - } -} - static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking) @@ -800,7 +771,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if (!sg_is_valid_dxfer(hp)) + if (hp->dxfer_len >= SZ_256M) return -EINVAL; k = sg_start_req(srp, cmnd); -- cgit v1.2.3 From d630213f2a47933e1037909273a20023ba3e598d Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 10 Jul 2017 14:41:25 +0200 Subject: drm/bridge: tc358767: fix probe without attached output node The output node of the TC358767 is only used if another bridge is chained behind it. Panels attached to the TC358767 can be detected using the usual DP AUX probing. This restores the old behavior of ignoring the output if no endpoint is found. Fixes: ebc944613567 (drm: convert drivers to use drm_of_find_panel_or_bridge) CC: stable@vger.kernel.org Acked-by: Andrey Gusakov Signed-off-by: Lucas Stach Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20170710124125.9019-1-l.stach@pengutronix.de --- drivers/gpu/drm/bridge/tc358767.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 5c26488e7a2d..0529e500c534 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1255,7 +1255,7 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id) /* port@2 is the output port */ ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &tc->panel, NULL); - if (ret) + if (ret && ret != -ENODEV) return ret; /* Shut down GPIO is optional */ -- cgit v1.2.3 From 6d0f581d1768d3eaba15776e7dd1fdfec10cfe36 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 28 Jul 2017 17:42:59 -0700 Subject: xtensa: fix cache aliasing handling code for WT cache Currently building kernel for xtensa core with aliasing WT cache fails with the following messages: mm/memory.c:2152: undefined reference to `flush_dcache_page' mm/memory.c:2332: undefined reference to `local_flush_cache_page' mm/memory.c:1919: undefined reference to `local_flush_cache_range' mm/memory.c:4179: undefined reference to `copy_to_user_page' mm/memory.c:4183: undefined reference to `copy_from_user_page' This happens because implementation of these functions is only compiled when data cache is WB, which looks wrong: even when data cache doesn't need flushing it still needs invalidation. The functions like __flush_[invalidate_]dcache_* are correctly defined for both WB and WT caches (and even if they weren't that'd still be ok, just slower). Fix this by providing the same implementation of the above functions for both WB and WT cache. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/cache.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 1a804a2f9a5b..dbb1cdef3663 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -120,10 +120,6 @@ void copy_user_highpage(struct page *dst, struct page *src, preempt_enable(); } -#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK - /* * Any time the kernel writes to a user page cache page, or it is about to * read from a page cache page this routine is called. @@ -208,7 +204,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __invalidate_icache_page_alias(virt, phys); } -#endif +#endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ void update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) @@ -225,7 +221,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) flush_tlb_page(vma, addr); -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) { unsigned long phys = page_to_phys(page); @@ -256,7 +252,7 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) * flush_dcache_page() on the page. */ -#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK +#if (DCACHE_WAY_SIZE > PAGE_SIZE) void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, -- cgit v1.2.3 From 6ab1d8da972d4c4e318607e96c5ecb32101c80f4 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Fri, 28 Jul 2017 21:41:18 +0200 Subject: block, bfq: reset in_service_entity if it becomes idle BFQ implements hierarchical scheduling by representing each group of queues with a generic parent entity. For each parent entity, BFQ maintains an in_service_entity pointer: if one of the child entities happens to be in service, in_service_entity points to it. The resetting of these pointers happens only on queue expirations: when the in-service queue is expired, i.e., stops to be the queue in service, BFQ resets all in_service_entity pointers along the parent-entity path from this queue to the root entity. Functions handling the scheduling of entities assume, naturally, that in-service entities are active, i.e., have pending I/O requests (or, as a special case, even if they have no pending requests, they are expected to receive a new request very soon, with the scheduler idling the storage device while waiting for such an event). Unfortunately, the above resetting scheme of the in_service_entity pointers may cause this assumption to be violated. For example, the in-service queue may happen to remain without requests because of a request merge. In this case the queue does become idle, and all related data structures are updated accordingly. But in_service_entity still points to the queue in the parent entity. This inconsistency may even propagate to higher-level parent entities, if they happen to become idle as well, as a consequence of the leaf queue becoming idle. For this queue and parent entities, scheduling functions have an undefined behaviour, and, as reported, may easily lead to kernel crashes or hangs. This commit addresses this issue by simply resetting the in_service_entity field also when it is detected to point to an entity becoming idle (regardless of why the entity becomes idle). Reported-by: Laurentiu Nicola Signed-off-by: Paolo Valente Tested-by: Laurentiu Nicola Signed-off-by: Jens Axboe --- block/bfq-wf2q.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 979f8f21b7e2..881bbe5e1827 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1158,8 +1158,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) st = bfq_entity_service_tree(entity); is_in_service = entity == sd->in_service_entity; - if (is_in_service) + if (is_in_service) { bfq_calc_finish(entity, entity->service); + sd->in_service_entity = NULL; + } if (entity->tree == &st->active) bfq_active_extract(st, entity); -- cgit v1.2.3 From 46d556e6aaa0ec4dc83648ab1ca3d01dd2fa3ea3 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Sat, 29 Jul 2017 12:42:56 +0200 Subject: block, bfq: consider also in_service_entity to state whether an entity is active Groups of BFQ queues are represented by generic entities in BFQ. When a queue belonging to a parent entity is deactivated, the parent entity may need to be deactivated too, in case the deactivated queue was the only active queue for the parent entity. This deactivation may need to be propagated upwards if the entity belongs, in its turn, to a further higher-level entity, and so on. In particular, the upward propagation of deactivation stops at the first parent entity that remains active even if one of its child entities has been deactivated. To decide whether the last non-deactivation condition holds for a parent entity, BFQ checks whether the field next_in_service is still not NULL for the parent entity, after the deactivation of one of its child entity. If it is not NULL, then there are certainly other active entities in the parent entity, and deactivations can stop. Unfortunately, this check misses a corner case: if in_service_entity is not NULL, then next_in_service may happen to be NULL, although the parent entity is evidently active. This happens if: 1) the entity pointed by in_service_entity is the only active entity in the parent entity, and 2) according to the definition of next_in_service, the in_service_entity cannot be considered as next_in_service. See the comments on the definition of next_in_service for details on this second point. Hitting the above corner case causes crashes. To address this issue, this commit: 1) Extends the above check on only next_in_service to controlling both next_in_service and in_service_entity (if any of them is not NULL, then no further deactivation is performed) 2) Improves the (important) comments on how next_in_service is defined and updated; in particular it fixes a few rather obscure paragraphs Reported-by: Eric Wheeler Reported-by: Rick Yiu Reported-by: Tom X Nguyen Signed-off-by: Paolo Valente Tested-by: Eric Wheeler Tested-by: Rick Yiu Tested-by: Laurentiu Nicola Tested-by: Tom X Nguyen Signed-off-by: Jens Axboe --- block/bfq-iosched.h | 22 ++++++-- block/bfq-wf2q.c | 142 +++++++++++++++++++++++++++++----------------------- 2 files changed, 95 insertions(+), 69 deletions(-) diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 63e771ab56d8..859f0a8c97c8 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -71,17 +71,29 @@ struct bfq_service_tree { * * bfq_sched_data is the basic scheduler queue. It supports three * ioprio_classes, and can be used either as a toplevel queue or as an - * intermediate queue on a hierarchical setup. @next_in_service - * points to the active entity of the sched_data service trees that - * will be scheduled next. It is used to reduce the number of steps - * needed for each hierarchical-schedule update. + * intermediate queue in a hierarchical setup. * * The supported ioprio_classes are the same as in CFQ, in descending * priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE. * Requests from higher priority queues are served before all the * requests from lower priority queues; among requests of the same * queue requests are served according to B-WF2Q+. - * All the fields are protected by the queue lock of the containing bfqd. + * + * The schedule is implemented by the service trees, plus the field + * @next_in_service, which points to the entity on the active trees + * that will be served next, if 1) no changes in the schedule occurs + * before the current in-service entity is expired, 2) the in-service + * queue becomes idle when it expires, and 3) if the entity pointed by + * in_service_entity is not a queue, then the in-service child entity + * of the entity pointed by in_service_entity becomes idle on + * expiration. This peculiar definition allows for the following + * optimization, not yet exploited: while a given entity is still in + * service, we already know which is the best candidate for next + * service among the other active entitities in the same parent + * entity. We can then quickly compare the timestamps of the + * in-service entity with those of such best candidate. + * + * All fields are protected by the lock of the containing bfqd. */ struct bfq_sched_data { /* entity in service */ diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 881bbe5e1827..911aa7431dbe 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service) /* * This function tells whether entity stops being a candidate for next - * service, according to the following logic. + * service, according to the restrictive definition of the field + * next_in_service. In particular, this function is invoked for an + * entity that is about to be set in service. * - * This function is invoked for an entity that is about to be set in - * service. If such an entity is a queue, then the entity is no longer - * a candidate for next service (i.e, a candidate entity to serve - * after the in-service entity is expired). The function then returns - * true. + * If entity is a queue, then the entity is no longer a candidate for + * next service according to the that definition, because entity is + * about to become the in-service queue. This function then returns + * true if entity is a queue. * - * In contrast, the entity could stil be a candidate for next service - * if it is not a queue, and has more than one child. In fact, even if - * one of its children is about to be set in service, other children - * may still be the next to serve. As a consequence, a non-queue - * entity is not a candidate for next-service only if it has only one - * child. And only if this condition holds, then the function returns - * true for a non-queue entity. + * In contrast, entity could still be a candidate for next service if + * it is not a queue, and has more than one active child. In fact, + * even if one of its children is about to be set in service, other + * active children may still be the next to serve, for the parent + * entity, even according to the above definition. As a consequence, a + * non-queue entity is not a candidate for next-service only if it has + * only one active child. And only if this condition holds, then this + * function returns true for a non-queue entity. */ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) { @@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity) bfqg = container_of(entity, struct bfq_group, entity); + /* + * The field active_entities does not always contain the + * actual number of active children entities: it happens to + * not account for the in-service entity in case the latter is + * removed from its active tree (which may get done after + * invoking the function bfq_no_longer_next_in_service in + * bfq_get_next_queue). Fortunately, here, i.e., while + * bfq_no_longer_next_in_service is not yet completed in + * bfq_get_next_queue, bfq_active_extract has not yet been + * invoked, and thus active_entities still coincides with the + * actual number of active entities. + */ if (bfqg->active_entities == 1) return true; @@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity, * one of its children receives a new request. * * Basically, this function updates the timestamps of entity and - * inserts entity into its active tree, ater possible extracting it + * inserts entity into its active tree, ater possibly extracting it * from its idle tree. */ static void __bfq_activate_entity(struct bfq_entity *entity, @@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) entity->start = entity->finish; /* * In addition, if the entity had more than one child - * when set in service, then was not extracted from + * when set in service, then it was not extracted from * the active tree. This implies that the position of * the entity in the active tree may need to be * changed now, because we have just updated the start @@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity) * time in a moment (the requeueing is then, more * precisely, a repositioning in this case). To * implement this repositioning, we: 1) dequeue the - * entity here, 2) update the finish time and - * requeue the entity according to the new - * timestamps below. + * entity here, 2) update the finish time and requeue + * the entity according to the new timestamps below. */ if (entity->tree) bfq_active_extract(st, entity); @@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity, /** - * bfq_activate_entity - activate or requeue an entity representing a bfq_queue, - * and activate, requeue or reposition all ancestors - * for which such an update becomes necessary. + * bfq_activate_requeue_entity - activate or requeue an entity representing a + * bfq_queue, and activate, requeue or reposition + * all ancestors for which such an update becomes + * necessary. * @entity: the entity to activate. * @non_blocking_wait_rq: true if this entity was waiting for a request * @requeue: true if this is a requeue, which implies that bfqq is @@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity, * @ins_into_idle_tree: if false, the entity will not be put into the * idle tree. * - * Deactivates an entity, independently from its previous state. Must + * Deactivates an entity, independently of its previous state. Must * be invoked only if entity is on a service tree. Extracts the entity - * from that tree, and if necessary and allowed, puts it on the idle + * from that tree, and if necessary and allowed, puts it into the idle * tree. */ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) @@ -1179,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) /** * bfq_deactivate_entity - deactivate an entity representing a bfq_queue. * @entity: the entity to deactivate. - * @ins_into_idle_tree: true if the entity can be put on the idle tree + * @ins_into_idle_tree: true if the entity can be put into the idle tree */ static void bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree, @@ -1210,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity, */ bfq_update_next_in_service(sd, NULL); - if (sd->next_in_service) + if (sd->next_in_service || sd->in_service_entity) { /* - * The parent entity is still backlogged, - * because next_in_service is not NULL. So, no - * further upwards deactivation must be - * performed. Yet, next_in_service has - * changed. Then the schedule does need to be - * updated upwards. + * The parent entity is still active, because + * either next_in_service or in_service_entity + * is not NULL. So, no further upwards + * deactivation must be performed. Yet, + * next_in_service has changed. Then the + * schedule does need to be updated upwards. + * + * NOTE If in_service_entity is not NULL, then + * next_in_service may happen to be NULL, + * although the parent entity is evidently + * active. This happens if 1) the entity + * pointed by in_service_entity is the only + * active entity in the parent entity, and 2) + * according to the definition of + * next_in_service, the in_service_entity + * cannot be considered as + * next_in_service. See the comments on the + * definition of next_in_service for details. */ break; + } /* * If we get here, then the parent is no more @@ -1496,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd) /* * If entity is no longer a candidate for next - * service, then we extract it from its active tree, - * for the following reason. To further boost the - * throughput in some special case, BFQ needs to know - * which is the next candidate entity to serve, while - * there is already an entity in service. In this - * respect, to make it easy to compute/update the next - * candidate entity to serve after the current - * candidate has been set in service, there is a case - * where it is necessary to extract the current - * candidate from its service tree. Such a case is - * when the entity just set in service cannot be also - * a candidate for next service. Details about when - * this conditions holds are reported in the comments - * on the function bfq_no_longer_next_in_service() - * invoked below. + * service, then it must be extracted from its active + * tree, so as to make sure that it won't be + * considered when computing next_in_service. See the + * comments on the function + * bfq_no_longer_next_in_service() for details. */ if (bfq_no_longer_next_in_service(entity)) bfq_active_extract(bfq_entity_service_tree(entity), entity); /* - * For the same reason why we may have just extracted - * entity from its active tree, we may need to update - * next_in_service for the sched_data of entity too, - * regardless of whether entity has been extracted. - * In fact, even if entity has not been extracted, a - * descendant entity may get extracted. Such an event - * would cause a change in next_in_service for the - * level of the descendant entity, and thus possibly - * back to upper levels. + * Even if entity is not to be extracted according to + * the above check, a descendant entity may get + * extracted in one of the next iterations of this + * loop. Such an event could cause a change in + * next_in_service for the level of the descendant + * entity, and thus possibly back to this level. * - * We cannot perform the resulting needed update - * before the end of this loop, because, to know which - * is the correct next-to-serve candidate entity for - * each level, we need first to find the leaf entity - * to set in service. In fact, only after we know - * which is the next-to-serve leaf entity, we can - * discover whether the parent entity of the leaf - * entity becomes the next-to-serve, and so on. + * However, we cannot perform the resulting needed + * update of next_in_service for this level before the + * end of the whole loop, because, to know which is + * the correct next-to-serve candidate entity for each + * level, we need first to find the leaf entity to set + * in service. In fact, only after we know which is + * the next-to-serve leaf entity, we can discover + * whether the parent entity of the leaf entity + * becomes the next-to-serve, and so on. */ - } bfqq = bfq_entity_to_bfqq(entity); -- cgit v1.2.3 From cd5a6a4fdaba150089af2afc220eae0fef74878a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 25 Jul 2017 23:58:50 +0200 Subject: USB: hcd: Mark secondary HCD as dead if the primary one died Make usb_hc_died() clear the HCD_FLAG_RH_RUNNING flag for the shared HCD and set HCD_FLAG_DEAD for it, in analogy with what is done for the primary one. Among other thigs, this prevents check_root_hub_suspended() from returning -EBUSY for dead HCDs which helps to work around system suspend issues in some situations. This actually fixes occasional suspend failures on one of my test machines. Suggested-by: Alan Stern Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index ab1bb3b538ac..e518f6438877 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2485,6 +2485,8 @@ void usb_hc_died (struct usb_hcd *hcd) } if (usb_hcd_is_primary_hcd(hcd) && hcd->shared_hcd) { hcd = hcd->shared_hcd; + clear_bit(HCD_FLAG_RH_RUNNING, &hcd->flags); + set_bit(HCD_FLAG_DEAD, &hcd->flags); if (hcd->rh_registered) { clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); -- cgit v1.2.3 From 89f23d51defcb94a5026d4b5da13faf4e1150a6f Mon Sep 17 00:00:00 2001 From: Alan Swanson Date: Wed, 26 Jul 2017 12:03:33 +0100 Subject: uas: Add US_FL_IGNORE_RESIDUE for Initio Corporation INIC-3069 Similar to commit d595259fbb7a ("usb-storage: Add ignore-residue quirk for Initio INIC-3619") for INIC-3169 in unusual_devs.h but INIC-3069 already present in unusual_uas.h. Both in same controller IC family. Issue is that MakeMKV fails during key exchange with installed bluray drive with following error: 002004:0000 Error 'Scsi error - ILLEGAL REQUEST:COPY PROTECTION KEY EXCHANGE FAILURE - KEY NOT ESTABLISHED' occurred while issuing SCSI command AD010..080002400 to device 'SG:dev_11:0' Signed-off-by: Alan Swanson Acked-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/unusual_uas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index cbea9f329e71..cde115359793 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -124,9 +124,9 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999, /* Reported-by: Benjamin Tissoires */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", - "", + "INIC-3069", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_ATA_1X), + US_FL_NO_ATA_1X | US_FL_IGNORE_RESIDUE), /* Reported-by: Tom Arild Naess */ UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999, -- cgit v1.2.3 From 8b52291a0743fc4db4a7495c846a6f31ee84d282 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 26 Jul 2017 11:49:19 -0400 Subject: usb-storage: fix deadlock involving host lock and scsi_done Christoph Hellwig says that since version 4.12, the kernel switched to using blk-mq by default. The old code used a softirq for handling request completions, but blk-mq can handle completions in the caller's context. This may cause a problem for usb-storage, because it invokes the ->scsi_done callback while holding the host lock, and the completion routine sometimes tries to acquire the same lock (when running the error handler, for example). The consequence is that the existing code will sometimes deadlock upon error completion of a SCSI command (with a lockdep warning). This is easy enough to fix, since usb-storage doesn't really need to hold the host lock while the callback runs. It was simpler to write it that way, but moving the call outside the locked region is pretty easy and there's no downside. That's what this patch does. Signed-off-by: Alan Stern Reported-and-tested-by: Arthur Marsh CC: Christoph Hellwig CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/usb.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index 06615934fed1..0dceb9fa3a06 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -315,6 +315,7 @@ static int usb_stor_control_thread(void * __us) { struct us_data *us = (struct us_data *)__us; struct Scsi_Host *host = us_to_host(us); + struct scsi_cmnd *srb; for (;;) { usb_stor_dbg(us, "*** thread sleeping\n"); @@ -330,6 +331,7 @@ static int usb_stor_control_thread(void * __us) scsi_lock(host); /* When we are called with no command pending, we're done */ + srb = us->srb; if (us->srb == NULL) { scsi_unlock(host); mutex_unlock(&us->dev_mutex); @@ -398,14 +400,11 @@ static int usb_stor_control_thread(void * __us) /* lock access to the state */ scsi_lock(host); - /* indicate that the command is done */ - if (us->srb->result != DID_ABORT << 16) { - usb_stor_dbg(us, "scsi cmd done, result=0x%x\n", - us->srb->result); - us->srb->scsi_done(us->srb); - } else { + /* was the command aborted? */ + if (us->srb->result == DID_ABORT << 16) { SkipForAbort: usb_stor_dbg(us, "scsi command aborted\n"); + srb = NULL; /* Don't call srb->scsi_done() */ } /* @@ -429,6 +428,13 @@ SkipForAbort: /* unlock the device pointers */ mutex_unlock(&us->dev_mutex); + + /* now that the locks are released, notify the SCSI core */ + if (srb) { + usb_stor_dbg(us, "scsi cmd done, result=0x%x\n", + srb->result); + srb->scsi_done(srb); + } } /* for (;;) */ /* Wait until we are told to stop */ -- cgit v1.2.3 From 2eac13624364db5b5e1666ae0bb3a4d36bc56b6e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 25 Jul 2017 09:31:33 -0500 Subject: usb: core: unlink urbs from the tail of the endpoint's urb_list While unlink an urb, if the urb has been programmed in the controller, the controller driver might do some hw related actions to tear down the urb. Currently usb_hcd_flush_endpoint() passes each urb from the head of the endpoint's urb_list to the controller driver, which could make the controller driver think each urb has been programmed and take the unnecessary actions for each urb. This patch changes the behavior in usb_hcd_flush_endpoint() to pass the urbs from the tail of the list, to avoid any unnecessary actions in an controller driver. Cc: stable@vger.kernel.org # v4.4+ Acked-by: Alan Stern Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index e518f6438877..7f277b092b5b 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1888,7 +1888,7 @@ void usb_hcd_flush_endpoint(struct usb_device *udev, /* No more submits can occur */ spin_lock_irq(&hcd_urb_list_lock); rescan: - list_for_each_entry (urb, &ep->urb_list, urb_list) { + list_for_each_entry_reverse(urb, &ep->urb_list, urb_list) { int is_in; if (urb->unlinked) -- cgit v1.2.3 From 45d73860530a14c608f410b91c6c341777bfa85d Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Tue, 25 Jul 2017 09:31:34 -0500 Subject: usb: musb: fix tx fifo flush handling again commit 68fe05e2a451 ("usb: musb: fix tx fifo flush handling") drops the 1ms delay trying to solve the long disconnect time issue when application queued many tx urbs. However, the 1ms delay is needed for some use cases, for example, without the delay, reconnecting AR9271 WIFI dongle no longer works if the connection is dropped from the AP. So let's add back the 1ms delay in musb_h_tx_flush_fifo(), and solve the long disconnect time problem with a separate patch for usb_hcd_flush_endpoint(). Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 76decb8011eb..3344ffd5bb13 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -139,6 +139,7 @@ static void musb_h_tx_flush_fifo(struct musb_hw_ep *ep) "Could not flush host TX%d fifo: csr: %04x\n", ep->epnum, csr)) return; + mdelay(1); } } -- cgit v1.2.3 From 37ef38f3f83891a2f413fb872bae7d0f9bb95b27 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 27 Jul 2017 16:15:52 -0500 Subject: tty: pl011: fix initialization order of QDF2400 E44 The work-around for Qualcomm Technologies QDF2400 Erratum 44 hinges on a global variable defined in the pl011 driver. The ACPI SPCR parsing code determines whether the work-around is needed, and if so, it changes the console name from "pl011" to "qdf2400_e44". The expectation is that the pl011 driver will implement the work-around when it sees the console name. The global variable qdf2400_e44_present is set when that happens. The problem is that work-around needs to be enabled when the pl011 driver probes, not when the console name is queried. However, sbsa_probe() is called before pl011_console_match(). The work-around appeared to work previously because the default console on QDF2400 platforms was always ttyAMA1. The first time sbsa_probe() is called (for ttyAMA0), qdf2400_e44_present is still false. Then pl011_console_match() is called, and it sets qdf2400_e44_present to true. All subsequent calls to sbsa_probe() enable the work-around. The solution is to move the global variable into spcr.c and let the pl011 driver query it during probe time. This works because all QDF2400 platforms require SPCR, so parse_spcr() will always be called. pl011_console_match still checks for the "qdf2400_e44" console name, but it doesn't do anything else special. Fixes: 5a0722b898f8 ("tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44") Tested-by: Jeffrey Hugo Signed-off-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/spcr.c | 36 ++++++++++++++++++++++++++++++++++-- drivers/tty/serial/amba-pl011.c | 37 +++++++++++++++++++------------------ include/linux/acpi.h | 1 + 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index 4ac3e06b41d8..98aa8c808a33 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -16,6 +16,16 @@ #include #include +/* + * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as + * occasionally getting stuck as 1. To avoid the potential for a hang, check + * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART + * implementations, so only do so if an affected platform is detected in + * parse_spcr(). + */ +bool qdf2400_e44_present; +EXPORT_SYMBOL(qdf2400_e44_present); + /* * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit. * Detect them by examining the OEM fields in the SPCR header, similiar to PCI @@ -147,8 +157,30 @@ int __init parse_spcr(bool earlycon) goto done; } - if (qdf2400_erratum_44_present(&table->header)) - uart = "qdf2400_e44"; + /* + * If the E44 erratum is required, then we need to tell the pl011 + * driver to implement the work-around. + * + * The global variable is used by the probe function when it + * creates the UARTs, whether or not they're used as a console. + * + * If the user specifies "traditional" earlycon, the qdf2400_e44 + * console name matches the EARLYCON_DECLARE() statement, and + * SPCR is not used. Parameter "earlycon" is false. + * + * If the user specifies "SPCR" earlycon, then we need to update + * the console name so that it also says "qdf2400_e44". Parameter + * "earlycon" is true. + * + * For consistency, if we change the console name, then we do it + * for everyone, not just earlycon. + */ + if (qdf2400_erratum_44_present(&table->header)) { + qdf2400_e44_present = true; + if (earlycon) + uart = "qdf2400_e44"; + } + if (xgene_8250_erratum_present(table)) iotype = "mmio32"; diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8a857bb34fbb..1888d168a41c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -142,15 +142,7 @@ static struct vendor_data vendor_sbsa = { .fixed_options = true, }; -/* - * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as - * occasionally getting stuck as 1. To avoid the potential for a hang, check - * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART - * implementations, so only do so if an affected platform is detected in - * parse_spcr(). - */ -static bool qdf2400_e44_present = false; - +#ifdef CONFIG_ACPI_SPCR_TABLE static struct vendor_data vendor_qdt_qdf2400_e44 = { .reg_offset = pl011_std_offsets, .fr_busy = UART011_FR_TXFE, @@ -165,6 +157,7 @@ static struct vendor_data vendor_qdt_qdf2400_e44 = { .always_enabled = true, .fixed_options = true, }; +#endif static u16 pl011_st_offsets[REG_ARRAY_SIZE] = { [REG_DR] = UART01x_DR, @@ -2375,12 +2368,14 @@ static int __init pl011_console_match(struct console *co, char *name, int idx, resource_size_t addr; int i; - if (strcmp(name, "qdf2400_e44") == 0) { - pr_info_once("UART: Working around QDF2400 SoC erratum 44"); - qdf2400_e44_present = true; - } else if (strcmp(name, "pl011") != 0) { + /* + * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum + * have a distinct console name, so make sure we check for that. + * The actual implementation of the erratum occurs in the probe + * function. + */ + if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0)) return -ENODEV; - } if (uart_parse_earlycon(options, &iotype, &addr, &options)) return -ENODEV; @@ -2734,11 +2729,17 @@ static int sbsa_uart_probe(struct platform_device *pdev) } uap->port.irq = ret; - uap->reg_offset = vendor_sbsa.reg_offset; - uap->vendor = qdf2400_e44_present ? - &vendor_qdt_qdf2400_e44 : &vendor_sbsa; +#ifdef CONFIG_ACPI_SPCR_TABLE + if (qdf2400_e44_present) { + dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n"); + uap->vendor = &vendor_qdt_qdf2400_e44; + } else +#endif + uap->vendor = &vendor_sbsa; + + uap->reg_offset = uap->vendor->reg_offset; uap->fifosize = 32; - uap->port.iotype = vendor_sbsa.access_32b ? UPIO_MEM32 : UPIO_MEM; + uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM; uap->port.ops = &sbsa_uart_pops; uap->fixed_baud = baudrate; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c749eef1daa1..27b4b6615263 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1209,6 +1209,7 @@ static inline bool acpi_has_watchdog(void) { return false; } #endif #ifdef CONFIG_ACPI_SPCR_TABLE +extern bool qdf2400_e44_present; int parse_spcr(bool earlycon); #else static inline int parse_spcr(bool earlycon) { return 0; } -- cgit v1.2.3 From cef988642cdac44e910a27cb6e8166c96f86a0df Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Fri, 28 Jul 2017 16:22:31 +0100 Subject: staging: comedi: comedi_fops: do not call blocking ops when !TASK_RUNNING Comedi's read and write file operation handlers (`comedi_read()` and `comedi_write()`) currently call `copy_to_user()` or `copy_from_user()` whilst in the `TASK_INTERRUPTIBLE` state, which falls foul of the `might_fault()` checks when enabled. Fix it by setting the current task state back to `TASK_RUNNING` a bit earlier before calling these functions. Reported-by: Piotr Gregor Signed-off-by: Ian Abbott Cc: # 4.5+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedi_fops.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index ca11be21f64b..34ca7823255d 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -2396,6 +2396,7 @@ static ssize_t comedi_write(struct file *file, const char __user *buf, continue; } + set_current_state(TASK_RUNNING); wp = async->buf_write_ptr; n1 = min(n, async->prealloc_bufsz - wp); n2 = n - n1; @@ -2528,6 +2529,8 @@ static ssize_t comedi_read(struct file *file, char __user *buf, size_t nbytes, } continue; } + + set_current_state(TASK_RUNNING); rp = async->buf_read_ptr; n1 = min(n, async->prealloc_bufsz - rp); n2 = n - n1; -- cgit v1.2.3 From c542942cb42186f99b6d715a833c7afad359f48f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 12 Jul 2017 15:51:17 +0800 Subject: tcmu: Fix possible to/from address overflow when doing the memcpy For most case the sg->length equals to PAGE_SIZE, so this bug won't be triggered. Otherwise this will crash the kernel, for example when all segments' sg->length equal to 1K. Signed-off-by: Xiubo Li Reviewed-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 80ee130f8253..db29b5cd0b95 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -563,8 +563,6 @@ static int scatter_data_area(struct tcmu_dev *udev, block_remaining); to_offset = get_block_offset_user(udev, dbi, block_remaining); - offset = DATA_BLOCK_SIZE - block_remaining; - to += offset; if (*iov_cnt != 0 && to_offset == iov_tail(*iov)) { @@ -575,8 +573,10 @@ static int scatter_data_area(struct tcmu_dev *udev, (*iov)->iov_len = copy_bytes; } if (copy_data) { - memcpy(to, from + sg->length - sg_remaining, - copy_bytes); + offset = DATA_BLOCK_SIZE - block_remaining; + memcpy(to + offset, + from + sg->length - sg_remaining, + copy_bytes); tcmu_flush_dcache_range(to, copy_bytes); } sg_remaining -= copy_bytes; @@ -637,9 +637,8 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, copy_bytes = min_t(size_t, sg_remaining, block_remaining); offset = DATA_BLOCK_SIZE - block_remaining; - from += offset; tcmu_flush_dcache_range(from, copy_bytes); - memcpy(to + sg->length - sg_remaining, from, + memcpy(to + sg->length - sg_remaining, from + offset, copy_bytes); sg_remaining -= copy_bytes; -- cgit v1.2.3 From ededd039d1b96035b23592c049efcae53922cfce Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Fri, 14 Jul 2017 08:11:04 -0500 Subject: tcmu: free old string on reconfig On initial tcmu_configure_device call the info->name would have already been allocated and set, so on the second call make sure to free it first. Reported-by: Mike Christie Reviewed-by: Mike Christie Signed-off-by: Bryant G. Ly Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index db29b5cd0b95..942d094269fb 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1432,6 +1432,8 @@ static int tcmu_update_uio_info(struct tcmu_dev *udev) if (udev->dev_config[0]) snprintf(str + used, size - used, "/%s", udev->dev_config); + /* If the old string exists, free it */ + kfree(info->name); info->name = str; return 0; -- cgit v1.2.3 From 66b59f9b1f41ee61eb4862bbcfc1e7db5298ba9e Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sun, 23 Jul 2017 20:03:07 +0530 Subject: cxgbit: add missing __kfree_skb() Call __kfree_skb() after processing skb to avoid memory leak. Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index e583dd8a418b..d4fa41be80f9 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1510,11 +1510,13 @@ cxgbit_pass_open_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) if (!cnp) { pr_info("%s stid %d lookup failure\n", __func__, stid); - return; + goto rel_skb; } cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status); cxgbit_put_cnp(cnp); +rel_skb: + __kfree_skb(skb); } static void @@ -1530,11 +1532,13 @@ cxgbit_close_listsrv_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) if (!cnp) { pr_info("%s stid %d lookup failure\n", __func__, stid); - return; + goto rel_skb; } cxgbit_wake_up(&cnp->com.wr_wait, __func__, rpl->status); cxgbit_put_cnp(cnp); +rel_skb: + __kfree_skb(skb); } static void @@ -1819,12 +1823,16 @@ static void cxgbit_set_tcb_rpl(struct cxgbit_device *cdev, struct sk_buff *skb) struct tid_info *t = lldi->tids; csk = lookup_tid(t, tid); - if (unlikely(!csk)) + if (unlikely(!csk)) { pr_err("can't find connection for tid %u.\n", tid); - else + goto rel_skb; + } else { cxgbit_wake_up(&csk->com.wr_wait, __func__, rpl->status); + } cxgbit_put_csk(csk); +rel_skb: + __kfree_skb(skb); } static void cxgbit_rx_data(struct cxgbit_device *cdev, struct sk_buff *skb) -- cgit v1.2.3 From ea8dc5b4cd2195ee582cae28afa4164c6dea1738 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sun, 23 Jul 2017 20:03:33 +0530 Subject: iscsi-target: fix memory leak in iscsit_setup_text_cmd() On receiving text request iscsi-target allocates buffer for payload in iscsit_handle_text_cmd() and assigns buffer pointer to cmd->text_in_ptr, this buffer is currently freed in iscsit_release_cmd(), if iscsi-target sets 'C' bit in text response then it will receive another text request from the initiator with ttt != 0xffffffff in this case iscsi-target will find cmd using itt and call iscsit_setup_text_cmd() which will set cmd->text_in_ptr to NULL without freeing previously allocated buffer. This patch fixes this issue by calling kfree(cmd->text_in_ptr) in iscsit_setup_text_cmd() before assigning NULL to it. For the first text request cmd->text_in_ptr is NULL as cmd is memset to 0 in iscsit_allocate_cmd(). Signed-off-by: Varun Prakash Cc: # 4.0+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 74e4975dd1b1..2688918b879a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2167,6 +2167,7 @@ iscsit_setup_text_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, cmd->cmd_sn = be32_to_cpu(hdr->cmdsn); cmd->exp_stat_sn = be32_to_cpu(hdr->exp_statsn); cmd->data_direction = DMA_NONE; + kfree(cmd->text_in_ptr); cmd->text_in_ptr = NULL; return 0; -- cgit v1.2.3 From 310d40a973c560a24c79f84cb5f16dc540a05686 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sun, 23 Jul 2017 20:03:45 +0530 Subject: iscsi-target: fix invalid flags in text response In case of multiple text responses iscsi-target sets both 'F' and 'C' bit for the final text response pdu, this issue happens because hdr->flags is not zeroed out before ORing with 'F' bit. This patch removes the | operator to fix this issue. Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 2688918b879a..12803de99400 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3488,9 +3488,9 @@ iscsit_build_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, return text_length; if (completed) { - hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->flags = ISCSI_FLAG_CMD_FINAL; } else { - hdr->flags |= ISCSI_FLAG_TEXT_CONTINUE; + hdr->flags = ISCSI_FLAG_TEXT_CONTINUE; cmd->read_data_done += text_length; if (cmd->targ_xfer_tag == 0xFFFFFFFF) cmd->targ_xfer_tag = session_get_next_ttt(conn->sess); -- cgit v1.2.3 From d96adb9b076a12d30500347e2e667689062f44a0 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Sat, 29 Jul 2017 21:01:49 +0530 Subject: cxgbit: fix sg_nents calculation The current logic of calculating sg_nents can fail if data_offset % PAGE_SIZE is not zero. For example - PAGE_SIZE = 4096 data_len = 3072 data_offset = 3072 As per current logic sg_nents = max(1UL, DIV_ROUND_UP(data_len, PAGE_SIZE)); sg_nents = max(1UL, DIV_ROUND_UP(3072, 4096)); sg_nents = 1 But as data_offset % PAGE_SIZE = 3072 we should skip 3072 bytes skip = 3K sg_nents = max(1UL, DIV_ROUND_UP(3K(skip) + 3K(data_len), 4K(PAGE_SIZE)); sg_nents = 2; This patch fixes this issue by adding skip to data_len. Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_target.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index dda13f1af38e..514986b57c2d 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -827,7 +827,7 @@ cxgbit_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login, static void cxgbit_skb_copy_to_sg(struct sk_buff *skb, struct scatterlist *sg, - unsigned int nents) + unsigned int nents, u32 skip) { struct skb_seq_state st; const u8 *buf; @@ -846,7 +846,7 @@ cxgbit_skb_copy_to_sg(struct sk_buff *skb, struct scatterlist *sg, } consumed += sg_pcopy_from_buffer(sg, nents, (void *)buf, - buf_len, consumed); + buf_len, skip + consumed); } } @@ -912,7 +912,7 @@ cxgbit_handle_immediate_data(struct iscsi_cmd *cmd, struct iscsi_scsi_req *hdr, struct scatterlist *sg = &cmd->se_cmd.t_data_sg[0]; u32 sg_nents = max(1UL, DIV_ROUND_UP(pdu_cb->dlen, PAGE_SIZE)); - cxgbit_skb_copy_to_sg(csk->skb, sg, sg_nents); + cxgbit_skb_copy_to_sg(csk->skb, sg, sg_nents, 0); } cmd->write_data_done += pdu_cb->dlen; @@ -1069,11 +1069,13 @@ static int cxgbit_handle_iscsi_dataout(struct cxgbit_sock *csk) cmd->se_cmd.data_length); if (!(pdu_cb->flags & PDUCBF_RX_DATA_DDPD)) { + u32 skip = data_offset % PAGE_SIZE; + sg_off = data_offset / PAGE_SIZE; sg_start = &cmd->se_cmd.t_data_sg[sg_off]; - sg_nents = max(1UL, DIV_ROUND_UP(data_len, PAGE_SIZE)); + sg_nents = max(1UL, DIV_ROUND_UP(skip + data_len, PAGE_SIZE)); - cxgbit_skb_copy_to_sg(csk->skb, sg_start, sg_nents); + cxgbit_skb_copy_to_sg(csk->skb, sg_start, sg_nents, skip); } check_payload: -- cgit v1.2.3 From 6bcbb3174caa5f1ccc894f8ae077631659d5a629 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 30 Jun 2017 00:08:13 -0700 Subject: qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd use during TMR ABORT (v2) This patch drops two incorrect usages of tcm_qla2xxx_free_cmd() during TMR ABORT within tcm_qla2xxx_handle_data_work() and tcm_qla2xxx_aborted_task(), which where attempting to dispatch into workqueue context to do tcm_qla2xxx_complete_free() and subsequently invoke transport_generic_free_cmd(). This is incorrect because during TMR ABORT target-core will drop the outstanding se_cmd->cmd_kref references once it has quiesced the se_cmd via transport_wait_for_tasks(), and in the case of qla2xxx it should not attempt to do it's own transport_generic_free_cmd() once the abort has occured. As reported by Pascal, this was originally manifesting as a BUG_ON(cmd->cmd_in_wq) in qlt_free_cmd() during TMR ABORT, with a LIO backend that had sufficently high enough WRITE latency to trigger a host side TMR ABORT_TASK. (v2: Drop the qla_tgt_cmd->write_pending_abort_comp changes, as they will be addressed in a seperate series) Reported-by: Pascal de Bruijn Tested-by: Pascal de Bruijn Cc: Pascal de Bruijn Reported-by: Lukasz Engel Cc: Lukasz Engel Acked-by: Himanshu Madhani Cc: Quinn Tran Cc: # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index b20da0d27ad7..3f82ea1b72dc 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -500,7 +500,6 @@ static int tcm_qla2xxx_handle_cmd(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, static void tcm_qla2xxx_handle_data_work(struct work_struct *work) { struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); - unsigned long flags; /* * Ensure that the complete FCP WRITE payload has been received. @@ -508,17 +507,6 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) */ cmd->cmd_in_wq = 0; - spin_lock_irqsave(&cmd->cmd_lock, flags); - cmd->data_work = 1; - if (cmd->aborted) { - cmd->data_work_free = 1; - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - - tcm_qla2xxx_free_cmd(cmd); - return; - } - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - cmd->qpair->tgt_counters.qla_core_ret_ctio++; if (!cmd->write_data_transferred) { /* @@ -765,31 +753,13 @@ static void tcm_qla2xxx_queue_tm_rsp(struct se_cmd *se_cmd) qlt_xmit_tm_rsp(mcmd); } -#define DATA_WORK_NOT_FREE(_cmd) (_cmd->data_work && !_cmd->data_work_free) static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - unsigned long flags; if (qlt_abort_cmd(cmd)) return; - - spin_lock_irqsave(&cmd->cmd_lock, flags); - if ((cmd->state == QLA_TGT_STATE_NEW)|| - ((cmd->state == QLA_TGT_STATE_DATA_IN) && - DATA_WORK_NOT_FREE(cmd))) { - cmd->data_work_free = 1; - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - /* - * cmd has not reached fw, Use this trigger to free it. - */ - tcm_qla2xxx_free_cmd(cmd); - return; - } - spin_unlock_irqrestore(&cmd->cmd_lock, flags); - return; - } static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *, -- cgit v1.2.3 From 54e22f265e872ae140755b3318521d400a094605 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 6 Jul 2017 07:02:25 +0200 Subject: batman-adv: fix TT sync flag inconsistencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes an issue in the translation table code potentially leading to a TT Request + Response storm. The issue may occur for nodes involving BLA and an inconsistent configuration of the batman-adv AP isolation feature. However, since the new multicast optimizations, a single, malformed packet may lead to a mesh-wide, persistent Denial-of-Service, too. The issue occurs because nodes are currently OR-ing the TT sync flags of all originators announcing a specific MAC address via the translation table. When an intermediate node now receives a TT Request and wants to answer this on behalf of the destination node, then this intermediate node now responds with an altered flag field and broken CRC. The next OGM of the real destination will lead to a CRC mismatch and triggering a TT Request and Response again. Furthermore, the OR-ing is currently never undone as long as at least one originator announcing the according MAC address remains, leading to the potential persistency of this issue. This patch fixes this issue by storing the flags used in the CRC calculation on a a per TT orig entry basis to be able to respond with the correct, original flags in an intermediate TT Response for one thing. And to be able to correctly unset sync flags once all nodes announcing a sync flag vanish for another. Fixes: e9c00136a475 ("batman-adv: fix tt_global_entries flags update") Signed-off-by: Linus Lüssing Acked-by: Antonio Quartulli [sw: typo in commit message] Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 60 ++++++++++++++++++++++++++++++++------ net/batman-adv/types.h | 2 ++ 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e1133bc634b5..8a3ce79b1307 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1549,9 +1549,41 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, return found; } +/** + * batadv_tt_global_sync_flags - update TT sync flags + * @tt_global: the TT global entry to update sync flags in + * + * Updates the sync flag bits in the tt_global flag attribute with a logical + * OR of all sync flags from any of its TT orig entries. + */ +static void +batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global) +{ + struct batadv_tt_orig_list_entry *orig_entry; + const struct hlist_head *head; + u16 flags = BATADV_NO_FLAGS; + + rcu_read_lock(); + head = &tt_global->orig_list; + hlist_for_each_entry_rcu(orig_entry, head, list) + flags |= orig_entry->flags; + rcu_read_unlock(); + + flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK); + tt_global->common.flags = flags; +} + +/** + * batadv_tt_global_orig_entry_add - add or update a TT orig entry + * @tt_global: the TT global entry to add an orig entry in + * @orig_node: the originator to add an orig entry for + * @ttvn: translation table version number of this changeset + * @flags: TT sync flags + */ static void batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, - struct batadv_orig_node *orig_node, int ttvn) + struct batadv_orig_node *orig_node, int ttvn, + u8 flags) { struct batadv_tt_orig_list_entry *orig_entry; @@ -1561,7 +1593,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, * was added during a "temporary client detection" */ orig_entry->ttvn = ttvn; - goto out; + orig_entry->flags = flags; + goto sync_flags; } orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); @@ -1573,6 +1606,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; + orig_entry->flags = flags; kref_init(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); @@ -1582,6 +1616,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count); +sync_flags: + batadv_tt_global_sync_flags(tt_global); out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); @@ -1703,10 +1739,10 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, } /* the change can carry possible "attribute" flags like the - * TT_CLIENT_WIFI, therefore they have to be copied in the + * TT_CLIENT_TEMP, therefore they have to be copied in the * client entry */ - common->flags |= flags; + common->flags |= flags & (~BATADV_TT_SYNC_MASK); /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a @@ -1723,7 +1759,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, } add_orig_entry: /* add the new orig_entry (if needed) or update it */ - batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); + batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn, + flags & BATADV_TT_SYNC_MASK); batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", @@ -1946,6 +1983,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_tt_orig_list_entry *orig, bool best) { + u16 flags = (common->flags & (~BATADV_TT_SYNC_MASK)) | orig->flags; void *hdr; struct batadv_orig_node_vlan *vlan; u8 last_ttvn; @@ -1975,7 +2013,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || - nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, flags)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) @@ -2589,6 +2627,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; + struct batadv_tt_orig_list_entry *tt_orig; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; @@ -2627,8 +2666,9 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /* find out if this global entry is announced by this * originator */ - if (!batadv_tt_global_entry_has_orig(tt_global, - orig_node)) + tt_orig = batadv_tt_global_orig_entry_find(tt_global, + orig_node); + if (!tt_orig) continue; /* use network order to read the VID: this ensures that @@ -2640,10 +2680,12 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /* compute the CRC on flags that have to be kept in sync * among nodes */ - flags = tt_common->flags & BATADV_TT_SYNC_MASK; + flags = tt_orig->flags; crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); + + batadv_tt_orig_list_entry_put(tt_orig); } rcu_read_unlock(); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ea43a6449247..a62795868794 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1260,6 +1260,7 @@ struct batadv_tt_global_entry { * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client * @orig_node: pointer to orig node announcing this non-mesh client * @ttvn: translation table version number which added the non-mesh client + * @flags: per orig entry TT sync flags * @list: list node for batadv_tt_global_entry::orig_list * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner @@ -1267,6 +1268,7 @@ struct batadv_tt_global_entry { struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; u8 ttvn; + u8 flags; struct hlist_node list; struct kref refcount; struct rcu_head rcu; -- cgit v1.2.3 From b962e2cd357f2a529fe14387092401e6552f43d6 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 1 Jul 2017 11:45:36 +0800 Subject: pinctrl: zte: fix dereference of 'data' in zx_set_mux() It fixes the following Smatch complaint: drivers/pinctrl/zte/pinctrl-zx.c:76 zx_set_mux() warn: variable dereferenced before check 'data' (see line 67) Reported-by: Dan Carpenter Fixes: cbff0c4d27f4 ("pinctrl: add ZTE ZX pinctrl driver support") Signed-off-by: Shawn Guo Signed-off-by: Linus Walleij --- drivers/pinctrl/zte/pinctrl-zx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/zte/pinctrl-zx.c b/drivers/pinctrl/zte/pinctrl-zx.c index 787e3967bd5c..f828ee340a98 100644 --- a/drivers/pinctrl/zte/pinctrl-zx.c +++ b/drivers/pinctrl/zte/pinctrl-zx.c @@ -64,10 +64,8 @@ static int zx_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, struct zx_pinctrl_soc_info *info = zpctl->info; const struct pinctrl_pin_desc *pindesc = info->pins + group_selector; struct zx_pin_data *data = pindesc->drv_data; - struct zx_mux_desc *mux = data->muxes; - u32 mask = (1 << data->width) - 1; - u32 offset = data->offset; - u32 bitpos = data->bitpos; + struct zx_mux_desc *mux; + u32 mask, offset, bitpos; struct function_desc *func; unsigned long flags; u32 val, mval; @@ -76,6 +74,11 @@ static int zx_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, if (!data) return -EINVAL; + mux = data->muxes; + mask = (1 << data->width) - 1; + offset = data->offset; + bitpos = data->bitpos; + func = pinmux_generic_get_function(pctldev, func_selector); if (!func) return -EINVAL; -- cgit v1.2.3 From 65ff135b4f748f7d5f0d6a21b860e88a0d1fb7c5 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Thu, 6 Jul 2017 15:54:23 +0900 Subject: pinctrl: uniphier: fix USB3 pin assignment for Pro4 According to pinctrl assignment for Pro4, each definition of USB#2 and USB#3 are as follows. 184: USB2VBUS 185: USB2OD 186: USB2ID 187: USB3VBUS 188: USB3OD USB#2 has an additional pin "USB2ID", but the chip doesn't use this pin while in host-mode. Considering this pin, the pin definitions for USB#3 should be {187, 188}. Signed-off-by: Kunihiko Hayashi Acked-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c index a433a306a2d0..c75e094b2d90 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-pro4.c @@ -1084,7 +1084,7 @@ static const unsigned usb1_pins[] = {182, 183}; static const int usb1_muxvals[] = {0, 0}; static const unsigned usb2_pins[] = {184, 185}; static const int usb2_muxvals[] = {0, 0}; -static const unsigned usb3_pins[] = {186, 187}; +static const unsigned usb3_pins[] = {187, 188}; static const int usb3_muxvals[] = {0, 0}; static const unsigned port_range0_pins[] = { 300, 301, 302, 303, 304, 305, 306, 307, /* PORT0x */ -- cgit v1.2.3 From c64ffff7a9d1eee6624d3eaab36968fe6df31a9f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Jul 2017 17:13:28 +0300 Subject: i2c: core: Allow empty id_table in ACPI case as well For now empty ID table is not allowed with ACPI and prevents driver to be probed. Add a check to allow empty ID table. This introduces a helper i2c_acpi_match_device(). Note, we rename some static function in i2c-core-acpi.c to distinguish with public API. Fixes: da10c06a044b ("i2c: Make I2C ID tables non-mandatory for DT'ed devices") Signed-off-by: Andy Shevchenko Tested-by: Rajmohan Mani Acked-by: Mika Westerberg [wsa: needed to get some drivers probed again] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 19 +++++++++++++++---- drivers/i2c/i2c-core-base.c | 1 + drivers/i2c/i2c-core.h | 9 +++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 4842ec3a5451..a9126b3cda61 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -230,6 +230,16 @@ void i2c_acpi_register_devices(struct i2c_adapter *adap) dev_warn(&adap->dev, "failed to enumerate I2C slaves\n"); } +const struct acpi_device_id * +i2c_acpi_match_device(const struct acpi_device_id *matches, + struct i2c_client *client) +{ + if (!(client && matches)) + return NULL; + + return acpi_match_device(matches, &client->dev); +} + static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, void *data, void **return_value) { @@ -289,7 +299,7 @@ u32 i2c_acpi_find_bus_speed(struct device *dev) } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); -static int i2c_acpi_match_adapter(struct device *dev, void *data) +static int i2c_acpi_find_match_adapter(struct device *dev, void *data) { struct i2c_adapter *adapter = i2c_verify_adapter(dev); @@ -299,7 +309,7 @@ static int i2c_acpi_match_adapter(struct device *dev, void *data) return ACPI_HANDLE(dev) == (acpi_handle)data; } -static int i2c_acpi_match_device(struct device *dev, void *data) +static int i2c_acpi_find_match_device(struct device *dev, void *data) { return ACPI_COMPANION(dev) == data; } @@ -309,7 +319,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) struct device *dev; dev = bus_find_device(&i2c_bus_type, NULL, handle, - i2c_acpi_match_adapter); + i2c_acpi_find_match_adapter); return dev ? i2c_verify_adapter(dev) : NULL; } @@ -317,7 +327,8 @@ static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; - dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_match_device); + dev = bus_find_device(&i2c_bus_type, NULL, adev, + i2c_acpi_find_match_device); return dev ? i2c_verify_client(dev) : NULL; } diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index c89dac7fd2e7..12822a4b8f8f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -357,6 +357,7 @@ static int i2c_device_probe(struct device *dev) * Tree match table entry is supplied for the probing device. */ if (!driver->id_table && + !i2c_acpi_match_device(dev->driver->acpi_match_table, client) && !i2c_of_match_device(dev->driver->of_match_table, client)) return -ENODEV; diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h index 3b63f5e5b89c..3d3d9bf02101 100644 --- a/drivers/i2c/i2c-core.h +++ b/drivers/i2c/i2c-core.h @@ -31,9 +31,18 @@ int i2c_check_addr_validity(unsigned addr, unsigned short flags); int i2c_check_7bit_addr_validity_strict(unsigned short addr); #ifdef CONFIG_ACPI +const struct acpi_device_id * +i2c_acpi_match_device(const struct acpi_device_id *matches, + struct i2c_client *client); void i2c_acpi_register_devices(struct i2c_adapter *adap); #else /* CONFIG_ACPI */ static inline void i2c_acpi_register_devices(struct i2c_adapter *adap) { } +static inline const struct acpi_device_id * +i2c_acpi_match_device(const struct acpi_device_id *matches, + struct i2c_client *client) +{ + return NULL; +} #endif /* CONFIG_ACPI */ extern struct notifier_block i2c_acpi_notifier; -- cgit v1.2.3 From d81ece747d8727bb8b1cfc9a20dbe62f09a4e35a Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:50:53 +0800 Subject: pinctrl: sunxi: add a missing function of A10/A20 pinctrl driver The PH16 pin has a function with mux id 0x5, which is the DET pin of the "sim" (smart card reader) IP block. This function is missing in old versions of A10/A20 SoCs' datasheets and user manuals, so it's also missing in the old drivers. The newest A10 Datasheet V1.70 and A20 Datasheet V1.41 contain this pin function, and it's discovered during implementing R40 pinctrl driver. Add it to the driver. As we now merged A20 pinctrl driver to the A10 one, we need to only fix the A10 driver now. Fixes: f2821b1ca3a2 ("pinctrl: sunxi: Move Allwinner A10 pinctrl driver to a driver of its own") Signed-off-by: Icenowy Zheng Reviewed-by: Chen-Yu Tsai Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c index 159580c04b14..47a392bc73c8 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c @@ -918,6 +918,7 @@ static const struct sunxi_desc_pin sun4i_a10_pins[] = { SUNXI_FUNCTION_VARIANT(0x3, "emac", /* ETXD1 */ PINCTRL_SUN7I_A20), SUNXI_FUNCTION(0x4, "keypad"), /* IN6 */ + SUNXI_FUNCTION(0x5, "sim"), /* DET */ SUNXI_FUNCTION_IRQ(0x6, 16), /* EINT16 */ SUNXI_FUNCTION(0x7, "csi1")), /* D16 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 17), -- cgit v1.2.3 From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 22:29:51 +0100 Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single dma-fence Up until recently sync_file were create to export a single dma-fence to userspace, and so we could canabalise a bit insie dma-fence to mark whether or not we had enable polling for the sync_file itself. However, with the advent of syncobj, we do allow userspace to create multiple sync_files for a single dma-fence. (Similarly, that the sw-sync validation framework also started returning multiple sync-files wrapping a single dma-fence for a syncpt also triggering the problem.) This patch reverts my suggestion in commit e24165537312 ("dma-buf/sync_file: only enable fence signalling on poll()") to use a single bit in the shared dma-fence and restores the sync_file->flags for tracking the bits individually. Reported-by: Gustavo Padovan Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline") Fixes: e9083420bbac ("drm: introduce sync objects (v4)") Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Sean Paul Cc: Gustavo Padovan Cc: dri-devel@lists.freedesktop.org Cc: # v4.13-rc1+ Signed-off-by: Gustavo Padovan Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk (cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a) --- drivers/dma-buf/sync_file.c | 5 +++-- include/linux/sync_file.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index d7e219d2669d..66fb40d0ebdb 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file) { struct sync_file *sync_file = file->private_data; - if (test_bit(POLL_ENABLED, &sync_file->fence->flags)) + if (test_bit(POLL_ENABLED, &sync_file->flags)) dma_fence_remove_callback(sync_file->fence, &sync_file->cb); dma_fence_put(sync_file->fence); kfree(sync_file); @@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait) poll_wait(file, &sync_file->wq, wait); - if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) { + if (list_empty(&sync_file->cb.node) && + !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) { if (dma_fence_add_callback(sync_file->fence, &sync_file->cb, fence_check_cb_func) < 0) wake_up_all(&sync_file->wq); diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 5726107963b2..0ad87c434ae6 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -43,12 +43,13 @@ struct sync_file { #endif wait_queue_head_t wq; + unsigned long flags; struct dma_fence *fence; struct dma_fence_cb cb; }; -#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS +#define POLL_ENABLED 0 struct sync_file *sync_file_create(struct dma_fence *fence); struct dma_fence *sync_file_get_fence(int fd); -- cgit v1.2.3 From 22acc37b8681461707a3fc73505af808f9af8260 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Jul 2017 15:45:01 +0200 Subject: i2c: designware: Print clock freq on invalid clock freq error When we refuse to probe due to an invalid clock frequency, log the frequency which is causing this error. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 2ea6d0d25a01..d139b156f9c9 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -319,7 +319,8 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) if (dev->clk_freq != 100000 && dev->clk_freq != 400000 && dev->clk_freq != 1000000 && dev->clk_freq != 3400000) { dev_err(&pdev->dev, - "Only 100kHz, 400kHz, 1MHz and 3.4MHz supported"); + "%d Hz is unsupported, only 100kHz, 400kHz, 1MHz and 3.4MHz are supported\n", + dev->clk_freq); ret = -EINVAL; goto exit_reset; } -- cgit v1.2.3 From 682c6c2188f39d13548ccdc89c9888fbcb547889 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Jul 2017 15:45:02 +0200 Subject: i2c: designware: Some broken DSTDs use 1MiHz instead of 1MHz At least the Acer Iconia Tab8 / aka W1-810 uses 1MiHz instead of 1MHz for one of its busses, fix this up to 1MHz instead of failing the probe of that bus. This fixes the accelerometer on the Acer Iconia Tab8 not working. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index d139b156f9c9..143a8fd582b4 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -298,6 +298,9 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) } acpi_speed = i2c_acpi_find_bus_speed(&pdev->dev); + /* Some broken DSTDs use 1MiHz instead of 1MHz */ + if (acpi_speed == 1048576) + acpi_speed = 1000000; /* * Find bus speed from the "clock-frequency" device property, ACPI * or by using fast mode if neither is set. -- cgit v1.2.3 From 5871a1538cd2d7a4a18e5e21adc7e9e6dab702e2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Jul 2017 11:03:49 +0100 Subject: i2c: allow i2c-versatile for ARM MPS platforms Allow i2c-versatile to be enabled for ARM MPS platforms. Signed-off-by: Russell King Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 1006b230b236..65fa29591d21 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -983,7 +983,7 @@ config I2C_UNIPHIER_F config I2C_VERSATILE tristate "ARM Versatile/Realview I2C bus support" - depends on ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || COMPILE_TEST + depends on ARCH_MPS2 || ARCH_VERSATILE || ARCH_REALVIEW || ARCH_VEXPRESS || COMPILE_TEST select I2C_ALGOBIT help Say yes if you want to support the I2C serial bus on ARMs Versatile -- cgit v1.2.3 From 9c80034921d1ece5c0e3241ba1645bce32387684 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 29 Jul 2017 14:11:43 +0200 Subject: i2c: rephrase explanation of I2C_CLASS_DEPRECATED Hopefully making clear that it is not needed for new drivers. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 00ca5b86a753..d501d3956f13 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -689,7 +689,8 @@ i2c_unlock_adapter(struct i2c_adapter *adapter) #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ -#define I2C_CLASS_DEPRECATED (1<<8) /* Warn users that adapter will stop using classes */ +/* Warn users that the adapter doesn't support classes anymore */ +#define I2C_CLASS_DEPRECATED (1<<8) /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From 5ae29649e03f58be0f412c21b62b203aa7cf1680 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 31 Jul 2017 18:45:41 +0200 Subject: video: fbdev: imxfb: use after free in imxfb_remove() We free "info" then dereference it on the next line. Really this whole function would be better if we wrote it to unwind in the mirror of how things are allocated in the probe. Signed-off-by: Dan Carpenter Cc: Alexander Shiyan Cc: Sascha Hauer Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/video/fbdev/imxfb.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c index c166e0725be5..ba82f97fb42b 100644 --- a/drivers/video/fbdev/imxfb.c +++ b/drivers/video/fbdev/imxfb.c @@ -1073,20 +1073,16 @@ static int imxfb_remove(struct platform_device *pdev) imxfb_disable_controller(fbi); unregister_framebuffer(info); - + fb_dealloc_cmap(&info->cmap); pdata = dev_get_platdata(&pdev->dev); if (pdata && pdata->exit) pdata->exit(fbi->pdev); - - fb_dealloc_cmap(&info->cmap); - kfree(info->pseudo_palette); - framebuffer_release(info); - dma_free_wc(&pdev->dev, fbi->map_size, info->screen_base, fbi->map_dma); - iounmap(fbi->regs); release_mem_region(res->start, resource_size(res)); + kfree(info->pseudo_palette); + framebuffer_release(info); return 0; } -- cgit v1.2.3 From 7b4dfbe7880cf2092db3c94c6a34ae6ffa8aa344 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 31 Jul 2017 18:45:41 +0200 Subject: fbdev: omapfb: remove unused variable Removing the default display name left a harmless warning: fbdev/omap2/omapfb/dss/core.c: In function 'omap_dss_probe': fbdev/omap2/omapfb/dss/core.c:196:30: error: unused variable 'pdata' [-Werror=unused-variable] This removes the now-unused variable as well. Fixes: 278cba7eaf54 ("drm: omapdrm: Remove unused default display name support") Signed-off-by: Arnd Bergmann Reviewed-by: Laurent Pinchart Cc: Tomi Valkeinen Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/video/fbdev/omap2/omapfb/dss/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/core.c b/drivers/video/fbdev/omap2/omapfb/dss/core.c index eecf695c16f4..09e5bb013d28 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/core.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/core.c @@ -193,7 +193,6 @@ static struct notifier_block omap_dss_pm_notif_block = { static int __init omap_dss_probe(struct platform_device *pdev) { - struct omap_dss_board_info *pdata = pdev->dev.platform_data; int r; core.pdev = pdev; -- cgit v1.2.3 From dd0c41f8a7e0babdadc61d5201ac8505a79dec05 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Jul 2017 18:45:41 +0200 Subject: efifb: allow user to disable write combined mapping. This patch allows the user to disable write combined mapping of the efifb framebuffer console using an nowc option. A customer noticed major slowdowns while logging to the console with write combining enabled, on other tasks running on the same CPU. (10x or greater slow down on all other cores on the same CPU as is doing the logging). I reproduced this on a machine with dual CPUs. Intel(R) Xeon(R) CPU E5-2609 v3 @ 1.90GHz (6 core) I wrote a test that just mmaps the pci bar and writes to it in a loop, while this was running in the background one a single core with (taskset -c 1), building a kernel up to init/version.o (taskset -c 8) went from 13s to 133s or so. I've yet to explain why this occurs or what is going wrong I haven't managed to find a perf command that in any way gives insight into this. 11,885,070,715 instructions # 1.39 insns per cycle vs 12,082,592,342 instructions # 0.13 insns per cycle is the only thing I've spotted of interest, I've tried at least: dTLB-stores,dTLB-store-misses,L1-dcache-stores,LLC-store,LLC-store-misses,LLC-load-misses,LLC-loads,\mem-loads,mem-stores,iTLB-loads,iTLB-load-misses,cache-references,cache-misses For now it seems at least a good idea to allow a user to disable write combining if they see this until we can figure it out. Note also most users get a real framebuffer driver loaded when kms kicks in, it just happens on these machines the kernel didn't support the gpu specific driver. Signed-off-by: Dave Airlie Acked-by: Peter Jones Cc: Andy Lutomirski Cc: H. Peter Anvin Cc: Linus Torvalds Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/fb/efifb.txt | 6 ++++++ drivers/video/fbdev/efifb.c | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/fb/efifb.txt b/Documentation/fb/efifb.txt index a59916c29b33..1a85c1bdaf38 100644 --- a/Documentation/fb/efifb.txt +++ b/Documentation/fb/efifb.txt @@ -27,5 +27,11 @@ You have to add the following kernel parameters in your elilo.conf: Macbook Pro 17", iMac 20" : video=efifb:i20 +Accepted options: + +nowc Don't map the framebuffer write combined. This can be used + to workaround side-effects and slowdowns on other CPU cores + when large amounts of console data are written. + -- Edgar Hucek diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c index ff01bed7112f..1e784adb89b1 100644 --- a/drivers/video/fbdev/efifb.c +++ b/drivers/video/fbdev/efifb.c @@ -17,6 +17,7 @@ #include static bool request_mem_succeeded = false; +static bool nowc = false; static struct fb_var_screeninfo efifb_defined = { .activate = FB_ACTIVATE_NOW, @@ -99,6 +100,8 @@ static int efifb_setup(char *options) screen_info.lfb_height = simple_strtoul(this_opt+7, NULL, 0); else if (!strncmp(this_opt, "width:", 6)) screen_info.lfb_width = simple_strtoul(this_opt+6, NULL, 0); + else if (!strcmp(this_opt, "nowc")) + nowc = true; } } @@ -255,7 +258,10 @@ static int efifb_probe(struct platform_device *dev) info->apertures->ranges[0].base = efifb_fix.smem_start; info->apertures->ranges[0].size = size_remap; - info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len); + if (nowc) + info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len); + else + info->screen_base = ioremap_wc(efifb_fix.smem_start, efifb_fix.smem_len); if (!info->screen_base) { pr_err("efifb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n", efifb_fix.smem_len, efifb_fix.smem_start); -- cgit v1.2.3 From 3f25bb4b7f7718d391321608f947840a79934568 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 21 Jun 2017 10:35:09 +0300 Subject: iwlwifi: mvm: fix TCP CSUM offload with WEP and A000 series When we enabled TCP checksum offload, we need to tell the firmware where the IP header starts. If we have an IV, then we need to adapt that value since the IV is placed before the SNAP header. This is true only for cases where the driver adds the IV, not the WEP case in which the IV is added by the firmware itself. On A000 devices series, the IV is always added by the device. Fix this. Fixes: 5e6a98dc4863 ("iwlwifi: mvm: enable TCP/UDP checksum support for 9000 family") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 60360ed73f26..e5d3eba2e82a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -185,8 +185,14 @@ static u16 iwl_mvm_tx_csum(struct iwl_mvm *mvm, struct sk_buff *skb, else udp_hdr(skb)->check = 0; - /* mac header len should include IV, size is in words */ - if (info->control.hw_key) + /* + * mac header len should include IV, size is in words unless + * the IV is added by the firmware like in WEP. + * In new Tx API, the IV is always added by the firmware. + */ + if (!iwl_mvm_has_new_tx_api(mvm) && info->control.hw_key && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP40 && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP104) mh_len += info->control.hw_key->iv_len; mh_len /= 2; offload_assist |= mh_len << TX_CMD_OFFLD_MH_SIZE; -- cgit v1.2.3 From 58877d7428b0747134cb65096d51dfabdc62000d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 29 Jun 2017 16:18:21 +0300 Subject: iwlwifi: add TLV for MLME offload firmware capability The firmware now adds a new DWORD for the MLME offload's capability even on firmware versions that don't support it. Add the TLV bit to avoid getting the print: capa flags index 3 larger than supported by driver. This fixes the bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196195 Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/fw/file.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h index 0fa8c473f1e2..c73a6438ce8f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@ -328,6 +328,7 @@ typedef unsigned int __bitwise iwl_ucode_tlv_capa_t; * @IWL_UCODE_TLV_CAPA_TX_POWER_ACK: reduced TX power API has larger * command size (command version 4) that supports toggling ACK TX * power reduction. + * @IWL_UCODE_TLV_CAPA_MLME_OFFLOAD: supports MLME offload * * @NUM_IWL_UCODE_TLV_CAPA: number of bits used */ @@ -373,6 +374,7 @@ enum iwl_ucode_tlv_capa { IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG = (__force iwl_ucode_tlv_capa_t)80, IWL_UCODE_TLV_CAPA_LQM_SUPPORT = (__force iwl_ucode_tlv_capa_t)81, IWL_UCODE_TLV_CAPA_TX_POWER_ACK = (__force iwl_ucode_tlv_capa_t)84, + IWL_UCODE_TLV_CAPA_MLME_OFFLOAD = (__force iwl_ucode_tlv_capa_t)96, NUM_IWL_UCODE_TLV_CAPA #ifdef __CHECKER__ -- cgit v1.2.3 From 92b0f7b26b313b23cc9bef0bd406607f4566c0c0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 3 Jul 2017 16:25:33 +0300 Subject: iwlwifi: split the regulatory rules when the bandwidth flags require it When we create a regulatory domain out of an MCC notification, we need to make sure that all the channels in the rule have the exact same properties. The current code mixes channel 36 and 40 although 36 can be a control channel with HT40+ (36, 40) whereas 40 can't be a control channel with HT40+ since (40, 44) is invalid. Because of that, cfg80211 would allow to connect in 40MHz to APs that are configured to channel 40 HT40+ and that made our firmware assert. Fix this by checking the bandwidth flags before taking the decision if the rule should be split. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=195299 partly. Fixes: af45a9003f1f ("iwlwifi: create regdomain from mcc_update_cmd response") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 5c08f4d40f6a..3ee6767392b6 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -785,7 +785,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc) { int ch_idx; - u16 ch_flags, prev_ch_flags = 0; + u16 ch_flags; + u32 reg_rule_flags, prev_reg_rule_flags = 0; const u8 *nvm_chan = cfg->ext_nvm ? iwl_ext_nvm_channels : iwl_nvm_channels; struct ieee80211_regdomain *regd; @@ -834,8 +835,11 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, continue; } + reg_rule_flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx, + ch_flags, cfg); + /* we can't continue the same rule */ - if (ch_idx == 0 || prev_ch_flags != ch_flags || + if (ch_idx == 0 || prev_reg_rule_flags != reg_rule_flags || center_freq - prev_center_freq > 20) { valid_rules++; new_rule = true; @@ -854,18 +858,17 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, rule->power_rule.max_eirp = DBM_TO_MBM(IWL_DEFAULT_MAX_TX_POWER); - rule->flags = iwl_nvm_get_regdom_bw_flags(nvm_chan, ch_idx, - ch_flags, cfg); + rule->flags = reg_rule_flags; /* rely on auto-calculation to merge BW of contiguous chans */ rule->flags |= NL80211_RRF_AUTO_BW; rule->freq_range.max_bandwidth_khz = 0; - prev_ch_flags = ch_flags; prev_center_freq = center_freq; + prev_reg_rule_flags = reg_rule_flags; IWL_DEBUG_DEV(dev, IWL_DL_LAR, - "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x): Ad-Hoc %ssupported\n", + "Ch. %d [%sGHz] %s%s%s%s%s%s%s%s%s(0x%02x) reg_flags 0x%x: %s\n", center_freq, band == NL80211_BAND_5GHZ ? "5.2" : "2.4", CHECK_AND_PRINT_I(VALID), @@ -877,10 +880,10 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, CHECK_AND_PRINT_I(160MHZ), CHECK_AND_PRINT_I(INDOOR_ONLY), CHECK_AND_PRINT_I(GO_CONCURRENT), - ch_flags, + ch_flags, reg_rule_flags, ((ch_flags & NVM_CHANNEL_ACTIVE) && !(ch_flags & NVM_CHANNEL_RADAR)) - ? "" : "not "); + ? "Ad-Hoc" : ""); } regd->n_reg_rules = valid_rules; -- cgit v1.2.3 From 9465c3f8ba67cff697c0529de5a03cc5e1509d41 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Thu, 6 Jul 2017 05:07:33 +0300 Subject: iwlwifi: mvm: set A-MPDU bit upon empty BA notification from FW The bit was set only if there was at least one reclaimed frame in an aggregation. It's important to set it also in the case that the whole A-MPDU was lost, otherwise rate scaling statistics will not be updated correctly. Thus, set it always in ba notification handler. This fixes a throughput degradation of about 20% in certain scenarios with multiple streams on 11ac. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index e5d3eba2e82a..5fcc9dd6be56 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1821,6 +1821,8 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) struct iwl_mvm_tid_data *tid_data; struct iwl_mvm_sta *mvmsta; + ba_info.flags = IEEE80211_TX_STAT_AMPDU; + if (iwl_mvm_has_new_tx_api(mvm)) { struct iwl_mvm_compressed_ba_notif *ba_res = (void *)pkt->data; -- cgit v1.2.3 From 87f55616f81bf6c82f0e94cf4661151399d2a7b6 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Thu, 6 Jul 2017 05:27:55 +0300 Subject: iwlwifi: mvm: rs: fix TLC statistics collection Statistics should be collected according to the actual rate a frame/aggregation was transmitted and not according to the initial rate from the last LQ command (these rates are different if the frames were retransmitted at a lower rate from the rate scale table). This is needed to remove throughput degradation. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 65beca3a457a..8999a1199d60 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1291,7 +1291,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, * first index into rate scale table. */ if (info->flags & IEEE80211_TX_STAT_AMPDU) { - rs_collect_tpc_data(mvm, lq_sta, curr_tbl, lq_rate.index, + rs_collect_tpc_data(mvm, lq_sta, curr_tbl, tx_resp_rate.index, info->status.ampdu_len, info->status.ampdu_ack_len, reduced_txp); @@ -1312,7 +1312,7 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, if (info->status.ampdu_ack_len == 0) info->status.ampdu_len = 1; - rs_collect_tlc_data(mvm, lq_sta, curr_tbl, lq_rate.index, + rs_collect_tlc_data(mvm, lq_sta, curr_tbl, tx_resp_rate.index, info->status.ampdu_len, info->status.ampdu_ack_len); @@ -1348,11 +1348,11 @@ void iwl_mvm_rs_tx_status(struct iwl_mvm *mvm, struct ieee80211_sta *sta, continue; rs_collect_tpc_data(mvm, lq_sta, tmp_tbl, - lq_rate.index, 1, + tx_resp_rate.index, 1, i < retries ? 0 : legacy_success, reduced_txp); rs_collect_tlc_data(mvm, lq_sta, tmp_tbl, - lq_rate.index, 1, + tx_resp_rate.index, 1, i < retries ? 0 : legacy_success); } -- cgit v1.2.3 From e9fb92e13d5e743a6ff13d3206c0f9e5e8cbc1f4 Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Sun, 9 Jul 2017 15:51:44 +0300 Subject: iwlwifi: fix fw_pre_next_step to apply also for C step C step NICs should use the latest FW (currently B step). Correct the condition to make C step NICs advanced its default FW name to the latest one. Also rename _next_ to b_or_c to avoid confusion. Fixes: 5da083d1922c ("iwlwifi: add support for 9000 HW B-step NICs") Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/cfg/9000.c | 14 +++++++------- drivers/net/wireless/intel/iwlwifi/iwl-config.h | 8 ++++---- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 5 +++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c index b4ecd1fe1374..97208ce19f92 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/9000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/9000.c @@ -154,7 +154,7 @@ static const struct iwl_tt_params iwl9000_tt_params = { const struct iwl_cfg iwl9160_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9160", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -165,7 +165,7 @@ const struct iwl_cfg iwl9160_2ac_cfg = { const struct iwl_cfg iwl9260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9260", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -176,7 +176,7 @@ const struct iwl_cfg iwl9260_2ac_cfg = { const struct iwl_cfg iwl9270_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9270", .fw_name_pre = IWL9260A_FW_PRE, - .fw_name_pre_next_step = IWL9260B_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -186,8 +186,8 @@ const struct iwl_cfg iwl9270_2ac_cfg = { const struct iwl_cfg iwl9460_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9460", - .fw_name_pre = IWL9000_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, @@ -198,8 +198,8 @@ const struct iwl_cfg iwl9460_2ac_cfg = { const struct iwl_cfg iwl9560_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 9560", - .fw_name_pre = IWL9000_FW_PRE, - .fw_name_pre_rf_next_step = IWL9000RFB_FW_PRE, + .fw_name_pre = IWL9260A_FW_PRE, + .fw_name_pre_b_or_c_step = IWL9260B_FW_PRE, IWL_DEVICE_9000, .ht_params = &iwl9000_ht_params, .nvm_ver = IWL9000_NVM_VERSION, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index c52623cb7c2a..d19c74827fbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -276,10 +276,10 @@ struct iwl_pwr_tx_backoff { * @fw_name_pre: Firmware filename prefix. The api version and extension * (.ucode) will be added to filename before loading from disk. The * filename is constructed as fw_name_pre.ucode. - * @fw_name_pre_next_step: same as @fw_name_pre, only for next step + * @fw_name_pre_b_or_c_step: same as @fw_name_pre, only for b or c steps * (if supported) - * @fw_name_pre_rf_next_step: same as @fw_name_pre_next_step, only for rf next - * step. Supported only in integrated solutions. + * @fw_name_pre_rf_next_step: same as @fw_name_pre_b_or_c_step, only for rf + * next step. Supported only in integrated solutions. * @ucode_api_max: Highest version of uCode API supported by driver. * @ucode_api_min: Lowest version of uCode API supported by driver. * @max_inst_size: The maximal length of the fw inst section @@ -330,7 +330,7 @@ struct iwl_cfg { /* params specific to an individual device within a device family */ const char *name; const char *fw_name_pre; - const char *fw_name_pre_next_step; + const char *fw_name_pre_b_or_c_step; const char *fw_name_pre_rf_next_step; /* params not likely to change within a device family */ const struct iwl_base_params *base_params; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 6fdb5921e17f..4e0f86fe0a6f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -216,8 +216,9 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) const char *fw_pre_name; if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_9000 && - CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_B_STEP) - fw_pre_name = cfg->fw_name_pre_next_step; + (CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_B_STEP || + CSR_HW_REV_STEP(drv->trans->hw_rev) == SILICON_C_STEP)) + fw_pre_name = cfg->fw_name_pre_b_or_c_step; else if (drv->trans->cfg->integrated && CSR_HW_RFID_STEP(drv->trans->hw_rf_id) == SILICON_B_STEP && cfg->fw_name_pre_rf_next_step) -- cgit v1.2.3 From 8addabf8e6e299f790038fdc92ddceaaf76adab8 Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Thu, 27 Jul 2017 04:53:55 +0300 Subject: iwlwifi: mvm: set the RTS_MIMO_PROT bit in flag mask when sending sta to fw Set the STA_FLG_RTS_MIMO_PROT bit in station_flags_msk of the add sta command, so that when smps mode changes, the FW will know about it. In particular, in AP mode, clients are added upon receival of an auth request, at which point there's no knowledge of the client's smps mode. When the assoc request arrives, the add_sta command is resent to modify the station parameters. At this point the driver knows the smps mode, but since the corresponding bit in the mask is not set, the fw doesn't update this field so there's no rts protection for mimo. Fixes: 5bc5aaad407c ("iwlwifi: mvm: set up initial SMPS/NSS station info") Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index ab66b4394dfc..dcaef7c043ac 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -121,7 +121,8 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, .mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color), .add_modify = update ? 1 : 0, .station_flags_msk = cpu_to_le32(STA_FLG_FAT_EN_MSK | - STA_FLG_MIMO_EN_MSK), + STA_FLG_MIMO_EN_MSK | + STA_FLG_RTS_MIMO_PROT), .tid_disable_tx = cpu_to_le16(mvm_sta->tid_disable_agg), }; int ret; -- cgit v1.2.3 From 558f479f687aca6a336e13309424a7c3afd32721 Mon Sep 17 00:00:00 2001 From: Tzipi Peres Date: Sun, 30 Jul 2017 13:29:30 +0300 Subject: iwlwifi: add the new 9000 series PCI IDs Add two PCI IDs for the 9160 series. Add five PCI IDs for the 9260 series. Add one PCI IDs for the 9270 series. Add seven PCI IDs for the 9460 series. Add five PCI IDs for the 9560 series. Signed-off-by: Tzipi Peres Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index f16c1bb9bf94..84f4ba01e14f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -510,9 +510,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 9000 Series */ {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, @@ -527,10 +535,22 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, -- cgit v1.2.3 From 9527b82ae3af1ebf465506868fb55e7f862cd9da Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 1 Aug 2017 12:24:17 +0100 Subject: Revert "serial: Delete dead code for CIR serial ports" This reverts commit 1104321a7b3bb670dc614ffa7958c553e7b3b836. The code is not dead at all and breaks winbond-cir. Serial: 8250/16550 driver, 32 ports, IRQ sharing enabled 00:02: ttyS0 at I/O 0x3f8 (irq = 4, base_baud = 115200) is a 16550A 00:03: ttyS1 at I/O 0x2f8 (irq = 3, base_baud = 115200) is a CIR port lirc lirc0: lirc_dev: driver ir-lirc-codec (winbond-cir) registered at minor = 0 winbond-cir 00:03: Region 0x2f8-0x2ff already in use! winbond-cir: probe of 00:03 failed with error -16 Signed-off-by: Sean Young Reviewed-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index b5def356af63..1aab3010fbfa 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1043,13 +1043,24 @@ int serial8250_register_8250_port(struct uart_8250_port *up) if (up->dl_write) uart->dl_write = up->dl_write; - if (serial8250_isa_config != NULL) - serial8250_isa_config(0, &uart->port, - &uart->capabilities); + if (uart->port.type != PORT_8250_CIR) { + if (serial8250_isa_config != NULL) + serial8250_isa_config(0, &uart->port, + &uart->capabilities); + + ret = uart_add_one_port(&serial8250_reg, + &uart->port); + if (ret == 0) + ret = uart->port.line; + } else { + dev_info(uart->port.dev, + "skipping CIR port at 0x%lx / 0x%llx, IRQ %d\n", + uart->port.iobase, + (unsigned long long)uart->port.mapbase, + uart->port.irq); - ret = uart_add_one_port(&serial8250_reg, &uart->port); - if (ret == 0) - ret = uart->port.line; + ret = 0; + } } mutex_unlock(&serial_mutex); -- cgit v1.2.3 From 7f81e55c737a8fa82c71f290945d729a4902f8d2 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:02:46 -0700 Subject: xtensa: don't limit csum_partial export by CONFIG_NET csum_partial and csum_partial_copy_generic are defined unconditionally and are available even when CONFIG_NET is disabled. They are used not only by the network drivers, but also by scsi and media. Don't limit these functions export by CONFIG_NET. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/xtensa_ksyms.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index d159e9b9c018..672391003e40 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -94,13 +94,11 @@ unsigned long __sync_fetch_and_or_4(unsigned long *p, unsigned long v) } EXPORT_SYMBOL(__sync_fetch_and_or_4); -#ifdef CONFIG_NET /* * Networking support */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -#endif /* CONFIG_NET */ /* * Architecture-specific symbols -- cgit v1.2.3 From bc652eb6a0d5cffaea7dc8e8ad488aab2a1bf1ed Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 1 Aug 2017 11:15:15 -0700 Subject: xtensa: mm/cache: add missing EXPORT_SYMBOLs Functions clear_user_highpage, copy_user_highpage, flush_dcache_page, local_flush_cache_range and local_flush_cache_page may be used from modules. Export them. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/mm/cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index dbb1cdef3663..3c75c4e597da 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -103,6 +103,7 @@ void clear_user_highpage(struct page *page, unsigned long vaddr) clear_page_alias(kvaddr, paddr); preempt_enable(); } +EXPORT_SYMBOL(clear_user_highpage); void copy_user_highpage(struct page *dst, struct page *src, unsigned long vaddr, struct vm_area_struct *vma) @@ -119,6 +120,7 @@ void copy_user_highpage(struct page *dst, struct page *src, copy_page_alias(dst_vaddr, src_vaddr, dst_paddr, src_paddr); preempt_enable(); } +EXPORT_SYMBOL(copy_user_highpage); /* * Any time the kernel writes to a user page cache page, or it is about to @@ -172,7 +174,7 @@ void flush_dcache_page(struct page *page) /* There shouldn't be an entry in the cache for this page anymore. */ } - +EXPORT_SYMBOL(flush_dcache_page); /* * For now, flush the whole cache. FIXME?? @@ -184,6 +186,7 @@ void local_flush_cache_range(struct vm_area_struct *vma, __flush_invalidate_dcache_all(); __invalidate_icache_all(); } +EXPORT_SYMBOL(local_flush_cache_range); /* * Remove any entry in the cache for this page. @@ -203,6 +206,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); } +EXPORT_SYMBOL(local_flush_cache_page); #endif /* DCACHE_WAY_SIZE > PAGE_SIZE */ -- cgit v1.2.3 From fd1b8668af59a11bb754a6c9b0051c6c5ce73b74 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 2 Aug 2017 00:45:06 +0900 Subject: USB: serial: option: add D-Link DWM-222 device ID Add device id for D-Link DWM-222. Cc: stable@vger.kernel.org Signed-off-by: Hector Martin Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ebe51f11105d..fe123153b1a5 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2025,6 +2025,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */ .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ -- cgit v1.2.3 From d490c9cd2f67399e1dbc951f190d03724b81d0c8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 29 Jun 2017 14:49:59 +0530 Subject: drm/msm/mdp5: Fix compilation warnings Following compilation warnings were observed for these files: CC [M] drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.o drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c: In function 'blend_setup': drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: missing braces around initializer [-Wmissing-braces] enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE }; ^ drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:223:7: warning: (near initialization for 'stage[0]') [-Wmissing-braces] drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: missing braces around initializer [-Wmissing-braces] enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE }; ^ drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c:224:7: warning: (near initialization for 'r_stage[0]') [-Wmissing-braces] drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c: In function 'mdp5_plane_mode_set': drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: missing braces around initializer [-Wmissing-braces] struct phase_step step = { 0 }; ^ drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:892:9: warning: (near initialization for 'step.x') [-Wmissing-braces] drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: missing braces around initializer [-Wmissing-braces] struct pixel_ext pe = { 0 }; ^ drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c:893:9: warning: (near initialization for 'pe.left') [-Wmissing-braces] This happens because in the first case we were initializing a two dimensional array with {0} and in the second case we were initializing a struct containing two arrays with {0}. Fix them by adding another pair of {}. Signed-off-by: Viresh Kumar Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 4 ++-- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index cb5415d6c04b..62a0fb377e7a 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -221,8 +221,8 @@ static void blend_setup(struct drm_crtc *crtc) struct mdp5_ctl *ctl = mdp5_cstate->ctl; uint32_t blend_op, fg_alpha, bg_alpha, ctl_blend_flags = 0; unsigned long flags; - enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE }; - enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE }; + enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } }; + enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { { SSPP_NONE } }; int i, plane_cnt = 0; bool bg_alpha_enabled = false; u32 mixer_op_mode = 0; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index fe3a4de1a433..61f39c86dd09 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -890,8 +890,8 @@ static int mdp5_plane_mode_set(struct drm_plane *plane, struct mdp5_hw_pipe *right_hwpipe; const struct mdp_format *format; uint32_t nplanes, config = 0; - struct phase_step step = { 0 }; - struct pixel_ext pe = { 0 }; + struct phase_step step = { { 0 } }; + struct pixel_ext pe = { { 0 } }; uint32_t hdecm = 0, vdecm = 0; uint32_t pix_format; unsigned int rotation; -- cgit v1.2.3 From 65e93108891e571f177c202add9288eda9ac4100 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Jun 2017 10:59:15 +0300 Subject: drm/msm: fix an integer overflow test We recently added an integer overflow check but it needs an additional tweak to work properly on 32 bit systems. The problem is that we're doing the right hand side of the assignment as type unsigned long so the max it will have an integer overflow instead of being larger than SIZE_MAX. That means the "sz > SIZE_MAX" condition is never true even on 32 bit systems. We need to first cast it to u64 and then do the math. Fixes: 4a630fadbb29 ("drm/msm: Fix potential buffer overflow issue") Signed-off-by: Dan Carpenter Acked-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 6bfca7470141..8095658e8cb4 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -34,8 +34,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, struct msm_gpu *gpu, uint32_t nr_bos, uint32_t nr_cmds) { struct msm_gem_submit *submit; - uint64_t sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) + - (nr_cmds * sizeof(submit->cmd[0])); + uint64_t sz = sizeof(*submit) + ((u64)nr_bos * sizeof(submit->bos[0])) + + ((u64)nr_cmds * sizeof(submit->cmd[0])); if (sz > SIZE_MAX) return NULL; -- cgit v1.2.3 From 71e3dfa167b105d3c06e76fee1d0e2fd1e502cf6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jul 2017 10:20:42 +0300 Subject: drm/msm: unlock on error in msm_gem_get_iova() We recently added locking to this function but there was a direct return that was overlooked where we need to unlock. Fixes: 0e08270a1f01 ("drm/msm: Separate locking of buffer resources from struct_mutex") Signed-off-by: Dan Carpenter Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 65f35544c1ec..065d933df2c3 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -383,8 +383,10 @@ int msm_gem_get_iova(struct drm_gem_object *obj, struct page **pages; vma = add_vma(obj, aspace); - if (IS_ERR(vma)) - return PTR_ERR(vma); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto unlock; + } pages = get_pages(obj); if (IS_ERR(pages)) { @@ -405,7 +407,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj, fail: del_vma(vma); - +unlock: mutex_unlock(&msm_obj->lock); return ret; } -- cgit v1.2.3 From af1f5f12c21bd9dc08f578d86adc192eec4eb28a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 3 Jul 2017 13:15:57 -0400 Subject: drm/msm/mdp5: fix unclocked register access in _cursor_set() Fixes an insta-reboot when screen-blanking kicks in, due to cursor updates without clocks enabled. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 62a0fb377e7a..735a87a699fa 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -753,6 +753,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, if (!handle) { DBG("Cursor off"); cursor_enable = false; + mdp5_enable(mdp5_kms); goto set_cursor; } @@ -776,6 +777,8 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc, get_roi(crtc, &roi_w, &roi_h); + mdp5_enable(mdp5_kms); + mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_STRIDE(lm), stride); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_FORMAT(lm), MDP5_LM_CURSOR_FORMAT_FORMAT(CURSOR_FMT_ARGB8888)); @@ -804,6 +807,7 @@ set_cursor: crtc_flush(crtc, flush_mask); end: + mdp5_disable(mdp5_kms); if (old_bo) { drm_flip_work_queue(&mdp5_crtc->unref_cursor_work, old_bo); /* enable vblank to complete cursor work: */ @@ -836,6 +840,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) get_roi(crtc, &roi_w, &roi_h); + mdp5_enable(mdp5_kms); + spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags); mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm), MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) | @@ -847,6 +853,8 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) crtc_flush(crtc, flush_mask); + mdp5_disable(mdp5_kms); + return 0; } -- cgit v1.2.3 From d4cea38ebb4de90913085391ce4febde1a4ba9aa Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Wed, 12 Jul 2017 15:09:55 +0530 Subject: drm/msm/dsi: Calculate link clock rates with updated dsi->lanes After the commit mentioned below, we start computing the byte and pixel clocks (dsi_calc_clk_rate) in the DSI bridge's mode_set() op. The calculation involves the number of DSI lanes being used by the downstream bridge/panel. If the downstream bridge/panel tries to change the number of DSI lanes (as done in the ADV7533 driver) in its mode_set() op, then our DSI host driver will not have the correct number of lanes when computing byte/pixel clocks. Fix this by delaying the clock rate calculation in the DSI bridge enable path. In particular, compute the clock rates in msm_dsi_host_get_phy_clk_req(). This fixes the DSI host error interrupts seen when we try to switch between modes that require different number of lanes (4 to 3 lanes, or vice versa) on db410c. The error interrupts occur since the byte/pixel clock rates aren't according to what the DSI video mode timing engine expects. Fixes: b62aa70a98c5 ("drm/msm/dsi: Move PHY operations out of host") Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/dsi_host.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 9e9c5696bc03..c7b612c3d771 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -2137,6 +2137,13 @@ void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host, struct msm_dsi_phy_clk_request *clk_req) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + int ret; + + ret = dsi_calc_clk_rate(msm_host); + if (ret) { + pr_err("%s: unable to calc clk rate, %d\n", __func__, ret); + return; + } clk_req->bitclk_rate = msm_host->byte_clk_rate * 8; clk_req->escclk_rate = msm_host->esc_clk_rate; @@ -2280,7 +2287,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host, struct drm_display_mode *mode) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); - int ret; if (msm_host->mode) { drm_mode_destroy(msm_host->dev, msm_host->mode); @@ -2293,12 +2299,6 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host, return -ENOMEM; } - ret = dsi_calc_clk_rate(msm_host); - if (ret) { - pr_err("%s: unable to calc clk rate, %d\n", __func__, ret); - return ret; - } - return 0; } -- cgit v1.2.3 From d9535cb7b7603aeb549c697ecdf92024e4d0a650 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:02 -0500 Subject: ptp: introduce ptp auxiliary worker Many PTP drivers required to perform some asynchronous or periodic work, like periodically handling PHC counter overflow or handle delayed timestamp for RX/TX network packets. In most of the cases, such work is implemented using workqueues. Unfortunately, Kernel workqueues might introduce significant delay in work scheduling under high system load and on -RT, which could cause misbehavior of PTP drivers due to internal counter overflow, for example, and there is no way to tune its execution policy and priority manuallly. Hence, The kthread_worker can be used insted of workqueues, as it create separte named kthread for each worker and its its execution policy and priority can be configured using chrt tool. This prblem was reported for two drivers TI CPSW CPTS and dp83640, so instead of modifying each of these driver it was proposed to add PTP auxiliary worker to the PHC subsystem. The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker and kthread_delayed_work and introduces two new PHC subsystem APIs: - long (*do_aux_work)(struct ptp_clock_info *ptp) callback in ptp_clock_info structure, which driver should assign if it require to perform asynchronous or periodic work. Driver should return the delay of the PTP next auxiliary work scheduling time (>=0) or negative value in case further scheduling is not required. - int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which allows schedule PTP auxiliary work. The name of kthread_worker thread corresponds PTP PHC device name "ptp%d". Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 42 ++++++++++++++++++++++++++++++++++++++++ drivers/ptp/ptp_private.h | 3 +++ include/linux/ptp_clock_kernel.h | 20 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index b77435783ef3..7eacc1c4b3b1 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ptp_private.h" @@ -184,6 +185,19 @@ static void delete_ptp_clock(struct posix_clock *pc) kfree(ptp); } +static void ptp_aux_kworker(struct kthread_work *work) +{ + struct ptp_clock *ptp = container_of(work, struct ptp_clock, + aux_work.work); + struct ptp_clock_info *info = ptp->info; + long delay; + + delay = info->do_aux_work(info); + + if (delay >= 0) + kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} + /* public interface */ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, @@ -217,6 +231,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, mutex_init(&ptp->pincfg_mux); init_waitqueue_head(&ptp->tsev_wq); + if (ptp->info->do_aux_work) { + char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index); + + kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker); + ptp->kworker = kthread_create_worker(0, worker_name ? + worker_name : info->name); + kfree(worker_name); + if (IS_ERR(ptp->kworker)) { + err = PTR_ERR(ptp->kworker); + pr_err("failed to create ptp aux_worker %d\n", err); + goto kworker_err; + } + } + err = ptp_populate_pin_groups(ptp); if (err) goto no_pin_groups; @@ -259,6 +287,9 @@ no_pps: no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: + if (ptp->kworker) + kthread_destroy_worker(ptp->kworker); +kworker_err: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, index); @@ -274,6 +305,11 @@ int ptp_clock_unregister(struct ptp_clock *ptp) ptp->defunct = 1; wake_up_interruptible(&ptp->tsev_wq); + if (ptp->kworker) { + kthread_cancel_delayed_work_sync(&ptp->aux_work); + kthread_destroy_worker(ptp->kworker); + } + /* Release the clock's resources. */ if (ptp->pps_source) pps_unregister_source(ptp->pps_source); @@ -339,6 +375,12 @@ int ptp_find_pin(struct ptp_clock *ptp, } EXPORT_SYMBOL(ptp_find_pin); +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) +{ + return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay); +} +EXPORT_SYMBOL(ptp_schedule_worker); + /* module operations */ static void __exit ptp_exit(void) diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index d95888974d0c..b86f1bfecd6f 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -56,6 +57,8 @@ struct ptp_clock { struct attribute_group pin_attr_group; /* 1st entry is a pointer to the real group, 2nd is NULL terminator */ const struct attribute_group *pin_attr_groups[2]; + struct kthread_worker *kworker; + struct kthread_delayed_work aux_work; }; /* diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index a026bfd089db..51349d124ee5 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -99,6 +99,11 @@ struct system_device_crosststamp; * parameter func: the desired function to use. * parameter chan: the function channel index to use. * + * @do_work: Request driver to perform auxiliary (periodic) operations + * Driver should return delay of the next auxiliary work scheduling + * time (>=0) or negative value in case further scheduling + * is not required. + * * Drivers should embed their ptp_clock_info within a private * structure, obtaining a reference to it using container_of(). * @@ -126,6 +131,7 @@ struct ptp_clock_info { struct ptp_clock_request *request, int on); int (*verify)(struct ptp_clock_info *ptp, unsigned int pin, enum ptp_pin_function func, unsigned int chan); + long (*do_aux_work)(struct ptp_clock_info *ptp); }; struct ptp_clock; @@ -211,6 +217,16 @@ extern int ptp_clock_index(struct ptp_clock *ptp); int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan); +/** + * ptp_schedule_worker() - schedule ptp auxiliary work + * + * @ptp: The clock obtained from ptp_clock_register(). + * @delay: number of jiffies to wait before queuing + * See kthread_queue_delayed_work() for more info. + */ + +int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay); + #else static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, struct device *parent) @@ -225,6 +241,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp) static inline int ptp_find_pin(struct ptp_clock *ptp, enum ptp_pin_function func, unsigned int chan) { return -1; } +static inline int ptp_schedule_worker(struct ptp_clock *ptp, + unsigned long delay) +{ return -EOPNOTSUPP; } + #endif #endif -- cgit v1.2.3 From 999f129289ab1599e7cdc08804a9b5ec4e1418f4 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:03 -0500 Subject: net: ethernet: ti: cpts: convert to use ptp auxiliary worker There could be significant delay in CPTS work schedule under high system load and on -RT which could cause CPTS misbehavior due to internal counter overflow. Usage of own kthread_worker allows to avoid such kind of issues and makes it possible to tune priority of CPTS kthread_worker thread on -RT (thread name "cpts"). Hence, the CPTS driver is converted to use PTP auxiliary worker as PHC subsystem implements such functionality in a generic way now. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpts.c | 27 +++++++++++++-------------- drivers/net/ethernet/ti/cpts.h | 1 - 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 32279d21c836..3ed438ef9e1e 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -224,6 +224,17 @@ static int cpts_ptp_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } +static long cpts_overflow_check(struct ptp_clock_info *ptp) +{ + struct cpts *cpts = container_of(ptp, struct cpts, info); + unsigned long delay = cpts->ov_check_period; + struct timespec64 ts; + + cpts_ptp_gettime(&cpts->info, &ts); + pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec); + return (long)delay; +} + static struct ptp_clock_info cpts_info = { .owner = THIS_MODULE, .name = "CTPS timer", @@ -236,18 +247,9 @@ static struct ptp_clock_info cpts_info = { .gettime64 = cpts_ptp_gettime, .settime64 = cpts_ptp_settime, .enable = cpts_ptp_enable, + .do_aux_work = cpts_overflow_check, }; -static void cpts_overflow_check(struct work_struct *work) -{ - struct timespec64 ts; - struct cpts *cpts = container_of(work, struct cpts, overflow_work.work); - - cpts_ptp_gettime(&cpts->info, &ts); - pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec); - schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period); -} - static int cpts_match(struct sk_buff *skb, unsigned int ptp_class, u16 ts_seqid, u8 ts_msgtype) { @@ -378,7 +380,7 @@ int cpts_register(struct cpts *cpts) } cpts->phc_index = ptp_clock_index(cpts->clock); - schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period); + ptp_schedule_worker(cpts->clock, cpts->ov_check_period); return 0; err_ptp: @@ -392,8 +394,6 @@ void cpts_unregister(struct cpts *cpts) if (WARN_ON(!cpts->clock)) return; - cancel_delayed_work_sync(&cpts->overflow_work); - ptp_clock_unregister(cpts->clock); cpts->clock = NULL; @@ -476,7 +476,6 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, cpts->dev = dev; cpts->reg = (struct cpsw_cpts __iomem *)regs; spin_lock_init(&cpts->lock); - INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check); ret = cpts_of_parse(cpts, node); if (ret) diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index 01ea82ba9cdc..586edd9c7de0 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -119,7 +119,6 @@ struct cpts { u32 cc_mult; /* for the nominal frequency */ struct cyclecounter cc; struct timecounter tc; - struct delayed_work overflow_work; int phc_index; struct clk *refclk; struct list_head events; -- cgit v1.2.3 From 0d5f54fec0a18eea5e4ac005646fd2bc2118636c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:04 -0500 Subject: net: ethernet: ti: cpts: fix tx timestamping timeout With the low speed Ethernet connection CPDMA notification about packet processing can be received before CPTS TX timestamp event, which is set when packet actually left CPSW while cpdma notification is sent when packet pushed in CPSW fifo. As result, when connection is slow and CPU is fast enough TX timestamping is not working properly. Fix it, by introducing TX SKB queue to store PTP SKBs for which Ethernet Transmit Event hasn't been received yet and then re-check this queue with new Ethernet Transmit Events by scheduling CPTS overflow work more often (every 1 jiffies) until TX SKB queue is not empty. Side effect of this change is: - User space tools require to take into account possible delay in TX timestamp processing (for example ptp4l works with tx_timestamp_timeout=400 under net traffic and tx_timestamp_timeout=25 in idle). Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpts.c | 84 +++++++++++++++++++++++++++++++++++++++++- drivers/net/ethernet/ti/cpts.h | 1 + 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 3ed438ef9e1e..95a0076773b3 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -31,9 +31,18 @@ #include "cpts.h" +#define CPTS_SKB_TX_WORK_TIMEOUT 1 /* jiffies */ + +struct cpts_skb_cb_data { + unsigned long tmo; +}; + #define cpts_read32(c, r) readl_relaxed(&c->reg->r) #define cpts_write32(c, v, r) writel_relaxed(v, &c->reg->r) +static int cpts_match(struct sk_buff *skb, unsigned int ptp_class, + u16 ts_seqid, u8 ts_msgtype); + static int event_expired(struct cpts_event *event) { return time_after(jiffies, event->tmo); @@ -77,6 +86,47 @@ static int cpts_purge_events(struct cpts *cpts) return removed ? 0 : -1; } +static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event) +{ + struct sk_buff *skb, *tmp; + u16 seqid; + u8 mtype; + bool found = false; + + mtype = (event->high >> MESSAGE_TYPE_SHIFT) & MESSAGE_TYPE_MASK; + seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK; + + /* no need to grab txq.lock as access is always done under cpts->lock */ + skb_queue_walk_safe(&cpts->txq, skb, tmp) { + struct skb_shared_hwtstamps ssh; + unsigned int class = ptp_classify_raw(skb); + struct cpts_skb_cb_data *skb_cb = + (struct cpts_skb_cb_data *)skb->cb; + + if (cpts_match(skb, class, seqid, mtype)) { + u64 ns = timecounter_cyc2time(&cpts->tc, event->low); + + memset(&ssh, 0, sizeof(ssh)); + ssh.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(skb, &ssh); + found = true; + __skb_unlink(skb, &cpts->txq); + dev_consume_skb_any(skb); + dev_dbg(cpts->dev, "match tx timestamp mtype %u seqid %04x\n", + mtype, seqid); + } else if (time_after(jiffies, skb_cb->tmo)) { + /* timeout any expired skbs over 1s */ + dev_dbg(cpts->dev, + "expiring tx timestamp mtype %u seqid %04x\n", + mtype, seqid); + __skb_unlink(skb, &cpts->txq); + dev_consume_skb_any(skb); + } + } + + return found; +} + /* * Returns zero if matching event type was found. */ @@ -101,9 +151,15 @@ static int cpts_fifo_read(struct cpts *cpts, int match) event->low = lo; type = event_type(event); switch (type) { + case CPTS_EV_TX: + if (cpts_match_tx_ts(cpts, event)) { + /* if the new event matches an existing skb, + * then don't queue it + */ + break; + } case CPTS_EV_PUSH: case CPTS_EV_RX: - case CPTS_EV_TX: list_del_init(&event->list); list_add_tail(&event->list, &cpts->events); break; @@ -229,8 +285,15 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp) struct cpts *cpts = container_of(ptp, struct cpts, info); unsigned long delay = cpts->ov_check_period; struct timespec64 ts; + unsigned long flags; + + spin_lock_irqsave(&cpts->lock, flags); + ts = ns_to_timespec64(timecounter_read(&cpts->tc)); + + if (!skb_queue_empty(&cpts->txq)) + delay = CPTS_SKB_TX_WORK_TIMEOUT; + spin_unlock_irqrestore(&cpts->lock, flags); - cpts_ptp_gettime(&cpts->info, &ts); pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec); return (long)delay; } @@ -319,6 +382,19 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type) break; } } + + if (ev_type == CPTS_EV_TX && !ns) { + struct cpts_skb_cb_data *skb_cb = + (struct cpts_skb_cb_data *)skb->cb; + /* Not found, add frame to queue for processing later. + * The periodic FIFO check will handle this. + */ + skb_get(skb); + /* get the timestamp for timeouts */ + skb_cb->tmo = jiffies + msecs_to_jiffies(100); + __skb_queue_tail(&cpts->txq, skb); + ptp_schedule_worker(cpts->clock, 0); + } spin_unlock_irqrestore(&cpts->lock, flags); return ns; @@ -360,6 +436,7 @@ int cpts_register(struct cpts *cpts) { int err, i; + skb_queue_head_init(&cpts->txq); INIT_LIST_HEAD(&cpts->events); INIT_LIST_HEAD(&cpts->pool); for (i = 0; i < CPTS_MAX_EVENTS; i++) @@ -400,6 +477,9 @@ void cpts_unregister(struct cpts *cpts) cpts_write32(cpts, 0, int_enable); cpts_write32(cpts, 0, control); + /* Drop all packet */ + skb_queue_purge(&cpts->txq); + clk_disable(cpts->refclk); } EXPORT_SYMBOL_GPL(cpts_unregister); diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index 586edd9c7de0..73d73faf0f38 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -125,6 +125,7 @@ struct cpts { struct list_head pool; struct cpts_event pool_data[CPTS_MAX_EVENTS]; unsigned long ov_check_period; + struct sk_buff_head txq; }; void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb); -- cgit v1.2.3 From a93439cce2c1d31469e8245c504dbb6d1bed8749 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 28 Jul 2017 17:30:05 -0500 Subject: net: ethernet: ti: cpts: fix fifo read in cpts_find_ts Now the call chain cpts_find_ts() |- cpts_fifo_read(cpts, CPTS_EV_PUSH) will stop reading CPTS FIFO if PUSH event is found. But this is not expected and CPTS FIFI should be completely drained here. This is most probably copy-paste error and it has no negative impact as CPTS_EV_PUSH should not be present in FIFO without TS_PUSH request and cpts_systim_read() and cpts_find_ts() synchronized by spin_lock. Correct above by calling cpts_fifo_read() with -1 parameter, so it will read all CPTS event from FIFO. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 95a0076773b3..c2121d214f08 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -364,7 +364,7 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type) return 0; spin_lock_irqsave(&cpts->lock, flags); - cpts_fifo_read(cpts, CPTS_EV_PUSH); + cpts_fifo_read(cpts, -1); list_for_each_safe(this, next, &cpts->events) { event = list_entry(this, struct cpts_event, list); if (event_expired(event)) { -- cgit v1.2.3 From 40413955ee265a5e42f710940ec78f5450d49149 Mon Sep 17 00:00:00 2001 From: "yujuan.qi" Date: Mon, 31 Jul 2017 11:23:01 +0800 Subject: Cipso: cipso_v4_optptr enter infinite loop in for(),if((optlen > 0) && (optptr[1] == 0)), enter infinite loop. Test: receive a packet which the ip length > 20 and the first byte of ip option is 0, produce this issue Signed-off-by: yujuan.qi Acked-by: Paul Moore Signed-off-by: David S. Miller --- net/ipv4/cipso_ipv4.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c4c6e1969ed0..2ae8f54cb321 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int taglen; for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { - if (optptr[0] == IPOPT_CIPSO) + switch (optptr[0]) { + case IPOPT_CIPSO: return optptr; - taglen = optptr[1]; + case IPOPT_END: + return NULL; + case IPOPT_NOOP: + taglen = 1; + break; + default: + taglen = optptr[1]; + } optlen -= taglen; optptr += taglen; } -- cgit v1.2.3 From b3949a9a3e09f87e0371e80a2bef6ec0d48b575d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 30 Jul 2017 14:42:36 +0200 Subject: drm/msm: fix WARN_ON in add_vma() with no iommu While I was testing the upcoming adv7533 CEC support with my Dragonboard c410 I encountered this warning several times during boot: [ 4.408309] WARNING: CPU: 3 PID: 1347 at drivers/gpu/drm/msm/msm_gem.c:312 add_vma+0x78/0x88 [msm] [ 4.412951] Modules linked in: snd_soc_hdmi_codec adv7511 cec qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm [ 4.421728] CPU: 3 PID: 1347 Comm: kworker/3:3 Not tainted 4.13.0-rc1-dragonboard #111 [ 4.433090] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 4.441081] Workqueue: events deferred_probe_work_func [ 4.447929] task: ffff800031243600 task.stack: ffff800003394000 [ 4.453023] PC is at add_vma+0x78/0x88 [msm] [ 4.458823] LR is at _msm_gem_new+0xd4/0x188 [msm] [ 4.463207] pc : [] lr : [] pstate: 40000145 [ 4.467811] sp : ffff8000033978a0 [ 4.475357] x29: ffff8000033978a0 x28: ffff8000031dea18 [ 4.478572] x27: ffff800003933a00 x26: ffff800003b39800 [ 4.483953] x25: ffff8000338ff800 x24: 0000000000000001 [ 4.489249] x23: 0000000000000000 x22: ffff800003b39800 [ 4.494544] x21: ffff8000338ff800 x20: 0000000000000000 [ 4.499839] x19: ffff800003932600 x18: 0000000000000001 [ 4.505135] x17: 0000ffff8969e9e0 x16: ffff7e00000ce7a0 [ 4.510429] x15: ffffffffffffffff x14: ffff8000833977ef [ 4.515724] x13: ffff8000033977f3 x12: 0000000000000038 [ 4.521020] x11: 0101010101010101 x10: ffffff7f7fff7f7f [ 4.526315] x9 : 0000000000000000 x8 : ffff800003932800 [ 4.531633] x7 : 0000000000000000 x6 : 000000000000003f [ 4.531644] x5 : 0000000000000040 x4 : 0000000000000000 [ 4.531650] x3 : ffff800031243600 x2 : 0000000000000000 [ 4.531655] x1 : 0000000000000000 x0 : 0000000000000000 [ 4.531670] Call trace: [ 4.531676] Exception stack(0xffff8000033976c0 to 0xffff8000033977f0) [ 4.531683] 76c0: ffff800003932600 0001000000000000 ffff8000033978a0 ffff000000ac01f8 [ 4.531688] 76e0: 0000000000000140 0000000000000000 ffff800003932550 ffff800003397780 [ 4.531694] 7700: ffff800003397730 ffff000008261ce8 0000000000000000 ffff8000031d2f80 [ 4.531699] 7720: ffff800003397800 ffff0000081d671c 0000000000000140 0000000000000000 [ 4.531705] 7740: ffff000000ac04c0 0000000000004003 ffff800003397908 00000000014080c0 [ 4.531710] 7760: 0000000000000000 ffff800003b39800 0000000000000000 0000000000000000 [ 4.531716] 7780: 0000000000000000 ffff800031243600 0000000000000000 0000000000000040 [ 4.531721] 77a0: 000000000000003f 0000000000000000 ffff800003932800 0000000000000000 [ 4.531726] 77c0: ffffff7f7fff7f7f 0101010101010101 0000000000000038 ffff8000033977f3 [ 4.531730] 77e0: ffff8000833977ef ffffffffffffffff [ 4.531881] [] add_vma+0x78/0x88 [msm] [ 4.532011] [] _msm_gem_new+0xd4/0x188 [msm] [ 4.532134] [] msm_gem_new+0x10/0x18 [msm] [ 4.532260] [] msm_dsi_host_modeset_init+0x17c/0x268 [msm] [ 4.532384] [] msm_dsi_modeset_init+0x34/0x1b8 [msm] [ 4.532504] [] modeset_init+0x408/0x488 [msm] [ 4.532623] [] mdp5_kms_init+0x2b4/0x338 [msm] [ 4.532745] [] msm_drm_bind+0x218/0x4e8 [msm] [ 4.532755] [] try_to_bring_up_master+0x1f4/0x318 [ 4.532762] [] component_add+0x98/0x180 [ 4.532887] [] dsi_dev_probe+0x18/0x28 [msm] [ 4.532895] [] platform_drv_probe+0x58/0xc0 [ 4.532901] [] driver_probe_device+0x324/0x458 [ 4.532907] [] __device_attach_driver+0xac/0x170 [ 4.532913] [] bus_for_each_drv+0x4c/0x98 [ 4.532918] [] __device_attach+0xc0/0x160 [ 4.532924] [] device_initial_probe+0x10/0x18 [ 4.532929] [] bus_probe_device+0x94/0xa0 [ 4.532934] [] deferred_probe_work_func+0x8c/0xe8 [ 4.532941] [] process_one_work+0x1d4/0x330 [ 4.532946] [] worker_thread+0x48/0x468 [ 4.532952] [] kthread+0x12c/0x130 [ 4.532958] [] ret_from_fork+0x10/0x40 [ 4.532962] ---[ end trace b1ac6888ec40b0bb ]--- Signed-off-by: Hans Verkuil Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 065d933df2c3..a0c60e738db8 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -930,8 +930,12 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, if (use_vram) { struct msm_gem_vma *vma; struct page **pages; + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + mutex_lock(&msm_obj->lock); vma = add_vma(obj, NULL); + mutex_unlock(&msm_obj->lock); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; -- cgit v1.2.3 From 79687057c2880d871451f107548187e4853c38e6 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 30 Jul 2017 14:46:56 +0200 Subject: drm/msm: NULL pointer dereference in drivers/gpu/drm/msm/msm_gem_vma.c While I was testing the upcoming adv7533 CEC support with my Dragonboard c410 I encountered this NULL pointer dereference: [ 17.912822] Unable to handle kernel NULL pointer dereference at virtual address 000000e8 [ 17.917191] user pgtable: 4k pages, 48-bit VAs, pgd = ffff800030e9f000 [ 17.925249] [00000000000000e8] *pgd=00000000b0daf003, *pud=0000000000000000 [ 17.931650] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 17.938395] Modules linked in: btqcomsmd btqca arc4 wcn36xx mac80211 bluetooth cfg80211 ecdh_generic r8152 snd_soc_hdmi_codec adv7511 cec qcom_wcnss_pil msm mdt_loader drm_kms_helper msm_rng rng_core drm [ 17.943967] CPU: 0 PID: 1684 Comm: Xorg Tainted: G W 4.13.0-rc1-dragonboard #111 [ 17.962005] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 17.970685] task: ffff800031236c00 task.stack: ffff800033fbc000 [ 17.977582] PC is at msm_gem_unmap_vma+0x20/0x80 [msm] [ 17.983213] LR is at put_iova+0x60/0xb8 [msm] [ 17.988303] pc : [] lr : [] pstate: 20000145 [ 17.992733] sp : ffff800033fbfb30 [ 18.000193] x29: ffff800033fbfb30 x28: ffff800030b5f000 [ 18.003407] x27: 00000000000000b4 x26: ffff0000009f8cd8 [ 18.008789] x25: 0000000000000004 x24: dead000000000100 [ 18.014085] x23: dead000000000200 x22: ffff800030b5fd40 [ 18.019379] x21: ffff800030b5fc00 x20: 0000000000000000 [ 18.024675] x19: ffff80003082bf00 x18: 0000000000000000 [ 18.029970] x17: 0000ffffb3347e70 x16: ffff000008207638 [ 18.035265] x15: 0000000000000053 x14: 0000000000000000 [ 18.040560] x13: 0000000000000038 x12: 0101010101010101 [ 18.045855] x11: 7f7f7f7f7f7f7f7f x10: 0000000000000040 [ 18.051150] x9 : ffff800030b5f038 x8 : ffff800031657b50 [ 18.056446] x7 : ffff800031657b78 x6 : 0000000000000000 [ 18.061740] x5 : 0000000000000000 x4 : 00000000b5c01000 [ 18.067036] x3 : 0000000000000000 x2 : ffff8000337bf300 [ 18.072330] x1 : ffff80003082bf00 x0 : 0000000000000000 [ 18.077629] Process Xorg (pid: 1684, stack limit = 0xffff800033fbc000) [ 18.082925] Stack: (0xffff800033fbfb30 to 0xffff800033fc0000) [ 18.089262] fb20: ffff800033fbfb60 ffff000000ac07c8 [ 18.095081] fb40: ffff80003082bf00 ffff800030b5fc90 ffff800030b5fc00 ffff000000abf4a0 [ 18.102893] fb60: ffff800033fbfba0 ffff000000ac16b0 ffff800030b5fc00 ffff8000338ff870 [ 18.110706] fb80: ffff8000338ff800 ffff800030b5fc00 ffff800030b5fda8 ffff800033fbfd80 [ 18.118518] fba0: ffff800033fbfbe0 ffff0000009d4244 ffff800030b5fc00 ffff800030b5f038 [ 18.126332] fbc0: ffff800033fbfbd0 ffff800030b5fc00 ffff800030b5f038 ffff0000009d4840 [ 18.134144] fbe0: ffff800033fbfbf0 ffff0000009d4858 ffff800033fbfc10 ffff0000009d48e4 [ 18.141955] fc00: ffff800030b5fc00 ffff8000338ffd98 ffff800033fbfc30 ffff0000009d49a4 [ 18.149768] fc20: ffff800030b5fc00 ffff800030b5f000 ffff800033fbfc60 ffff0000009d4a4c [ 18.157581] fc40: ffff800030b5f050 ffff800030b5f000 0000000000000001 ffff800030b5fc00 [ 18.165394] fc60: ffff800033fbfca0 ffff0000009d4ab0 0000000000000018 ffff800030b5f000 [ 18.173206] fc80: ffff0000009efd28 ffff800033fbfd80 ffff8000338ff800 ffff0000009d56a8 [ 18.181019] fca0: ffff800033fbfcb0 ffff0000009efd54 ffff800033fbfcc0 ffff0000009d56c8 [ 18.188831] fcc0: ffff800033fbfd00 ffff0000009d58e0 ffff0000009fa6e0 00000000c00464b4 [ 18.196643] fce0: 0000000000000004 ffff80003082b400 0000ffffea1f0e00 0000000000000000 [ 18.204456] fd00: ffff800033fbfe00 ffff000008206f0c ffff80000335caf8 ffff80003082b400 [ 18.212269] fd20: 0000ffffea1f0e00 ffff80003082b400 00000000c00464b4 0000ffffea1f0e00 [ 18.220081] fd40: 0000000000000124 000000000000001d ffff0000089d2000 ffff800031236c00 [ 18.227894] fd60: ffff800033fbfd80 0000000000000004 ffff0000009efd28 ffff800033fbfd80 [ 18.235706] fd80: 0000000100000001 0000008000000001 0000001800000020 0000000000000001 [ 18.243518] fda0: 0000000100000000 0000000100000001 0000ffff00000000 0000ffff00000000 [ 18.251331] fdc0: 0000000000000124 0000000000000038 ffff0000089d2000 ffff800031236c00 [ 18.259144] fde0: ffff800033fbfe40 ffff000008214124 ffff800033fbfe30 ffff000008203290 [ 18.266956] fe00: ffff800033fbfe80 ffff0000082076b4 0000000000000000 ffff800030d8a000 [ 18.274768] fe20: ffff80003082b400 0000000000000016 ffff800033fbfe50 ffff0000081f0488 [ 18.282581] fe40: ffff800033fbfe80 ffff000008207678 0000000000000000 ffff80003082b400 [ 18.290393] fe60: ffff800033fbfe70 ffff0000082138b0 ffff800033fbfe80 ffff000008207658 [ 18.298207] fe80: 0000000000000000 ffff000008082f84 0000000000000000 0000800034a16000 [ 18.306017] fea0: ffffffffffffffff 0000ffffb3347e7c 0000000000000000 0000000000000015 [ 18.313832] fec0: 0000000000000016 00000000c00464b4 0000ffffea1f0e00 0000000000000001 [ 18.321643] fee0: 0000000000000020 0000000000000080 0000000000000001 0000000000000000 [ 18.329456] ff00: 000000000000001d 000000012692c5b0 0101010101010101 7f7f7f7f7f7f7f7f [ 18.337269] ff20: 0101010101010101 0000000000000038 0000000000000000 0000000000000053 [ 18.345082] ff40: 0000ffffb368b2b8 0000ffffb3347e70 0000000000000000 0000ffffb3847000 [ 18.352894] ff60: 0000ffffea1f0e00 00000000c00464b4 0000000000000016 0000ffffea1f0edc [ 18.360705] ff80: 000000012692ad20 0000000000000003 00000001214282e4 0000000121428388 [ 18.368518] ffa0: 0000000000000000 0000ffffea1f0da0 0000ffffb367185c 0000ffffea1f0da0 [ 18.376332] ffc0: 0000ffffb3347e7c 0000000000000000 0000000000000016 000000000000001d [ 18.384142] ffe0: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 18.391953] Call trace: [ 18.399760] Exception stack(0xffff800033fbf950 to 0xffff800033fbfa80) [ 18.402023] f940: ffff80003082bf00 0001000000000000 [ 18.408622] f960: ffff800033fbfb30 ffff000000ac2d58 0000000020000145 ffff8000338ffa78 [ 18.416435] f980: 0000000000000000 0000000000000000 ffff800033fbf9e0 ffff0000089afcf0 [ 18.424248] f9a0: ffff80000348f230 ffff8000338ffa78 0000000000000000 0000000000000000 [ 18.432060] f9c0: ffff8000338ffaa8 0000000000000001 ffff800033fbfb80 ffff0000009e8f38 [ 18.439872] f9e0: ffff800033fbfa10 ffff0000089a9ff8 0000000000000027 ffff80003082b918 [ 18.447684] fa00: 0000000000000000 ffff80003082bf00 ffff8000337bf300 0000000000000000 [ 18.455497] fa20: 00000000b5c01000 0000000000000000 0000000000000000 ffff800031657b78 [ 18.463310] fa40: ffff800031657b50 ffff800030b5f038 0000000000000040 7f7f7f7f7f7f7f7f [ 18.471122] fa60: 0101010101010101 0000000000000038 0000000000000000 0000000000000053 [ 18.479062] [] msm_gem_unmap_vma+0x20/0x80 [msm] [ 18.486862] [] put_iova+0x60/0xb8 [msm] [ 18.492938] [] msm_gem_free_object+0x60/0x198 [msm] [ 18.498432] [] drm_gem_object_free+0x1c/0x58 [drm] [ 18.504854] [] drm_gem_object_put_unlocked+0x90/0xa0 [drm] [ 18.511273] [] drm_gem_object_handle_put_unlocked+0x64/0xd0 [drm] [ 18.518300] [] drm_gem_object_release_handle+0x54/0x98 [drm] [ 18.525679] [] drm_gem_handle_delete+0x64/0xb8 [drm] [ 18.532968] [] drm_gem_dumb_destroy+0x10/0x18 [drm] [ 18.539479] [] drm_mode_destroy_dumb_ioctl+0x2c/0x40 [drm] [ 18.545992] [] drm_ioctl_kernel+0x68/0xe0 [drm] [ 18.553105] [] drm_ioctl+0x178/0x3b0 [drm] [ 18.558970] [] do_vfs_ioctl+0xa4/0x7d0 [ 18.564694] [] SyS_ioctl+0x7c/0x98 [ 18.569992] [] el0_svc_naked+0x38/0x3c [ 18.574941] Code: a90153f3 aa0003f4 f90013f5 aa0103f3 (f9407400) [ 18.580502] ---[ end trace b1ac6888ec40b0be ]--- It turns out that the aspace argument in msm_gem_unmap_vma() is NULL. Signed-off-by: Hans Verkuil [Note: this case gets hit with !IOMMU config] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index c36321bc8714..d34e331554f3 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -42,7 +42,7 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma, struct sg_table *sgt) { - if (!vma->iova) + if (!aspace || !vma->iova) return; if (aspace->mmu) { -- cgit v1.2.3 From b0e77fd87cf302351e537881c8daf8d1c69cf541 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 28 Jul 2017 16:16:59 +0530 Subject: drm/msm/mdp5: Fix typo in encoder_enable path The mdp5_cmd_encoder_disable is accidentally called in the encoder enable path. We've not seen any problems since we haven't tested with command mode panels in a while. Fix the copy-paste error. Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c index 97f3294fbfc6..70bef51245af 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c @@ -299,7 +299,7 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder) struct mdp5_interface *intf = mdp5_encoder->intf; if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND) - mdp5_cmd_encoder_disable(encoder); + mdp5_cmd_encoder_enable(encoder); else mdp5_vid_encoder_enable(encoder); } -- cgit v1.2.3 From d0538f5048fafe5633c58f25c3332f67739cdeb4 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 28 Jul 2017 16:17:00 +0530 Subject: drm/msm/mdp5: Drop clock names with "_clk" suffix We have upstream bindings (msm8916) that have the "_clk" suffix in the clock names. The downstream bindings also require it. We want to drop the "_clk" suffix and at the same time support existing bindings. Update the MDP5 code with the the msm_clk_get() helper to support both old and new clock names. Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c index 5d13fa5381ee..1c603aef3c59 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c @@ -502,7 +502,7 @@ static int get_clk(struct platform_device *pdev, struct clk **clkp, const char *name, bool mandatory) { struct device *dev = &pdev->dev; - struct clk *clk = devm_clk_get(dev, name); + struct clk *clk = msm_clk_get(pdev, name); if (IS_ERR(clk) && mandatory) { dev_err(dev, "failed to get %s (%ld)\n", name, PTR_ERR(clk)); return PTR_ERR(clk); @@ -887,21 +887,21 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev) } /* mandatory clocks: */ - ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus_clk", true); + ret = get_clk(pdev, &mdp5_kms->axi_clk, "bus", true); if (ret) goto fail; - ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk", true); + ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface", true); if (ret) goto fail; - ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk", true); + ret = get_clk(pdev, &mdp5_kms->core_clk, "core", true); if (ret) goto fail; - ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync_clk", true); + ret = get_clk(pdev, &mdp5_kms->vsync_clk, "vsync", true); if (ret) goto fail; /* optional clocks: */ - get_clk(pdev, &mdp5_kms->lut_clk, "lut_clk", false); + get_clk(pdev, &mdp5_kms->lut_clk, "lut", false); /* we need to set a default rate before enabling. Set a safe * rate first, then figure out hw revision, and then set a -- cgit v1.2.3 From be73b3043bf465455d4c9b88f68e03b6447bcfb0 Mon Sep 17 00:00:00 2001 From: "K. Den" Date: Tue, 1 Aug 2017 01:05:20 +0900 Subject: vxlan: fix remcsum when GRO on and CHECKSUM_PARTIAL boundary is outer UDP In the case that GRO is turned on and the original received packet is CHECKSUM_PARTIAL, if the outer UDP header is exactly at the last csum-unnecessary point, which for instance could occur if the packet comes from another Linux guest on the same Linux host, we have to do either remcsum_adjust or set up CHECKSUM_PARTIAL again with its csum_start properly reset considering RCO. However, since b7fe10e5ebac("gro: Fix remcsum offload to deal with frags in GRO") that barrier in such case could be skipped if GRO turned on, hence we pass over it and the inner L4 validation mistakenly reckons it as a bad csum. This patch makes remcsum_offload being reset at the same time of GRO remcsum cleanup, so as to make it work in such case as before. Fixes: b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags in GRO") Signed-off-by: Koichiro Den Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 96aa7e6cf214..e17baac70f43 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -623,6 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, out: skb_gro_remcsum_cleanup(skb, &grc); + skb->remcsum_offload = 0; NAPI_GRO_CB(skb)->flush |= flush; return pp; -- cgit v1.2.3 From 1bff8a0c1f8c236209ee369b7952751c04eaa71a Mon Sep 17 00:00:00 2001 From: "K. Den" Date: Tue, 1 Aug 2017 01:05:39 +0900 Subject: gue: fix remcsum when GRO on and CHECKSUM_PARTIAL boundary is outer UDP In the case that GRO is turned on and the original received packet is CHECKSUM_PARTIAL, if the outer UDP header is exactly at the last csum-unnecessary point, which for instance could occur if the packet comes from another Linux guest on the same Linux host, we have to do either remcsum_adjust or set up CHECKSUM_PARTIAL again with its csum_start properly reset considering RCO. However, since b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags in GRO") that barrier in such case could be skipped if GRO turned on, hence we pass over it and the inner L4 validation mistakenly reckons it as a bad csum. This patch makes remcsum_offload being reset at the same time of GRO remcsum cleanup, so as to make it work in such case as before. Fixes: b7fe10e5ebac ("gro: Fix remcsum offload to deal with frags in GRO") Signed-off-by: Koichiro Den Signed-off-by: David S. Miller --- net/ipv4/fou.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 8e0257d01200..1540db65241a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -450,6 +450,7 @@ out_unlock: out: NAPI_GRO_CB(skb)->flush |= flush; skb_gro_remcsum_cleanup(skb, &grc); + skb->remcsum_offload = 0; return pp; } -- cgit v1.2.3 From 3394f5618dfe8e686a2429aad75edf7ff6540911 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:30 -0600 Subject: drm/msm: Remove some potentially blocked register ranges The 0xf400 and 0xf800 ranges are in the RBBM_SECVID block which may be protected from CPU access. Skip dumping them since they are minimally useful for debugging and they aren't worth a system hang. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 49 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index b4b54f1c24bc..1f60a9a885b4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -920,31 +920,30 @@ static const u32 a5xx_registers[] = { 0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B, 0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095, 0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3, - 0x04E0, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400, 0xF800, 0xF807, - 0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0, - 0x0B00, 0x0B12, 0x0B15, 0x0B28, 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, - 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61, 0x0C80, 0x0C82, - 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, - 0x2180, 0x2185, 0x2580, 0x2585, 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, - 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, - 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, 0x2100, 0x211E, 0x2140, 0x2145, - 0x2500, 0x251E, 0x2540, 0x2545, 0x0D10, 0x0D17, 0x0D20, 0x0D23, - 0x0D30, 0x0D30, 0x20C0, 0x20C0, 0x24C0, 0x24C0, 0x0E40, 0x0E43, - 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, - 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, 0x0EB0, 0x0EB2, 0xE140, 0xE147, - 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, - 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C, 0xE240, 0xE268, - 0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, - 0xE100, 0xE105, 0xE380, 0xE38F, 0xE3B0, 0xE3B0, 0xE400, 0xE405, - 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, 0xE280, 0xE280, 0xE282, 0xE2A3, - 0xE2A5, 0xE2C2, 0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, - 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, - 0xEA10, 0xEA1C, 0xEA40, 0xEA68, 0xE800, 0xE806, 0xE810, 0xE89A, - 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, 0xE900, 0xE905, 0xEB80, 0xEB8F, - 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0, - 0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2, 0xA800, 0xA8FF, - 0xAC60, 0xAC60, 0xB000, 0xB97F, 0xB9A0, 0xB9BF, - ~0 + 0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841, + 0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28, + 0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, + 0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, + 0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585, + 0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8, + 0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E, + 0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545, + 0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0, + 0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57, + 0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8, + 0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, + 0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, + 0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A, + 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F, + 0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0, + 0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947, + 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7, + 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68, + 0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB, + 0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05, + 0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3, + 0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F, + 0xB9A0, 0xB9BF, ~0 }; static void a5xx_dump(struct msm_gpu *gpu) -- cgit v1.2.3 From 6e749e5971fc7c7a33d7a673fbe4944604b397cf Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:31 -0600 Subject: drm/msm: Allow hardware clock gating to be toggled There are some use cases wherein we need to turn off hardware clock gating before reading certain registers. Modify the A5XX HWCG function to allow user to enable or disable clock gating at will. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 42 ++++++++--------------------------- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 1 + 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 1f60a9a885b4..c1f8c20414f1 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -117,12 +117,10 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, gpu->funcs->flush(gpu); } -struct a5xx_hwcg { +static const struct { u32 offset; u32 value; -}; - -static const struct a5xx_hwcg a530_hwcg[] = { +} a5xx_hwcg[] = { {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222}, {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222}, {REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222}, @@ -217,38 +215,16 @@ static const struct a5xx_hwcg a530_hwcg[] = { {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222} }; -static const struct { - int (*test)(struct adreno_gpu *gpu); - const struct a5xx_hwcg *regs; - unsigned int count; -} a5xx_hwcg_regs[] = { - { adreno_is_a530, a530_hwcg, ARRAY_SIZE(a530_hwcg), }, -}; - -static void _a5xx_enable_hwcg(struct msm_gpu *gpu, - const struct a5xx_hwcg *regs, unsigned int count) +void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) { unsigned int i; - for (i = 0; i < count; i++) - gpu_write(gpu, regs[i].offset, regs[i].value); - - gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xAAA8AA00); - gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, 0x182); -} + for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++) + gpu_write(gpu, a5xx_hwcg[i].offset, + state ? a5xx_hwcg[i].value : 0); -static void a5xx_enable_hwcg(struct msm_gpu *gpu) -{ - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_regs); i++) { - if (a5xx_hwcg_regs[i].test(adreno_gpu)) { - _a5xx_enable_hwcg(gpu, a5xx_hwcg_regs[i].regs, - a5xx_hwcg_regs[i].count); - return; - } - } + gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0); + gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180); } static int a5xx_me_init(struct msm_gpu *gpu) @@ -545,7 +521,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF); /* Enable HWCG */ - a5xx_enable_hwcg(gpu); + a5xx_set_hwcg(gpu, true); gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 6638bc85645d..d24796f3a706 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -59,5 +59,6 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, } bool a5xx_idle(struct msm_gpu *gpu); +void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); #endif /* __A5XX_GPU_H__ */ -- cgit v1.2.3 From a23cb3b52fec73b22671bac65356d8c55bf37706 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:32 -0600 Subject: drm/msm: Turn off hardware clock gating before reading A5XX registers On A5XX GPU hardware clock gating needs to be turned off before reading certain GPU registers via AHB. Turn off HWCG before calling adreno_show() to safely dump all the registers without a system hang. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index c1f8c20414f1..33763b005b7b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -995,7 +995,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) { seq_printf(m, "status: %08x\n", gpu_read(gpu, REG_A5XX_RBBM_STATUS)); + + /* + * Temporarily disable hardware clock gating before going into + * adreno_show to avoid issues while reading the registers + */ + a5xx_set_hwcg(gpu, false); adreno_show(gpu, m); + a5xx_set_hwcg(gpu, true); } #endif -- cgit v1.2.3 From b0135ab91af16be57e444e74023e48b1f379ab36 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:34 -0600 Subject: drm/msm: args->fence should be args->flags Fix a typo in msm_ioctl_gem_submit - check args->flags for the MSM_SUBMIT_NO_IMPLICIT flag instead of args->fence. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 8095658e8cb4..8a75c0bd8a78 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -451,7 +451,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out; - if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) { + if (!(args->flags & MSM_SUBMIT_NO_IMPLICIT)) { ret = submit_fence_sync(submit); if (ret) goto out; -- cgit v1.2.3 From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 27 Jul 2017 10:42:35 -0600 Subject: drm/msm: Remove __user from __u64 data types __user should be used to identify user pointers and not __u64 variables containing pointers. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 26c54f6d595d..ad4eb2863e70 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd { __u32 size; /* in, cmdstream size */ __u32 pad; __u32 nr_relocs; /* in, number of submit_reloc's */ - __u64 __user relocs; /* in, ptr to array of submit_reloc's */ + __u64 relocs; /* in, ptr to array of submit_reloc's */ }; /* Each buffer referenced elsewhere in the cmdstream submit (ie. the @@ -215,8 +215,8 @@ struct drm_msm_gem_submit { __u32 fence; /* out */ __u32 nr_bos; /* in, number of submit_bo's */ __u32 nr_cmds; /* in, number of submit_cmd's */ - __u64 __user bos; /* in, ptr to array of submit_bo's */ - __u64 __user cmds; /* in, ptr to array of submit_cmd's */ + __u64 bos; /* in, ptr to array of submit_bo's */ + __u64 cmds; /* in, ptr to array of submit_cmd's */ __s32 fence_fd; /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */ }; -- cgit v1.2.3 From 541de4c9c953438b677c31072e7762115ac11299 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Fri, 28 Jul 2017 16:17:08 +0530 Subject: drm/msm/adreno: Prevent unclocked access when retrieving timestamps msm_gpu's get_timestamp() op (called by the MSM_GET_PARAM ioctl) can result in register accesses. We need our power domain and clocks to be active for that. Make sure they are enabled here. Signed-off-by: Archit Taneja Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f1ab2703674a..7414c6bbd582 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -48,8 +48,15 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) *value = adreno_gpu->base.fast_rate; return 0; case MSM_PARAM_TIMESTAMP: - if (adreno_gpu->funcs->get_timestamp) - return adreno_gpu->funcs->get_timestamp(gpu, value); + if (adreno_gpu->funcs->get_timestamp) { + int ret; + + pm_runtime_get_sync(&gpu->pdev->dev); + ret = adreno_gpu->funcs->get_timestamp(gpu, value); + pm_runtime_put_autosuspend(&gpu->pdev->dev); + + return ret; + } return -EINVAL; default: DBG("%s: invalid param: %u", gpu->name, param); -- cgit v1.2.3 From bdab8e8b2bc89dce73b6bcabcd295806ce2e5ef9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Jul 2017 17:52:44 +0200 Subject: drm/msm: gpu: call qcom_mdt interfaces only for ARCH_QCOM When compile-testing for something other than ARCH_QCOM, we run into a link error: drivers/gpu/drm/msm/adreno/a5xx_gpu.o: In function `a5xx_hw_init': a5xx_gpu.c:(.text.a5xx_hw_init+0x600): undefined reference to `qcom_mdt_get_size' a5xx_gpu.c:(.text.a5xx_hw_init+0x93c): undefined reference to `qcom_mdt_load' There is already an #ifdef that tries to check for CONFIG_QCOM_MDT_LOADER, but that symbol is only meaningful when building for ARCH_QCOM. This adds a compile-time check for ARCH_QCOM, and clarifies the Kconfig select statement so we don't even try it for other targets. The check for CONFIG_QCOM_MDT_LOADER can then go away, which also improves compile-time coverage and makes the code a little nicer to read. Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX") Acked-by: Jordan Crouse Acked-by: Bjorn Andersson Signed-off-by: Arnd Bergmann Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 2 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index b638d192ce5e..99d39b2aefa6 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -5,7 +5,7 @@ config DRM_MSM depends on ARCH_QCOM || (ARM && COMPILE_TEST) depends on OF && COMMON_CLK depends on MMU - select QCOM_MDT_LOADER + select QCOM_MDT_LOADER if ARCH_QCOM select REGULATOR select DRM_KMS_HELPER select DRM_PANEL diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 33763b005b7b..a3393e1dc497 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -26,8 +26,6 @@ static void a5xx_dump(struct msm_gpu *gpu); #define GPU_PAS_ID 13 -#if IS_ENABLED(CONFIG_QCOM_MDT_LOADER) - static int zap_shader_load_mdt(struct device *dev, const char *fwname) { const struct firmware *fw; @@ -36,6 +34,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) void *mem_region = NULL; int ret; + if (!IS_ENABLED(CONFIG_ARCH_QCOM)) + return -EINVAL; + /* Request the MDT file for the firmware */ ret = request_firmware(&fw, fwname, dev); if (ret) { @@ -73,12 +74,6 @@ out: return ret; } -#else -static int zap_shader_load_mdt(struct device *dev, const char *fwname) -{ - return -ENODEV; -} -#endif static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) -- cgit v1.2.3 From 8f93e043d048b671c32c6f0a5102fefa800c4618 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 26 Jul 2017 21:59:21 +0200 Subject: drm/msm: gpu: don't abuse dma_alloc for non-DMA allocations In zap_shader_load_mdt(), we pass a pointer to a phys_addr_t into dmam_alloc_coherent, which the compiler warns about: drivers/gpu/drm/msm/adreno/a5xx_gpu.c: In function 'zap_shader_load_mdt': drivers/gpu/drm/msm/adreno/a5xx_gpu.c:54:50: error: passing argument 3 of 'dmam_alloc_coherent' from incompatible pointer type [-Werror=incompatible-pointer-types] The returned DMA address is later passed on to a function that takes a phys_addr_t, so it's clearly wrong to use the DMA mapping interface here: the memory may be uncached, or the address may be completely wrong if there is an IOMMU connected to the device. What the code actually wants to do is to get the physical address from the reserved-mem node. It goes through the dma-mapping interfaces for obscure reasons, and this apparently only works by chance, relying on specific bugs in the error handling of the arm64 dma-mapping implementation. The same problem existed in the "venus" media driver, which was now fixed by Stanimir Varbanov after long discussions. In order to make some progress here, I have now ported his approach over to the adreno driver. The patch is currently untested, and should get a good review, but it is now much simpler than the original, and it should be obvious what goes wrong if I made a mistake in the port. See also: a6e2d36bf6b7 ("media: venus: don't abuse dma_alloc for non-DMA allocations") Cc: Stanimir Varbanov Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX") Acked-by: Bjorn Andersson Acked-and-Tested-by: Jordan Crouse Signed-off-by: Arnd Bergmann Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 72 +++++++++++------------------------ drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 2 - 2 files changed, 23 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index a3393e1dc497..f9eae03aa1dc 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include "msm_gem.h" #include "msm_mmu.h" @@ -29,6 +29,8 @@ static void a5xx_dump(struct msm_gpu *gpu); static int zap_shader_load_mdt(struct device *dev, const char *fwname) { const struct firmware *fw; + struct device_node *np; + struct resource r; phys_addr_t mem_phys; ssize_t mem_size; void *mem_region = NULL; @@ -37,6 +39,21 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) if (!IS_ENABLED(CONFIG_ARCH_QCOM)) return -EINVAL; + np = of_get_child_by_name(dev->of_node, "zap-shader"); + if (!np) + return -ENODEV; + + np = of_parse_phandle(np, "memory-region", 0); + if (!np) + return -EINVAL; + + ret = of_address_to_resource(np, 0, &r); + if (ret) + return ret; + + mem_phys = r.start; + mem_size = resource_size(&r); + /* Request the MDT file for the firmware */ ret = request_firmware(&fw, fwname, dev); if (ret) { @@ -52,7 +69,7 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) } /* Allocate memory for the firmware image */ - mem_region = dmam_alloc_coherent(dev, mem_size, &mem_phys, GFP_KERNEL); + mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC); if (!mem_region) { ret = -ENOMEM; goto out; @@ -70,6 +87,9 @@ static int zap_shader_load_mdt(struct device *dev, const char *fwname) DRM_DEV_ERROR(dev, "Unable to authorize the image\n"); out: + if (mem_region) + memunmap(mem_region); + release_firmware(fw); return ret; @@ -348,45 +368,6 @@ static int a5xx_zap_shader_resume(struct msm_gpu *gpu) return ret; } -/* Set up a child device to "own" the zap shader */ -static int a5xx_zap_shader_dev_init(struct device *parent, struct device *dev) -{ - struct device_node *node; - int ret; - - if (dev->parent) - return 0; - - /* Find the sub-node for the zap shader */ - node = of_get_child_by_name(parent->of_node, "zap-shader"); - if (!node) { - DRM_DEV_ERROR(parent, "zap-shader not found in device tree\n"); - return -ENODEV; - } - - dev->parent = parent; - dev->of_node = node; - dev_set_name(dev, "adreno_zap_shader"); - - ret = device_register(dev); - if (ret) { - DRM_DEV_ERROR(parent, "Couldn't register zap shader device\n"); - goto out; - } - - ret = of_reserved_mem_device_init(dev); - if (ret) { - DRM_DEV_ERROR(parent, "Unable to set up the reserved memory\n"); - device_unregister(dev); - } - -out: - if (ret) - dev->parent = NULL; - - return ret; -} - static int a5xx_zap_shader_init(struct msm_gpu *gpu) { static bool loaded; @@ -415,11 +396,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu) return -ENODEV; } - ret = a5xx_zap_shader_dev_init(&pdev->dev, &a5xx_gpu->zap_dev); - - if (!ret) - ret = zap_shader_load_mdt(&a5xx_gpu->zap_dev, - adreno_gpu->info->zapfw); + ret = zap_shader_load_mdt(&pdev->dev, adreno_gpu->info->zapfw); loaded = !ret; @@ -662,9 +639,6 @@ static void a5xx_destroy(struct msm_gpu *gpu) DBG("%s", gpu->name); - if (a5xx_gpu->zap_dev.parent) - device_unregister(&a5xx_gpu->zap_dev); - if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index d24796f3a706..1137092241d5 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -36,8 +36,6 @@ struct a5xx_gpu { uint32_t gpmu_dwords; uint32_t lm_leakage; - - struct device zap_dev; }; #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) -- cgit v1.2.3 From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Aug 2017 20:11:02 -0500 Subject: PCI: Add pci_reset_function_locked() The implementation of PCI workarounds may require that the device is reset from its probe function. This implies that the PCI device lock is already held, and makes calling pci_reset_function() impossible (since it will itself try to take that lock). Add pci_reset_function_locked(), which is the equivalent of pci_reset_function(), except that it requires the PCI device lock to be already held by the caller. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier [bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove __pci_dev_reset() and pci_dev_reset()")] Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset() Cc: stable@vger.kernel.org # 4.11 --- drivers/pci/pci.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 2 files changed, 36 insertions(+) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..b4b7eab29400 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_reset_function); +/** + * pci_reset_function_locked - quiesce and reset a PCI device function + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * This function does not just reset the PCI portion of a device, but + * clears all the state associated with the device. This function differs + * from __pci_reset_function() in that it saves and restores device state + * over the reset. It also differs from pci_reset_function() in that it + * requires the PCI device lock to be held. + * + * Returns 0 if the device function was successfully reset or negative if the + * device doesn't support resetting a single function. + */ +int pci_reset_function_locked(struct pci_dev *dev) +{ + int rc; + + rc = pci_probe_reset_function(dev); + if (rc) + return rc; + + pci_dev_save_and_disable(dev); + + rc = __pci_reset_function_locked(dev); + + pci_dev_restore(dev); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_reset_function_locked); + /** * pci_try_reset_function - quiesce and reset a PCI device function * @dev: PCI device to reset diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..a75c13673852 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); +int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); int pci_reset_slot(struct pci_slot *slot); -- cgit v1.2.3 From 6184cc8ddbb318758a000da68c5285fc2dd74338 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Tue, 1 Aug 2017 17:47:25 +0800 Subject: drm/i915/gvt: change resetting to resetting_eng Use resetting_eng to identify which engine is resetting so the rest ones' workload won't be impacted v2: - use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu) Signed-off-by: Chuanxiao Dong Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 10 +++++----- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 3 ++- drivers/gpu/drm/i915/gvt/vgpu.c | 8 +++++--- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index 700050556242..cac669b2b320 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -499,10 +499,10 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) static int complete_execlist_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_execlist *execlist = - &vgpu->execlist[workload->ring_id]; + int ring_id = workload->ring_id; + struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id]; struct intel_vgpu_workload *next_workload; - struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next; + struct list_head *next = workload_q_head(vgpu, ring_id)->next; bool lite_restore = false; int ret; @@ -512,10 +512,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) release_shadow_batch_buffer(workload); release_shadow_wa_ctx(&workload->wa_ctx); - if (workload->status || vgpu->resetting) + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) goto out; - if (!list_empty(workload_q_head(vgpu, workload->ring_id))) { + if (!list_empty(workload_q_head(vgpu, ring_id))) { struct execlist_ctx_descriptor_format *this_desc, *next_desc; next_workload = container_of(next, diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 3a74e79eac2f..d96c41aa5aa7 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -149,7 +149,7 @@ struct intel_vgpu { bool active; bool pv_notified; bool failsafe; - bool resetting; + unsigned int resetting_eng; void *sched_data; struct vgpu_sched_ctl sched_ctl; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 4f7057d62d88..22e08eb2d0b7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -432,7 +432,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) i915_gem_request_put(fetch_and_zero(&workload->req)); - if (!workload->status && !vgpu->resetting) { + if (!workload->status && !(vgpu->resetting_eng & + ENGINE_MASK(ring_id))) { update_guest_context(workload); for_each_set_bit(event, workload->pending_events, diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 90c14e6e3ea0..3deadcbd5a24 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -480,11 +480,13 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, { struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; + unsigned int resetting_eng = dmlr ? ALL_ENGINES : engine_mask; gvt_dbg_core("------------------------------------------\n"); gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n", vgpu->id, dmlr, engine_mask); - vgpu->resetting = true; + + vgpu->resetting_eng = resetting_eng; intel_vgpu_stop_schedule(vgpu); /* @@ -497,7 +499,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, mutex_lock(&gvt->lock); } - intel_vgpu_reset_execlist(vgpu, dmlr ? ALL_ENGINES : engine_mask); + intel_vgpu_reset_execlist(vgpu, resetting_eng); /* full GPU reset or device model level reset */ if (engine_mask == ALL_ENGINES || dmlr) { @@ -520,7 +522,7 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, } } - vgpu->resetting = false; + vgpu->resetting_eng = 0; gvt_dbg_core("reset vgpu%d done\n", vgpu->id); gvt_dbg_core("------------------------------------------\n"); } -- cgit v1.2.3 From f2e2c00adcdc59b9fe86c82259abdaf32d0ee6ea Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Tue, 1 Aug 2017 17:47:26 +0800 Subject: drm/i915/gvt: clean workload queue if error happened If a workload caused a HW GPU hang or it is in the middle of vGPU reset, the workload queue should be cleaned up to emulate the hang state of the GPU. v2: - use ENGINE_MASK(ring_id) instead of (1 << ring_id). (Zhenyu) Signed-off-by: Chuanxiao Dong Cc: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index cac669b2b320..1648887d3f55 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -46,6 +46,8 @@ #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) +static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask); + static int context_switch_events[] = { [RCS] = RCS_AS_CONTEXT_SWITCH, [BCS] = BCS_AS_CONTEXT_SWITCH, @@ -512,8 +514,23 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) release_shadow_batch_buffer(workload); release_shadow_wa_ctx(&workload->wa_ctx); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) + if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + /* if workload->status is not successful means HW GPU + * has occurred GPU hang or something wrong with i915/GVT, + * and GVT won't inject context switch interrupt to guest. + * So this error is a vGPU hang actually to the guest. + * According to this we should emunlate a vGPU hang. If + * there are pending workloads which are already submitted + * from guest, we should clean them up like HW GPU does. + * + * if it is in middle of engine resetting, the pending + * workloads won't be submitted to HW GPU and will be + * cleaned up during the resetting process later, so doing + * the workload clean up here doesn't have any impact. + **/ + clean_workloads(vgpu, ENGINE_MASK(ring_id)); goto out; + } if (!list_empty(workload_q_head(vgpu, ring_id))) { struct execlist_ctx_descriptor_format *this_desc, *next_desc; -- cgit v1.2.3 From e43c9f23efadade684773a855675c99da278c862 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:06 -0700 Subject: b44: Initialize 64-bit stats seqcount On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that. Fixes: eeda8585522b ("b44: add 64 bit stats") Signed-off-by: Florian Fainelli Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index f411936b744c..a1125d10c825 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2368,6 +2368,7 @@ static int b44_init_one(struct ssb_device *sdev, bp->msg_enable = netif_msg_init(b44_debug, B44_DEF_MSG_ENABLE); spin_lock_init(&bp->lock); + u64_stats_init(&bp->hw_stats.syncp); bp->rx_pending = B44_DEF_RX_RING_PENDING; bp->tx_pending = B44_DEF_TX_RING_PENDING; -- cgit v1.2.3 From 7d6d067790289e4f61f59fa60550ca5918aa25bd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:07 -0700 Subject: i40e: Initialize 64-bit statistics TX ring seqcount On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that. Fixes: 980e9b118642 ("i40e: Add support for 64 bit netstats") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b936febc315a..2194960d5855 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1113,6 +1113,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) if (!tx_ring->tx_bi) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); /* add u32 for head writeback, align after this takes care of -- cgit v1.2.3 From 7c3a4626eb65e78ebe208f48ffa21a5002f7f38e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:08 -0700 Subject: ixgbe: Initialize 64-bit stats seqcounts On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that. Fixes: 4197aa7bb818 ("ixgbevf: provide 64 bit statistics") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 084c53582793..032f8ac06357 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2988,6 +2988,8 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); @@ -3046,6 +3048,8 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring) if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); -- cgit v1.2.3 From 7ceb781a1aa0356086e2bdc76bcd953fcb7ac377 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:09 -0700 Subject: nfp: Initialize RX and TX ring 64-bit stats seqcounts On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that. Fixes: 4c3523623dc0 ("net: add driver for Netronome NFP4000/NFP6000 NIC VFs") Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18750ff0ede6..4631ca8b8eb2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -513,6 +513,7 @@ nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, tx_ring->idx = idx; tx_ring->r_vec = r_vec; tx_ring->is_xdp = is_xdp; + u64_stats_init(&tx_ring->r_vec->tx_sync); tx_ring->qcidx = tx_ring->idx * nn->stride_tx; tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); @@ -532,6 +533,7 @@ nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring, rx_ring->idx = idx; rx_ring->r_vec = r_vec; + u64_stats_init(&rx_ring->r_vec->rx_sync); rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); -- cgit v1.2.3 From 790cb2ebb3f9c5d26a320117d5d13cafe479484d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:10 -0700 Subject: gtp: Initialize 64-bit per-cpu stats correctly On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that by using netdev_alloc_pcpu_stats() instead of an open coded allocation. Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 1542e837fdfa..f38e32a7ec9c 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -364,7 +364,7 @@ static int gtp_dev_init(struct net_device *dev) gtp->dev = dev; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; -- cgit v1.2.3 From 4a0dee1ffe0e8f4101e704a325e97f8997b0abcc Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:12 -0700 Subject: netvsc: Initialize 64-bit stats seqcount On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that. In commit 6c80f3fc2398 ("netvsc: report per-channel stats in ethtool statistics") netdev_alloc_pcpu_stats() was removed in favor of open-coding the 64-bits statistics, except that u64_stats_init() was missed. Fixes: 6c80f3fc2398 ("netvsc: report per-channel stats in ethtool statistics") Signed-off-by: Florian Fainelli Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 0a9167dd72fb..96f90c75d1b7 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1302,6 +1302,8 @@ int netvsc_device_add(struct hv_device *device, struct netvsc_channel *nvchan = &net_device->chan_table[i]; nvchan->channel = device->channel; + u64_stats_init(&nvchan->tx_stats.syncp); + u64_stats_init(&nvchan->rx_stats.syncp); } /* Enable NAPI handler before init callbacks */ -- cgit v1.2.3 From 87173cd6cf242f2340db187d82bd47a48fe5e5fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 1 Aug 2017 12:11:13 -0700 Subject: ipvlan: Fix 64-bit statistics seqcount initialization On 32-bit hosts and with CONFIG_DEBUG_LOCK_ALLOC we should be seeing a lockdep splat indicating this seqcount is not correctly initialized, fix that by using the proper helper function: netdev_alloc_pcpu_stats(). Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index f37e3c1fd4e7..8dab74a81303 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -192,7 +192,7 @@ static int ipvlan_init(struct net_device *dev) netdev_lockdep_set_classes(dev); - ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats); + ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); if (!ipvlan->pcpu_stats) return -ENOMEM; -- cgit v1.2.3 From f7f8c1756e9a5f1258a7cc6b663f8451b724900f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Wed, 5 Jul 2017 08:51:09 +0200 Subject: nand: fix wrong default oob layout for small pages using soft ecc When using soft ecc, if no ooblayout is given, the core automatically uses one of the nand_ooblayout_{sp,lp}*() functions to determine the layout inside the out of band data. Until kernel version 4.6, struct nand_ecclayout was used for that purpose. During the migration from 4.6 to 4.7, an error shown up in the small page layout, in the case oob section is only 8 bytes long. The layout was using three bytes (0, 1, 2) for ecc, two bytes (3, 4) as free bytes, one byte (5) for bad block marker and finally two bytes (6, 7) as free bytes, as shown there: [linux-4.6] drivers/mtd/nand/nand_base.c:52 static struct nand_ecclayout nand_oob_8 = { .eccbytes = 3, .eccpos = {0, 1, 2}, .oobfree = { {.offset = 3, .length = 2}, {.offset = 6, .length = 2} } }; This fixes the current implementation which is incoherent. It references bit 3 at the same time as an ecc byte and a free byte. Furthermore, it is clear with the previous implementation that there is only one ecc section with 8 bytes oob sections. We shall return -ERANGE in the nand_ooblayout_ecc_sp() function when asked for the second section. Signed-off-by: Miquel Raynal Fixes: 41b207a70d3a ("mtd: nand: implement the default mtd_ooblayout_ops") Cc: Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 5fa5ddc94834..7b3826b42447 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -65,8 +65,14 @@ static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section, if (!section) { oobregion->offset = 0; - oobregion->length = 4; + if (mtd->oobsize == 16) + oobregion->length = 4; + else + oobregion->length = 3; } else { + if (mtd->oobsize == 8) + return -ERANGE; + oobregion->offset = 6; oobregion->length = ecc->total - 4; } -- cgit v1.2.3 From 791eccd94965a8029ae09c5530bcb9a76794e408 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 28 Jul 2017 14:22:57 +0100 Subject: mtd: nand: sunxi: fix potential divide-by-zero error clk_round_rate() can return <= 0. Currently the value returned by clk_round_rate() is used directly for a division. This patch introduces a guard to ensure a divide-by-zero or a divide by a negative number for that matter can't happen by bugging out returning -EINVAL if clk_round_rate() returns <= 0. Fixes: 2d43457f79e4 ("mtd: nand: sunxi: fix EDO mode selection") Signed-off-by: Bryan O'Donoghue Signed-off-by: Boris Brezillon --- drivers/mtd/nand/sunxi_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index d0b6f8f9f297..6abd142b1324 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1728,6 +1728,10 @@ static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline, */ chip->clk_rate = NSEC_PER_SEC / min_clk_period; real_clk_rate = clk_round_rate(nfc->mod_clk, chip->clk_rate); + if (real_clk_rate <= 0) { + dev_err(nfc->dev, "Unable to round clk %lu\n", chip->clk_rate); + return -EINVAL; + } /* * ONFI specification 3.1, paragraph 4.15.2 dictates that EDO data -- cgit v1.2.3 From cb25fae18207330caac848d850e11f9cdb500dbf Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:18:03 -0600 Subject: mtd: nand: Fix a docs build warning Commit 0b4773fd1649 (mtd: nand: Drop unused cached programming support) removed the "cached" parameter from nand_write_page(), but did not update the kerneldoc comments, creating this docs build warning: ./drivers/mtd/nand/nand_base.c:2751: warning: Excess function parameter 'cached' description in 'nand_write_page' Remove the offending line so we can have a little peace and quiet. Signed-off-by: Jonathan Corbet Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 7b3826b42447..5b7c8d05360f 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2747,7 +2747,6 @@ static int nand_write_page_syndrome(struct mtd_info *mtd, * @buf: the data to write * @oob_required: must write chip->oob_poi to OOB * @page: page number to write - * @cached: cached programming * @raw: use _raw version of write_page */ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From a11bf5ed951f8900d244d09eb03a888b59c7fc82 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:29:56 +0200 Subject: mtd: nand: Fix timing setup for NANDs that do not support SET FEATURES Some ONFI NANDs do not support the SET/GET FEATURES commands, which, according to the spec, is perfectly valid. On these NANDs we can't set a specific timing mode using the "timing mode" feature, and we should assume the NAND does not require any setup to enter a specific timing mode. Signed-off-by: Boris Brezillon Fixes: d8e725dd8311 ("mtd: nand: automate NAND timings selection") Reported-by: Alexander Dahl Cc: Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 5b7c8d05360f..c6c18b82f8f4 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1131,7 +1131,9 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr) * Ensure the timing mode has been changed on the chip side * before changing timings on the controller side. */ - if (chip->onfi_version) { + if (chip->onfi_version && + (le16_to_cpu(chip->onfi_params.opt_cmd) & + ONFI_OPT_CMD_SET_GET_FEATURES)) { u8 tmode_param[ONFI_SUBFEATURE_PARAM_LEN] = { chip->onfi_timing_mode_default, }; -- cgit v1.2.3 From 6d29231000bbe0fb9e4893a9c68151ffdd3b5469 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:31:27 +0200 Subject: mtd: nand: Declare tBERS, tR and tPROG as u64 to avoid integer overflow All timings in nand_sdr_timings are expressed in picoseconds but some of them may not fit in an u32. Signed-off-by: Boris Brezillon Fixes: 204e7ecd47e2 ("mtd: nand: Add a few more timings to nand_sdr_timings") Reported-by: Alexander Dahl Cc: Reviewed-by: Alexander Dahl Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_timings.c | 6 +++--- include/linux/mtd/nand.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c index f06312df3669..7e36d7d13c26 100644 --- a/drivers/mtd/nand/nand_timings.c +++ b/drivers/mtd/nand/nand_timings.c @@ -311,9 +311,9 @@ int onfi_init_data_interface(struct nand_chip *chip, struct nand_sdr_timings *timings = &iface->timings.sdr; /* microseconds -> picoseconds */ - timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog); - timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers); - timings->tR_max = 1000000UL * le16_to_cpu(params->t_r); + timings->tPROG_max = 1000000ULL * le16_to_cpu(params->t_prog); + timings->tBERS_max = 1000000ULL * le16_to_cpu(params->t_bers); + timings->tR_max = 1000000ULL * le16_to_cpu(params->t_r); /* nanoseconds -> picoseconds */ timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs); diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 892148c448cc..5216d2eb2289 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -681,10 +681,10 @@ struct nand_buffers { * @tWW_min: WP# transition to WE# low */ struct nand_sdr_timings { - u32 tBERS_max; + u64 tBERS_max; u32 tCCS_min; - u32 tPROG_max; - u32 tR_max; + u64 tPROG_max; + u64 tR_max; u32 tALH_min; u32 tADL_min; u32 tALS_min; -- cgit v1.2.3 From ee02f73e04c0e690600f621a3a1d2245834af7fe Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 31 Jul 2017 10:32:21 +0200 Subject: mtd: nand: atmel: Fix EDO mode check EDO mode should be used when tRC is less than 30ns, but timings are expressed in picoseconds in the nand_sdr_timings struct. Signed-off-by: Boris Brezillon Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks") Reported-by: Alexander Dahl Tested-by: Alexander Dahl Signed-off-by: Boris Brezillon --- drivers/mtd/nand/atmel/nand-controller.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index d922a88e407f..2c8baa0c2c4e 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1201,7 +1201,7 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand, * tRC < 30ns implies EDO mode. This controller does not support this * mode. */ - if (conf->timings.sdr.tRC_min < 30) + if (conf->timings.sdr.tRC_min < 30000) return -ENOTSUPP; atmel_smc_cs_conf_init(smcconf); -- cgit v1.2.3 From 1ad43c0078b79a76accd0fe64062e47b3430dc6b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 2 Aug 2017 08:01:45 +0800 Subject: blk-mq: don't leak preempt counter/q_usage_counter when allocating rq failed When blk_mq_get_request() failed, preempt counter isn't released, and blk_mq_make_request() doesn't release the counter too. This patch fixes the issue, and makes sure that preempt counter is only held if rq is allocated successfully. The same policy is applied on .q_usage_counter too. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 041f7b7fa0d6..211ef367345f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -301,11 +301,12 @@ static struct request *blk_mq_get_request(struct request_queue *q, struct elevator_queue *e = q->elevator; struct request *rq; unsigned int tag; + struct blk_mq_ctx *local_ctx = NULL; blk_queue_enter_live(q); data->q = q; if (likely(!data->ctx)) - data->ctx = blk_mq_get_ctx(q); + data->ctx = local_ctx = blk_mq_get_ctx(q); if (likely(!data->hctx)) data->hctx = blk_mq_map_queue(q, data->ctx->cpu); if (op & REQ_NOWAIT) @@ -324,6 +325,10 @@ static struct request *blk_mq_get_request(struct request_queue *q, tag = blk_mq_get_tag(data); if (tag == BLK_MQ_TAG_FAIL) { + if (local_ctx) { + blk_mq_put_ctx(local_ctx); + data->ctx = NULL; + } blk_queue_exit(q); return NULL; } @@ -356,12 +361,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, rq = blk_mq_get_request(q, NULL, op, &alloc_data); - blk_mq_put_ctx(alloc_data.ctx); - blk_queue_exit(q); - if (!rq) return ERR_PTR(-EWOULDBLOCK); + blk_mq_put_ctx(alloc_data.ctx); + blk_queue_exit(q); + rq->__data_len = 0; rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; @@ -407,11 +412,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, rq = blk_mq_get_request(q, NULL, op, &alloc_data); - blk_queue_exit(q); - if (!rq) return ERR_PTR(-EWOULDBLOCK); + blk_queue_exit(q); + return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -- cgit v1.2.3 From d1ce263feb40e6b3208f3e1ebec6dbe86df6f522 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 12 Jul 2017 15:25:09 +0200 Subject: irqchip/gic-v3-its: Remove ACPICA version check for ACPI NUMA The version check was added due to dependency to a618c7f89a02 ACPICA: Add support for new SRAT subtable Now, that this code is in the kernel, remove the check. This is esp. useful to enable backports. Signed-off-by: Robert Richter Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fed99c55e2f4..3bfbf8d96a0e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1835,7 +1835,7 @@ static int __init its_of_probe(struct device_node *node) #define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K) -#if defined(CONFIG_ACPI_NUMA) && (ACPI_CA_VERSION >= 0x20170531) +#ifdef CONFIG_ACPI_NUMA struct its_srat_map { /* numa node id */ u32 numa_node; -- cgit v1.2.3 From 39a06b67c2c1256bcf2361a1f67d2529f70ab206 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 18 Jul 2017 18:37:55 +0100 Subject: irqchip/gic: Ensure we have an ISB between ack and ->handle_irq Devices that expose their interrupt status registers via system registers (e.g. Statistical profiling, CPU PMU, DynamIQ PMU, arch timer, vgic (although unused by Linux), ...) rely on a context synchronising operation on the CPU to ensure that the updated status register is visible to the CPU when handling the interrupt. This usually happens as a result of taking the IRQ exception in the first place, but there are two race scenarios where this isn't the case. For example, let's say we have two peripherals (X and Y), where Y uses a system register for its interrupt status. Case 1: 1. CPU takes an IRQ exception as a result of X raising an interrupt 2. Y then raises its interrupt line, but the update to its system register is not yet visible to the CPU 3. The GIC decides to expose Y's interrupt number first in the Ack register 4. The CPU runs the IRQ handler for Y, but the status register is stale Case 2: 1. CPU takes an IRQ exception as a result of X raising an interrupt 2. CPU reads the interrupt number for X from the Ack register and runs its IRQ handler 3. Y raises its interrupt line and the Ack register is updated, but again, the update to its system register is not yet visible to the CPU. 4. Since the GIC drivers poll the Ack register, we read Y's interrupt number and run its handler without a context synchronisation operation, therefore seeing the stale register value. In either case, we run the risk of missing an IRQ. This patch solves the problem by ensuring that we execute an ISB in the GIC drivers prior to invoking the interrupt handler. This is already the case for GICv3 and EOIMode 1 (the usual case for the host). Cc: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3.c | 2 ++ drivers/irqchip/irq-gic.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 5ba64a7584a3..984c3ecfd22c 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -353,6 +353,8 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs if (static_key_true(&supports_deactivate)) gic_write_eoir(irqnr); + else + isb(); err = handle_domain_irq(gic_data.domain, irqnr, regs); if (err) { diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 940c16278758..d3e7c43718b8 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -361,6 +361,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) if (likely(irqnr > 15 && irqnr < 1020)) { if (static_key_true(&supports_deactivate)) writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); + isb(); handle_domain_irq(gic->domain, irqnr, regs); continue; } @@ -401,10 +402,12 @@ static void gic_handle_cascade_irq(struct irq_desc *desc) goto out; cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); - if (unlikely(gic_irq < 32 || gic_irq > 1020)) + if (unlikely(gic_irq < 32 || gic_irq > 1020)) { handle_bad_irq(desc); - else + } else { + isb(); generic_handle_irq(cascade_irq); + } out: chained_irq_exit(chip, desc); -- cgit v1.2.3 From 8466489ef5ba48272ba4fa4ea9f8f403306de4c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Aug 2017 20:11:08 -0500 Subject: xhci: Reset Renesas uPD72020x USB controller for 32-bit DMA issue The Renesas uPD72020x XHCI controller seems to suffer from a really annoying bug, where it may retain some of its DMA programming across a XHCI reset, and despite the driver correctly programming new DMA addresses. This is visible if the device has been using 64-bit DMA addresses, and is then switched to using 32-bit DMA addresses. The top 32 bits of the address (now zero) are ignored are replaced by the 32 bits from the *previous* programming. Sticking with 64-bit DMA always works, but doesn't seem very appropriate. A PCI reset of the device restores the normal functionality, which is done at probe time. Unfortunately, this has to be done before any quirk has been discovered, hence the intrusive nature of the fix. Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Acked-by: Mathias Nyman CC: stable@vger.kernel.org # v4.11+ --- drivers/usb/host/pci-quirks.c | 20 ++++++++++++++++++++ drivers/usb/host/pci-quirks.h | 1 + drivers/usb/host/xhci-pci.c | 7 +++++++ 3 files changed, 28 insertions(+) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index c8989c62a262..858fefd67ebe 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -1150,3 +1150,23 @@ static void quirk_usb_early_handoff(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_SERIAL_USB, 8, quirk_usb_early_handoff); + +bool usb_xhci_needs_pci_reset(struct pci_dev *pdev) +{ + /* + * Our dear uPD72020{1,2} friend only partially resets when + * asked to via the XHCI interface, and may end up doing DMA + * at the wrong addresses, as it keeps the top 32bit of some + * addresses from its previous programming under obscure + * circumstances. + * Give it a good wack at probe time. Unfortunately, this + * needs to happen before we've had a chance to discover any + * quirk, or the system will be in a rather bad state. + */ + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && + (pdev->device == 0x0014 || pdev->device == 0x0015)) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(usb_xhci_needs_pci_reset); diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index 655994480198..5582cbafecd4 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -15,6 +15,7 @@ void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev); void usb_enable_intel_xhci_ports(struct pci_dev *xhci_pdev); void usb_disable_xhci_ports(struct pci_dev *xhci_pdev); void sb800_prefetch(struct device *dev, int on); +bool usb_xhci_needs_pci_reset(struct pci_dev *pdev); #else struct pci_dev; static inline void usb_amd_quirk_pll_disable(void) {} diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5b0fa553c8bc..8071c8fdd15e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -284,6 +284,13 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) driver = (struct hc_driver *)id->driver_data; + /* For some HW implementation, a XHCI reset is just not enough... */ + if (usb_xhci_needs_pci_reset(dev)) { + dev_info(&dev->dev, "Resetting\n"); + if (pci_reset_function_locked(dev)) + dev_warn(&dev->dev, "Reset failed"); + } + /* Prevent runtime suspending between USB-2 and USB-3 initialization */ pm_runtime_get_noresume(&dev->dev); -- cgit v1.2.3 From fb52c3b597f0b44e893ded8c253845561f9f57da Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Tue, 1 Aug 2017 10:24:17 +0000 Subject: lan78xx: USB fast connect/disconnect crash fix USB fast connect/disconnect crash fix When USB plugged/unplugged at fast rate, lan78xx_mdio_init() in lan78xx_bind() failing case is not handled. Whenever lan78xx_mdio_init() failed, dev->mdiobus will be freed, however since lan78xx_bind() not consider as error and try to proceed for further initialization in lan78xx_probe() which leads system hung/crash. Also when register_netdev() failed, netdev is freed without calling lan78xx_unbind(). Hence halting the failed cases right manner fixes the system crash/hung issue. Signed-off-by: Nisar Sayed Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 5833f7e2a127..8ef3639649a0 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2858,13 +2858,13 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) /* Init all registers */ ret = lan78xx_reset(dev); - lan78xx_mdio_init(dev); + ret = lan78xx_mdio_init(dev); dev->net->flags |= IFF_MULTICAST; pdata->wol = WAKE_MAGIC; - return 0; + return ret; } static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf) @@ -3525,11 +3525,11 @@ static int lan78xx_probe(struct usb_interface *intf, udev = interface_to_usbdev(intf); udev = usb_get_dev(udev); - ret = -ENOMEM; netdev = alloc_etherdev(sizeof(struct lan78xx_net)); if (!netdev) { - dev_err(&intf->dev, "Error: OOM\n"); - goto out1; + dev_err(&intf->dev, "Error: OOM\n"); + ret = -ENOMEM; + goto out1; } /* netdev_printk() needs this */ @@ -3610,7 +3610,7 @@ static int lan78xx_probe(struct usb_interface *intf, ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out2; + goto out3; } usb_set_intfdata(intf, dev); -- cgit v1.2.3 From 0573f94b1abfb42952275ca67f665dbd720b00d7 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Tue, 1 Aug 2017 10:24:33 +0000 Subject: lan78xx: Fix to handle hard_header_len update Fix to handle hard_header_len update When ifconfig up/down sequence is initiated hard_header_len get updated incrementally for each ifconfig up /down sequence, this leads invalid hard_header_len, moving to lan78xx_bind to have one time update of hard_header_len addresses the issue. Signed-off-by: Nisar Sayed Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8ef3639649a0..b99a7fb09f8e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2367,9 +2367,6 @@ static int lan78xx_reset(struct lan78xx_net *dev) /* Init LTM */ lan78xx_init_ltm(dev); - dev->net->hard_header_len += TX_OVERHEAD; - dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; - if (dev->udev->speed == USB_SPEED_SUPER) { buf = DEFAULT_BURST_CAP_SIZE / SS_USB_PKT_SIZE; dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; @@ -2855,6 +2852,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) return ret; } + dev->net->hard_header_len += TX_OVERHEAD; + dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len; + /* Init all registers */ ret = lan78xx_reset(dev); -- cgit v1.2.3 From c994f778bb1cca8ebe7a4e528cefec233e93b5cc Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Tue, 1 Aug 2017 16:43:43 +0300 Subject: net/mlx4_en: Fix wrong indication of Wake-on-LAN (WoL) support Currently when WoL is supported but disabled, ethtool reports: "Supports Wake-on: d". Fix the indication of Wol support, so that the indication remains "g" all the time if the NIC supports WoL. Tested: As accepted, when NIC supports WoL- ethtool reports: Supports Wake-on: g Wake-on: d when NIC doesn't support WoL- ethtool reports: Supports Wake-on: d Wake-on: d Fixes: 14c07b1358ed ("mlx4: Wake on LAN support") Signed-off-by: Inbar Karmy Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 15 ++++++++------- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 2 ++ include/linux/mlx4/device.h | 1 + 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c751a1d434ad..3d4e4a5d00d1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct mlx4_en_priv *priv = netdev_priv(netdev); + struct mlx4_caps *caps = &priv->mdev->dev->caps; int err = 0; u64 config = 0; u64 mask; @@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev, mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 : MLX4_DEV_CAP_FLAG_WOL_PORT2; - if (!(priv->mdev->dev->caps.flags & mask)) { + if (!(caps->flags & mask)) { wol->supported = 0; wol->wolopts = 0; return; } + if (caps->wol_port[priv->port]) + wol->supported = WAKE_MAGIC; + else + wol->supported = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL information\n"); return; } - if (config & MLX4_EN_WOL_MAGIC) - wol->supported = WAKE_MAGIC; - else - wol->supported = 0; - - if (config & MLX4_EN_WOL_ENABLED) + if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC)) wol->wolopts = WAKE_MAGIC; else wol->wolopts = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 37e84a59e751..c165f16623a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -764,6 +764,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET 0x3e #define QUERY_DEV_CAP_MAX_PKEY_OFFSET 0x3f #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET 0x40 +#define QUERY_DEV_CAP_WOL_OFFSET 0x43 #define QUERY_DEV_CAP_FLAGS_OFFSET 0x44 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET 0x48 #define QUERY_DEV_CAP_UAR_SZ_OFFSET 0x49 @@ -920,6 +921,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET); dev_cap->flags = flags | (u64)ext_flags << 32; + MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET); + dev_cap->wol_port[1] = !!(field & 0x20); + dev_cap->wol_port[2] = !!(field & 0x40); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET); dev_cap->reserved_uars = field >> 4; MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 5343a0599253..b52ba01aa486 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -129,6 +129,7 @@ struct mlx4_dev_cap { u32 dmfs_high_rate_qpn_range; struct mlx4_rate_limit_caps rl_caps; struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1]; + bool wol_port[MLX4_MAX_PORTS + 1]; }; struct mlx4_func_cap { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a27c9c13a36e..09b9bc17bce9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,6 +424,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.stat_rate_support = dev_cap->stat_rate_support; dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + dev->caps.wol_port[1] = dev_cap->wol_port[1]; + dev->caps.wol_port[2] = dev_cap->wol_port[2]; /* Save uar page shift */ if (!mlx4_is_slave(dev)) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index aad5d81dfb44..b54517c05e9a 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -620,6 +620,7 @@ struct mlx4_caps { u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; u32 vf_caps; + bool wol_port[MLX4_MAX_PORTS + 1]; struct mlx4_rate_limit_caps rl_caps; }; -- cgit v1.2.3 From 5886259c127026ee4a6a7093d6c320440d833fda Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 1 Aug 2017 16:43:44 +0300 Subject: net/mlx4_core: Fix sl_to_vl_change bit offset in flags2 dump The index value in function dump_dev_cap_flags2() for outputting "sl to vl mapping table change event support" needs to be consistent with the value of the enumerated constant MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT defined in file include/linux/mlx4_device.h The change here restores that consistency. Fixes: fd10ed8e6f42 ("IB/mlx4: Fix possible vl/sl field mismatch in LRH header in QP1 packets") Reported-by: Mukesh Kacker Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c165f16623a9..009dd03466d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -160,7 +160,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", [35] = "QinQ VST mode support", - [36] = "sl to vl mapping table change event support" + [37] = "sl to vl mapping table change event support", }; int i; -- cgit v1.2.3 From f9fb9d0b8519a795eeaca5108205593b66068cce Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 1 Aug 2017 16:43:45 +0300 Subject: net/mlx4_core: Fix namespace misalignment in QinQ VST support commit The cited commit introduced the following new enum value in file include/linux/mlx4/device.h: MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP However the value of MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP needs to stay consistent with the value used in another namespace in function dump_dev_cap_flags2(), which is manually kept in sync. The change here restores that consistency. Fixes: 7c3d21c8153c ("net/mlx4_core: Preparation for VF vlan protocol 802.1ad") Reported-by: Mukesh Kacker Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 009dd03466d6..7c9502bae1cc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -159,7 +159,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [32] = "Loopback source checks support", [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", - [35] = "QinQ VST mode support", + [36] = "QinQ VST mode support", [37] = "sl to vl mapping table change event support", }; int i; -- cgit v1.2.3 From bff0c6840c135c14675404ba697aae2638c37cb6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 1 Aug 2017 16:43:46 +0300 Subject: net/mlx4_core: Fixes missing capability bit in flags2 capability dump The cited commit introduced the following new enum value in file include/linux/mlx4/device.h: QUERY_DEV_CAP_DIAG_RPRT_PER_PORT However, it failed to introduce a corresponding entry in function dump_dev_cap_flags2() for outputting a line in the message log when this capability bit is set. The change here fixes that omission. Fixes: c7c122ed67e4 ("net/mlx4: Add diagnostic counters capability bit") Reported-by: Mukesh Kacker Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 7c9502bae1cc..041c0ed65929 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -159,6 +159,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [32] = "Loopback source checks support", [33] = "RoCEv2 support", [34] = "DMFS Sniffer support (UC & MC)", + [35] = "Diag counters per port", [36] = "QinQ VST mode support", [37] = "sl to vl mapping table change event support", }; -- cgit v1.2.3 From bed9ff165960921303a100228585f2d1691b42eb Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 2 Aug 2017 00:45:44 +0900 Subject: usb: qmi_wwan: add D-Link DWM-222 device ID Signed-off-by: Hector Martin Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5894e3c9468f..ff6f39fe6c00 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1175,6 +1175,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ + {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ -- cgit v1.2.3 From 4d96f12a072c669d48dc3a2c6b539a9faeca138d Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Tue, 1 Aug 2017 15:04:36 -0500 Subject: ibmvnic: Initialize SCRQ's during login renegotiation SCRQ resources are freed during renegotiation, but they are not re-allocated afterwards due to some changes in the initialization process. Fix that by re-allocating the memory after renegotation. SCRQ's can also be freed if a server capabilities request fails. If this were encountered during a device reset for example, SCRQ's may not be re-allocated. This operation is not necessary anymore so remove it. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a3e694679635..c45e8e3b82d3 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -111,6 +111,7 @@ static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); static void send_request_unmap(struct ibmvnic_adapter *, u8); static void send_login(struct ibmvnic_adapter *adapter); static void send_cap_queries(struct ibmvnic_adapter *adapter); +static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); static int ibmvnic_init(struct ibmvnic_adapter *); static void release_crq_queue(struct ibmvnic_adapter *); @@ -651,6 +652,7 @@ static int ibmvnic_login(struct net_device *netdev) struct ibmvnic_adapter *adapter = netdev_priv(netdev); unsigned long timeout = msecs_to_jiffies(30000); struct device *dev = &adapter->vdev->dev; + int rc; do { if (adapter->renegotiate) { @@ -664,6 +666,18 @@ static int ibmvnic_login(struct net_device *netdev) dev_err(dev, "Capabilities query timeout\n"); return -1; } + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, + "Initialization of SCRQ's failed\n"); + return -1; + } + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, + "Initialization of SCRQ's irqs failed\n"); + return -1; + } } reinit_completion(&adapter->init_done); @@ -3004,7 +3018,6 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, *req_value, (long int)be64_to_cpu(crq->request_capability_rsp. number), name); - release_sub_crqs(adapter); *req_value = be64_to_cpu(crq->request_capability_rsp.number); ibmvnic_send_req_caps(adapter, 1); return; -- cgit v1.2.3 From ed254971edea92c3ac5c67c6a05247a92aa6075e Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Tue, 1 Aug 2017 13:22:32 -0700 Subject: tcp: avoid setting cwnd to invalid ssthresh after cwnd reduction states If the sender switches the congestion control during ECN-triggered cwnd-reduction state (CA_CWR), upon exiting recovery cwnd is set to the ssthresh value calculated by the previous congestion control. If the previous congestion control is BBR that always keep ssthresh to TCP_INIFINITE_SSTHRESH, cwnd ends up being infinite. The safe step is to avoid assigning invalid ssthresh value when recovery ends. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2920e0cb09f8..dad026fcfd09 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2520,8 +2520,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_jiffies32; } -- cgit v1.2.3 From 42ef3bedf30020a3b5298c2b918ccd7b6e1794d3 Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Wed, 19 Jul 2017 11:02:30 +0200 Subject: crypto: inside-secure - fix invalidation check in hmac_sha1_setkey The safexcel_hmac_sha1_setkey function checks if an invalidation command should be issued, i.e. when the context ipad/opad change. This checks is done after filling the ipad/opad which and it can't be true. The patch fixes this by moving the check before the ipad/opad memcpy operations. Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 8527a5899a2f..a11b2edb41b9 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -883,9 +883,6 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key, if (ret) return ret; - memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE); - memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE); - for (i = 0; i < ARRAY_SIZE(istate.state); i++) { if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) || ctx->opad[i] != le32_to_cpu(ostate.state[i])) { @@ -894,6 +891,9 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key, } } + memcpy(ctx->ipad, &istate.state, SHA1_DIGEST_SIZE); + memcpy(ctx->opad, &ostate.state, SHA1_DIGEST_SIZE); + return 0; } -- cgit v1.2.3 From 60c4081ec4195389f4fa1fce83eaad5e4b948748 Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Wed, 19 Jul 2017 11:02:31 +0200 Subject: crypto: inside-secure - fix the sha state length in hmac_sha1_setkey A check is performed on the ipad/opad in the safexcel_hmac_sha1_setkey function, but the index used by the loop doing it is wrong. It is currently the size of the state array while it should be the size of a sha1 state. This patch fixes it. Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Reported-by: Dan Carpenter Signed-off-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index a11b2edb41b9..3f819399cd95 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -883,7 +883,7 @@ static int safexcel_hmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key, if (ret) return ret; - for (i = 0; i < ARRAY_SIZE(istate.state); i++) { + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) { if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) || ctx->opad[i] != le32_to_cpu(ostate.state[i])) { ctx->base.needs_inv = true; -- cgit v1.2.3 From 2d80bd3f7eb69204cd5dec4fa7fe7e12cbfaed13 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 4 Jul 2017 15:58:39 +0300 Subject: pinctrl: cherryview: Add Setzer models to the Chromebook DMI quirk Add one more model to the Chromebook DMI quirk to make it working again. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945 Fixes: 2a8209fa6823 ("pinctrl: cherryview: Extend the Chromebook DMI quirk to Intel_Strago systems") Reported-by: mail@abhishek.geek.nz Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 20f1b4493994..04e929fd0ffe 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1547,6 +1547,13 @@ static const struct dmi_system_id chv_no_valid_mask[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), }, }, + { + .ident = "HP Chromebook 11 G5 (Setzer)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"), + }, + }, { .ident = "Acer Chromebook R11 (Cyan)", .matches = { -- cgit v1.2.3 From 40d829fb2ec636b6b4b0cc95e2546ab9aca04cc9 Mon Sep 17 00:00:00 2001 From: Manu Gautam Date: Wed, 19 Jul 2017 17:07:10 +0530 Subject: usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets The PIDs for Isochronous data transfers are incorrect for high bandwidth IN endpoints when the request length is less than EP wMaxPacketSize. As per spec correct PIDs for ISOC data transfers are: 1) For request length <= maxpacket - DATA0, 2) For maxpacket < length <= (2 * maxpacket) - DATA1, DATA0 3) For (2 * maxpacket) < length <= (3 * maxpacket) - DATA2, DATA1, DATA0. But driver always sets PCM fields based on wMaxPacketSize due to which DATA2 happens even for small requests. Fix this by setting the PCM field of trb->size depending on request length rather than fixing it to the value depending on wMaxPacketSize. Ideally it shouldn't give any issues as dwc3 will send 0-length packet for next IN token if host sends (even after receiving a short packet). Windows seems to ignore this but with MacOS frame loss observed when using f_uvc. Signed-off-by: Manu Gautam Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6b299c7b7656..f064f1549333 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -896,9 +896,40 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, if (!node) { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS_FIRST; + /* + * USB Specification 2.0 Section 5.9.2 states that: "If + * there is only a single transaction in the microframe, + * only a DATA0 data packet PID is used. If there are + * two transactions per microframe, DATA1 is used for + * the first transaction data packet and DATA0 is used + * for the second transaction data packet. If there are + * three transactions per microframe, DATA2 is used for + * the first transaction data packet, DATA1 is used for + * the second, and DATA0 is used for the third." + * + * IOW, we should satisfy the following cases: + * + * 1) length <= maxpacket + * - DATA0 + * + * 2) maxpacket < length <= (2 * maxpacket) + * - DATA1, DATA0 + * + * 3) (2 * maxpacket) < length <= (3 * maxpacket) + * - DATA2, DATA1, DATA0 + */ if (speed == USB_SPEED_HIGH) { struct usb_ep *ep = &dep->endpoint; - trb->size |= DWC3_TRB_SIZE_PCM1(ep->mult - 1); + unsigned int mult = ep->mult - 1; + unsigned int maxp = usb_endpoint_maxp(ep->desc); + + if (length <= (2 * maxp)) + mult--; + + if (length <= maxp) + mult--; + + trb->size |= DWC3_TRB_SIZE_PCM1(mult); } } else { trb->ctrl = DWC3_TRBCTL_ISOCHRONOUS; -- cgit v1.2.3 From aca5b9ebd096039657417c321a9252c696b359c2 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 2 Aug 2017 21:06:35 +0900 Subject: usb: gadget: udc: renesas_usb3: Fix usb_gadget_giveback_request() calling According to the gadget.h, a "complete" function will always be called with interrupts disabled. However, sometimes usb3_request_done() function is called with interrupts enabled. So, this function should be held by spin_lock_irqsave() to disable interruption. Also, this driver has to call spin_unlock() to avoid spinlock recursion by this driver before calling usb_gadget_giveback_request(). Reported-by: Kazuya Mizuguchi Tested-by: Kazuya Mizuguchi Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller") Cc: # v4.5+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/renesas_usb3.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 62dc9c7798e7..e1de8fe599a3 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -838,21 +838,32 @@ static struct renesas_usb3_request *usb3_get_request(struct renesas_usb3_ep return usb3_req; } -static void usb3_request_done(struct renesas_usb3_ep *usb3_ep, - struct renesas_usb3_request *usb3_req, int status) +static void __usb3_request_done(struct renesas_usb3_ep *usb3_ep, + struct renesas_usb3_request *usb3_req, + int status) { struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep); - unsigned long flags; dev_dbg(usb3_to_dev(usb3), "giveback: ep%2d, %u, %u, %d\n", usb3_ep->num, usb3_req->req.length, usb3_req->req.actual, status); usb3_req->req.status = status; - spin_lock_irqsave(&usb3->lock, flags); usb3_ep->started = false; list_del_init(&usb3_req->queue); - spin_unlock_irqrestore(&usb3->lock, flags); + spin_unlock(&usb3->lock); usb_gadget_giveback_request(&usb3_ep->ep, &usb3_req->req); + spin_lock(&usb3->lock); +} + +static void usb3_request_done(struct renesas_usb3_ep *usb3_ep, + struct renesas_usb3_request *usb3_req, int status) +{ + struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep); + unsigned long flags; + + spin_lock_irqsave(&usb3->lock, flags); + __usb3_request_done(usb3_ep, usb3_req, status); + spin_unlock_irqrestore(&usb3->lock, flags); } static void usb3_irq_epc_pipe0_status_end(struct renesas_usb3 *usb3) -- cgit v1.2.3 From 5a8141bd41f0e7f7758956e2340e10cdf5f2b0b9 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Wed, 2 Aug 2017 13:15:42 +0530 Subject: usb: phy: phy-msm-usb: Fix usage of devm_regulator_bulk_get() The regulator_bulk_data pointer passed to devm_regulator_bulk_get() is used to store the client handles for the regulators, which is later used by devm_regulator_bulk_release() to free the regulators. Passing a local array as is done here means the memory used to store the handles is freed causing the handles to be corrupted, resulting in a crash when devm_regulator_bulk_release() tries to free them. Fix this my moving the array inside of the msm_otg structure. Signed-off-by: Rajendra Nayak Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-msm-usb.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c index 8fb86a5f458e..3d0dd2f97415 100644 --- a/drivers/usb/phy/phy-msm-usb.c +++ b/drivers/usb/phy/phy-msm-usb.c @@ -197,6 +197,7 @@ struct msm_otg { struct regulator *v3p3; struct regulator *v1p8; struct regulator *vddcx; + struct regulator_bulk_data supplies[3]; struct reset_control *phy_rst; struct reset_control *link_rst; @@ -1731,7 +1732,6 @@ static int msm_otg_reboot_notify(struct notifier_block *this, static int msm_otg_probe(struct platform_device *pdev) { - struct regulator_bulk_data regs[3]; int ret = 0; struct device_node *np = pdev->dev.of_node; struct msm_otg_platform_data *pdata; @@ -1817,17 +1817,18 @@ static int msm_otg_probe(struct platform_device *pdev) return motg->irq; } - regs[0].supply = "vddcx"; - regs[1].supply = "v3p3"; - regs[2].supply = "v1p8"; + motg->supplies[0].supply = "vddcx"; + motg->supplies[1].supply = "v3p3"; + motg->supplies[2].supply = "v1p8"; - ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(regs), regs); + ret = devm_regulator_bulk_get(motg->phy.dev, ARRAY_SIZE(motg->supplies), + motg->supplies); if (ret) return ret; - motg->vddcx = regs[0].consumer; - motg->v3p3 = regs[1].consumer; - motg->v1p8 = regs[2].consumer; + motg->vddcx = motg->supplies[0].consumer; + motg->v3p3 = motg->supplies[1].consumer; + motg->v1p8 = motg->supplies[2].consumer; clk_set_rate(motg->clk, 60000000); -- cgit v1.2.3 From 2acecd58969897795cf015c9057ebd349a3fda8a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Wed, 2 Aug 2017 13:21:45 +0900 Subject: usb: renesas_usbhs: Fix UGCTRL2 value for R-Car Gen3 The latest HW manual (Rev.0.55) shows us this UGCTRL2.VBUSSEL bit. If the bit sets to 1, the VBUS drive is controlled by phy related registers (called "UCOM Registers" on the manual). Since R-Car Gen3 environment will control VBUS by phy-rcar-gen3-usb2 driver, the UGCTRL2.VBUSSEL bit should be set to 1. So, this patch fixes the register's value. Otherwise, even if the ID pin indicates to peripheral, the R-Car will output USBn_PWEN to 1 when a host driver is running. Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control" Cc: # v4.6+ Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/rcar3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c index d544b331c9f2..02b67abfc2a1 100644 --- a/drivers/usb/renesas_usbhs/rcar3.c +++ b/drivers/usb/renesas_usbhs/rcar3.c @@ -20,9 +20,13 @@ /* Low Power Status register (LPSTS) */ #define LPSTS_SUSPM 0x4000 -/* USB General control register 2 (UGCTRL2), bit[31:6] should be 0 */ +/* + * USB General control register 2 (UGCTRL2) + * Remarks: bit[31:11] and bit[9:6] should be 0 + */ #define UGCTRL2_RESERVED_3 0x00000001 /* bit[3:0] should be B'0001 */ #define UGCTRL2_USB0SEL_OTG 0x00000030 +#define UGCTRL2_VBUSSEL 0x00000400 static void usbhs_write32(struct usbhs_priv *priv, u32 reg, u32 data) { @@ -34,7 +38,8 @@ static int usbhs_rcar3_power_ctrl(struct platform_device *pdev, { struct usbhs_priv *priv = usbhs_pdev_to_priv(pdev); - usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG); + usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG | + UGCTRL2_VBUSSEL); if (enable) { usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM); -- cgit v1.2.3 From b7d44c36a6f6d956e1539e0dd42f98b26e5a4684 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 28 Jul 2017 19:28:57 +0900 Subject: usb: renesas_usbhs: gadget: fix unused-but-set-variable warning The commit b8b9c974afee ("usb: renesas_usbhs: gadget: disable all eps when the driver stops") causes the unused-but-set-variable warning. But, if the usbhsg_ep_disable() will return non-zero value, udc/core.c doesn't clear the ep->enabled flag. So, this driver should not return non-zero value, if the pipe is zero because this means the pipe is already disabled. Otherwise, the ep->enabled flag is never cleared when the usbhsg_ep_disable() is called by the renesas_usbhs driver first. Fixes: b8b9c974afee ("usb: renesas_usbhs: gadget: disable all eps when the driver stops") Fixes: 11432050f070 ("usb: renesas_usbhs: gadget: fix NULL pointer dereference in ep_disable()") Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 93fba9033b00..2c8161bcf5b5 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -639,14 +639,11 @@ static int usbhsg_ep_disable(struct usb_ep *ep) struct usbhsg_uep *uep = usbhsg_ep_to_uep(ep); struct usbhs_pipe *pipe; unsigned long flags; - int ret = 0; spin_lock_irqsave(&uep->lock, flags); pipe = usbhsg_uep_to_pipe(uep); - if (!pipe) { - ret = -EINVAL; + if (!pipe) goto out; - } usbhsg_pipe_disable(uep); usbhs_pipe_free(pipe); -- cgit v1.2.3 From 61c12b49e1c9c77d7a1bcc161de540d0fd21cf0c Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Wed, 12 Jul 2017 19:26:58 -0700 Subject: fuse: Dont call set_page_dirty_lock() for ITER_BVEC pages for async_dio Commit 8fba54aebbdf ("fuse: direct-io: don't dirty ITER_BVEC pages") fixes the ITER_BVEC page deadlock for direct io in fuse by checking in fuse_direct_io(), whether the page is a bvec page or not, before locking it. However, this check is missed when the "async_dio" mount option is enabled. In this case, set_page_dirty_lock() is called from the req->end callback in request_end(), when the fuse thread is returning from userspace to respond to the read request. This will cause the same deadlock because the bvec condition is not checked in this path. Here is the stack of the deadlocked thread, while returning from userspace: [13706.656686] INFO: task glusterfs:3006 blocked for more than 120 seconds. [13706.657808] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13706.658788] glusterfs D ffffffff816c80f0 0 3006 1 0x00000080 [13706.658797] ffff8800d6713a58 0000000000000086 ffff8800d9ad7000 ffff8800d9ad5400 [13706.658799] ffff88011ffd5cc0 ffff8800d6710008 ffff88011fd176c0 7fffffffffffffff [13706.658801] 0000000000000002 ffffffff816c80f0 ffff8800d6713a78 ffffffff816c790e [13706.658803] Call Trace: [13706.658809] [] ? bit_wait_io_timeout+0x80/0x80 [13706.658811] [] schedule+0x3e/0x90 [13706.658813] [] schedule_timeout+0x1b5/0x210 [13706.658816] [] ? gup_pud_range+0x1db/0x1f0 [13706.658817] [] ? kvm_clock_read+0x1e/0x20 [13706.658819] [] ? kvm_clock_get_cycles+0x9/0x10 [13706.658822] [] ? ktime_get+0x52/0xc0 [13706.658824] [] io_schedule_timeout+0xa4/0x110 [13706.658826] [] bit_wait_io+0x36/0x50 [13706.658828] [] __wait_on_bit_lock+0x76/0xb0 [13706.658831] [] ? lock_request+0x46/0x70 [fuse] [13706.658834] [] __lock_page+0xaa/0xb0 [13706.658836] [] ? wake_atomic_t_function+0x40/0x40 [13706.658838] [] set_page_dirty_lock+0x58/0x60 [13706.658841] [] fuse_release_user_pages+0x58/0x70 [fuse] [13706.658844] [] ? fuse_aio_complete+0x190/0x190 [fuse] [13706.658847] [] fuse_aio_complete_req+0x29/0x90 [fuse] [13706.658849] [] request_end+0xd9/0x190 [fuse] [13706.658852] [] fuse_dev_do_write+0x336/0x490 [fuse] [13706.658854] [] fuse_dev_write+0x6e/0xa0 [fuse] [13706.658857] [] ? security_file_permission+0x23/0x90 [13706.658859] [] do_iter_readv_writev+0x60/0x90 [13706.658862] [] ? fuse_dev_splice_write+0x350/0x350 [fuse] [13706.658863] [] do_readv_writev+0x171/0x1f0 [13706.658866] [] ? try_to_wake_up+0x210/0x210 [13706.658868] [] vfs_writev+0x41/0x50 [13706.658870] [] SyS_writev+0x56/0xf0 [13706.658872] [] ? syscall_trace_leave+0xf1/0x160 [13706.658874] [] system_call_fastpath+0x12/0x71 Fix this by making should_dirty a fuse_io_priv parameter that can be checked in fuse_aio_complete_req(). Reported-by: Tiger Yang Signed-off-by: Ashish Samant Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 6 +++--- fs/fuse/fuse_i.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 76eac2a554c4..810ed4f99e38 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -609,7 +609,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req) struct fuse_io_priv *io = req->io; ssize_t pos = -1; - fuse_release_user_pages(req, !io->write); + fuse_release_user_pages(req, io->should_dirty); if (io->write) { if (req->misc.write.in.size != req->misc.write.out.size) @@ -1316,7 +1316,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; - bool should_dirty = !write && iter_is_iovec(iter); int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; struct inode *inode = file->f_mapping->host; @@ -1346,6 +1345,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, inode_unlock(inode); } + io->should_dirty = !write && iter_is_iovec(iter); while (count) { size_t nres; fl_owner_t owner = current->files; @@ -1360,7 +1360,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, nres = fuse_send_read(req, io, pos, nbytes, owner); if (!io->async) - fuse_release_user_pages(req, should_dirty); + fuse_release_user_pages(req, io->should_dirty); if (req->out.h.error) { err = req->out.h.error; break; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 1bd7ffdad593..bd4d2a3e1ec1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -249,6 +249,7 @@ struct fuse_io_priv { size_t size; __u64 offset; bool write; + bool should_dirty; int err; struct kiocb *iocb; struct file *file; -- cgit v1.2.3 From 2dda640040876cd8ae646408b69eea40c24f9ae9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 Aug 2017 23:10:46 -0700 Subject: net: fix keepalive code vs TCP_FASTOPEN_CONNECT syzkaller was able to trigger a divide by 0 in TCP stack [1] Issue here is that keepalive timer needs to be updated to not attempt to send a probe if the connection setup was deferred using TCP_FASTOPEN_CONNECT socket option added in linux-4.11 [1] divide error: 0000 [#1] SMP CPU: 18 PID: 0 Comm: swapper/18 Not tainted task: ffff986f62f4b040 ti: ffff986f62fa2000 task.ti: ffff986f62fa2000 RIP: 0010:[] [] __tcp_select_window+0x8d/0x160 Call Trace: [] tcp_transmit_skb+0x11/0x20 [] tcp_xmit_probe_skb+0xc1/0xe0 [] tcp_write_wakeup+0x68/0x160 [] tcp_keepalive_timer+0x17b/0x230 [] call_timer_fn+0x39/0xf0 [] run_timer_softirq+0x1d7/0x280 [] __do_softirq+0xcb/0x257 [] irq_exit+0x9c/0xb0 [] smp_apic_timer_interrupt+0x6a/0x80 [] apic_timer_interrupt+0x7f/0x90 [] ? cpuidle_enter_state+0x13a/0x3b0 [] ? cpuidle_enter_state+0x11d/0x3b0 Tested: Following packetdrill no longer crashes the kernel `echo 0 >/proc/sys/net/ipv4/tcp_timestamps` // Cache warmup: send a Fast Open cookie request 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) +0 > S 0:0(0) +.01 < S. 123:123(0) ack 1 win 14600 +0 > . 1:1(0) ack 1 +0 close(3) = 0 +0 > F. 1:1(0) ack 1 +0 < F. 1:1(0) ack 2 win 92 +0 > . 2:2(0) ack 2 +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 setsockopt(4, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0 +.01 connect(4, ..., ...) = 0 +0 setsockopt(4, SOL_TCP, TCP_KEEPIDLE, [5], 4) = 0 +10 close(4) = 0 `echo 1 >/proc/sys/net/ipv4/tcp_timestamps` Fixes: 19f6d3f3c842 ("net/tcp-fastopen: Add new API support") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Wei Wang Cc: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c0feeeef962a..e906014890b6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -652,7 +652,8 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); -- cgit v1.2.3 From b91d532928dff2141ea9c107c3e73104d9843767 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 3 Aug 2017 14:13:46 +0800 Subject: ipv6: set rt6i_protocol properly in the route when it is installed After commit c2ed1880fd61 ("net: ipv6: check route protocol when deleting routes"), ipv6 route checks rt protocol when trying to remove a rt entry. It introduced a side effect causing 'ip -6 route flush cache' not to work well. When flushing caches with iproute, all route caches get dumped from kernel then removed one by one by sending DELROUTE requests to kernel for each cache. The thing is iproute sends the request with the cache whose proto is set with RTPROT_REDIRECT by rt6_fill_node() when kernel dumps it. But in kernel the rt_cache protocol is still 0, which causes the cache not to be matched and removed. So the real reason is rt6i_protocol in the route is not set when it is allocated. As David Ahern's suggestion, this patch is to set rt6i_protocol properly in the route when it is installed and remove the codes setting rtm_protocol according to rt6i_flags in rt6_fill_node. This is also an improvement to keep rt6i_protocol consistent with rtm_protocol. Fixes: c2ed1880fd61 ("net: ipv6: check route protocol when deleting routes") Reported-by: Jianlin Shi Suggested-by: David Ahern Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d30c96a819d..a640fbcba15d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2351,6 +2351,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; + nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; if (ip6_ins_rt(nrt)) @@ -2461,6 +2462,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -2513,6 +2515,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), @@ -3424,14 +3427,6 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; - if (rt->rt6i_flags & RTF_DYNAMIC) - rtm->rtm_protocol = RTPROT_REDIRECT; - else if (rt->rt6i_flags & RTF_ADDRCONF) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) - rtm->rtm_protocol = RTPROT_RA; - else - rtm->rtm_protocol = RTPROT_KERNEL; - } if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; -- cgit v1.2.3 From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:52 -0400 Subject: tcp: introduce tcp_rto_delta_us() helper for xmit timer fix Pure refactor. This helper will be required in the xmit timer fix later in the patch series. (Because the TLP logic will want to make this calculation.) Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++++ net/ipv4/tcp_input.c | 5 +---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 70483296157f..ada65e767b28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk); +/* At how many usecs into the future should the RTO fire? */ +static inline s64 tcp_rto_delta_us(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + u32 rto = inet_csk(sk)->icsk_rto; + u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; +} + /* * Save and compile IPv4 options, return a pointer to it */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dad026fcfd09..b9ba8d55d8b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3004,10 +3004,7 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ -- cgit v1.2.3 From a2815817ffa68c7933a43eb55836d6e789bd4389 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:53 -0400 Subject: tcp: enable xmit timer fix by having TLP use time when RTO should fire Have tcp_schedule_loss_probe() base the TLP scheduling decision based on when the RTO *should* fire. This is to enable the upcoming xmit timer fix in this series, where tcp_schedule_loss_probe() cannot assume that the last timer installed was an RTO timer (because we are no longer doing the "rearm RTO, rearm RTO, rearm TLP" dance on every ACK). So tcp_schedule_loss_probe() must independently figure out when an RTO would want to fire. In the new TLP implementation following in this series, we cannot assume that icsk_timeout was set based on an RTO; after processing a cumulative ACK the icsk_timeout we see can be from a previous TLP or RTO. So we need to independently recalculate the RTO time (instead of reading it out of icsk_timeout). Removing this dependency on the nature of icsk_timeout makes things a little easier to reason about anyway. Note that the old and new code should be equivalent, since they are both saying: "if the RTO is in the future, but at an earlier time than the normal TLP time, then set the TLP timer to fire when the RTO would have fired". Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2f1588bf73da..cd8e257492c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2377,8 +2377,8 @@ bool tcp_schedule_loss_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 timeout, tlp_time_stamp, rto_time_stamp; u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); + u32 timeout, rto_delta_us; /* No consecutive loss probes. */ if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { @@ -2417,14 +2417,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) (rtt + (rtt >> 1) + TCP_DELACK_MAX)); timeout = max_t(u32, timeout, msecs_to_jiffies(10)); - /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_jiffies32 + timeout; - rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; - if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_jiffies32; - if (delta > 0) - timeout = delta; - } + /* If the RTO formula yields an earlier time, then use that time. */ + rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ + if (rto_delta_us > 0) + timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); -- cgit v1.2.3 From df92c8394e6ea0469e8056946ef8add740ab8046 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Thu, 3 Aug 2017 09:19:54 -0400 Subject: tcp: fix xmit timer to only be reset if data ACKed/SACKed Fix a TCP loss recovery performance bug raised recently on the netdev list, in two threads: (i) July 26, 2017: netdev thread "TCP fast retransmit issues" (ii) July 26, 2017: netdev thread: "[PATCH V2 net-next] TLP: Don't reschedule PTO when there's one outstanding TLP retransmission" The basic problem is that incoming TCP packets that did not indicate forward progress could cause the xmit timer (TLP or RTO) to be rearmed and pushed back in time. In certain corner cases this could result in the following problems noted in these threads: - Repeated ACKs coming in with bogus SACKs corrupted by middleboxes could cause TCP to repeatedly schedule TLPs forever. We kept sending TLPs after every ~200ms, which elicited bogus SACKs, which caused more TLPs, ad infinitum; we never fired an RTO to fill in the holes. - Incoming data segments could, in some cases, cause us to reschedule our RTO or TLP timer further out in time, for no good reason. This could cause repeated inbound data to result in stalls in outbound data, in the presence of packet loss. This commit fixes these bugs by changing the TLP and RTO ACK processing to: (a) Only reschedule the xmit timer once per ACK. (b) Only reschedule the xmit timer if tcp_clean_rtx_queue() deems the ACK indicates sufficient forward progress (a packet was cumulatively ACKed, or we got a SACK for a packet that was sent before the most recent retransmit of the write queue head). This brings us back into closer compliance with the RFCs, since, as the comment for tcp_rearm_rto() notes, we should only restart the RTO timer after forward progress on the connection. Previously we were restarting the xmit timer even in these cases where there was no forward progress. As a side benefit, this commit simplifies and speeds up the TCP timer arming logic. We had been calling inet_csk_reset_xmit_timer() three times on normal ACKs that cumulatively acknowledged some data: 1) Once near the top of tcp_ack() to switch from TLP timer to RTO: if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); 2) Once in tcp_clean_rtx_queue(), to update the RTO: if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); 3) Once in tcp_ack() after tcp_fastretrans_alert() to switch from RTO to TLP: if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); This commit, by only rescheduling the xmit timer once per ACK, simplifies the code and reduces CPU overhead. This commit was tested in an A/B test with Google web server traffic. SNMP stats and request latency metrics were within noise levels, substantiating that for normal web traffic patterns this is a rare issue. This commit was also tested with packetdrill tests to verify that it fixes the timer behavior in the corner cases discussed in the netdev threads mentioned above. This patch is a bug fix patch intended to be queued for -stable relases. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Reported-by: Klavs Klavsen Reported-by: Mao Wenan Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 25 ++++++++++++++++--------- net/ipv4/tcp_output.c | 9 --------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b9ba8d55d8b8..53de1424c13c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ @@ -3016,6 +3017,13 @@ void tcp_rearm_rto(struct sock *sk) } } +/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ +static void tcp_set_xmit_timer(struct sock *sk) +{ + if (!tcp_schedule_loss_probe(sk)) + tcp_rearm_rto(sk); +} + /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3177,7 +3185,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_rtt_us, sack->rate); if (flag & FLAG_ACKED) { - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3205,7 +3213,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. */ - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ } if (icsk->icsk_ca_ops->pkts_acked) { @@ -3577,9 +3585,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) - tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; icsk->icsk_retransmits = 0; @@ -3644,18 +3649,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + /* If needed, reset TLP/RTO timer; RACK may later override this. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, sack_state.rate); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index cd8e257492c4..276406a83a37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2380,21 +2380,12 @@ bool tcp_schedule_loss_probe(struct sock *sk) u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); u32 timeout, rto_delta_us; - /* No consecutive loss probes. */ - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { - tcp_rearm_rto(sk); - return false; - } /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ if (tp->fastopen_rsk) return false; - /* TLP is only scheduled when next timer event is RTO. */ - if (icsk->icsk_pending != ICSK_TIME_RETRANS) - return false; - /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ -- cgit v1.2.3 From da6c9bbf418dcb0f8be261f4353400fa959b1e92 Mon Sep 17 00:00:00 2001 From: Mark yao Date: Mon, 31 Jul 2017 17:49:42 +0800 Subject: drm/rockchip: vop: fix iommu page fault when resume Iommu would get page fault with following path: vop_disable: 1, disable all windows and set vop config done 2, vop enter to standy, all windows not works, but their registers are not clean, when you read window's enable bit, may found the window is enable. vop_enable: 1, memcpy(vop->regsbak, vop->regs, len) save current vop registers to vop->regsbak, then you can found window is enable on regsbak. 2, VOP_WIN_SET(vop, win, gate, 1); force enable window gate, but gate and enable are on same hardware register, then window enable bit rewrite to vop hardware. 3, vop power on, and vop might try to scan destroyed buffer, then iommu get page fault. Move windows disable after vop regsbak restore, then vop regsbak mechanism would keep tracing the modify, everything would be safe. Signed-off-by: Mark Yao Reviewed-by: Sandy huang Link: https://patchwork.freedesktop.org/patch/msgid/1501494582-6934-1-git-send-email-mark.yao@rock-chips.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 33 +++++++++++++---------------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 5d450332c2fd..804d0ff53059 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -500,7 +500,7 @@ static void vop_line_flag_irq_disable(struct vop *vop) static int vop_enable(struct drm_crtc *crtc) { struct vop *vop = to_vop(crtc); - int ret; + int ret, i; ret = pm_runtime_get_sync(vop->dev); if (ret < 0) { @@ -533,6 +533,20 @@ static int vop_enable(struct drm_crtc *crtc) } memcpy(vop->regs, vop->regsbak, vop->len); + /* + * We need to make sure that all windows are disabled before we + * enable the crtc. Otherwise we might try to scan from a destroyed + * buffer later. + */ + for (i = 0; i < vop->data->win_size; i++) { + struct vop_win *vop_win = &vop->win[i]; + const struct vop_win_data *win = vop_win->data; + + spin_lock(&vop->reg_lock); + VOP_WIN_SET(vop, win, enable, 0); + spin_unlock(&vop->reg_lock); + } + vop_cfg_done(vop); /* @@ -566,28 +580,11 @@ err_put_pm_runtime: static void vop_crtc_disable(struct drm_crtc *crtc) { struct vop *vop = to_vop(crtc); - int i; WARN_ON(vop->event); rockchip_drm_psr_deactivate(&vop->crtc); - /* - * We need to make sure that all windows are disabled before we - * disable that crtc. Otherwise we might try to scan from a destroyed - * buffer later. - */ - for (i = 0; i < vop->data->win_size; i++) { - struct vop_win *vop_win = &vop->win[i]; - const struct vop_win_data *win = vop_win->data; - - spin_lock(&vop->reg_lock); - VOP_WIN_SET(vop, win, enable, 0); - spin_unlock(&vop->reg_lock); - } - - vop_cfg_done(vop); - drm_crtc_vblank_off(crtc); /* -- cgit v1.2.3 From 6f04f5925ce763e07cb405d1fbe97a53b6c026a3 Mon Sep 17 00:00:00 2001 From: Mark yao Date: Mon, 31 Jul 2017 17:49:46 +0800 Subject: drm/rockchip: vop: fix NV12 video display error fixup the scale calculation formula on the case src_height == (dst_height/2). Signed-off-by: Mark Yao Reviewed-by: Sandy huang Link: https://patchwork.freedesktop.org/patch/msgid/1501494586-6984-1-git-send-email-mark.yao@rock-chips.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 9979fd0c2282..27eefbfcf3d0 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -282,6 +282,9 @@ static inline uint16_t scl_get_bili_dn_vskip(int src_h, int dst_h, act_height = (src_h + vskiplines - 1) / vskiplines; + if (act_height == dst_h) + return GET_SCL_FT_BILI_DN(src_h, dst_h) / vskiplines; + return GET_SCL_FT_BILI_DN(act_height, dst_h); } -- cgit v1.2.3 From 79a0b149d4e3d45fc46673f4b29a157776c174e2 Mon Sep 17 00:00:00 2001 From: Mark yao Date: Mon, 31 Jul 2017 17:49:50 +0800 Subject: drm/rockchip: vop: round_up pitches to word align VOP pitch register is word align, need align to word. VOP_WIN0_VIR: bit[31:16] win0_vir_stride_uv Number of words of Win0 uv Virtual width bit[15:0] win0_vir_width Number of words of Win0 yrgb Virtual width ARGB888 : win0_vir_width RGB888 : (win0_vir_width*3/4) + (win0_vir_width%3) RGB565 : ceil(win0_vir_width/2) YUV : ceil(win0_vir_width/4) Signed-off-by: Mark Yao Reviewed-by: Sandy huang Link: https://patchwork.freedesktop.org/patch/msgid/1501494591-7034-1-git-send-email-mark.yao@rock-chips.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 804d0ff53059..e205c5cf9019 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -761,7 +761,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, spin_lock(&vop->reg_lock); VOP_WIN_SET(vop, win, format, format); - VOP_WIN_SET(vop, win, yrgb_vir, fb->pitches[0] >> 2); + VOP_WIN_SET(vop, win, yrgb_vir, DIV_ROUND_UP(fb->pitches[0], 4)); VOP_WIN_SET(vop, win, yrgb_mst, dma_addr); if (is_yuv_support(fb->format->format)) { int hsub = drm_format_horz_chroma_subsampling(fb->format->format); @@ -775,7 +775,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, offset += (src->y1 >> 16) * fb->pitches[1] / vsub; dma_addr = rk_uv_obj->dma_addr + offset + fb->offsets[1]; - VOP_WIN_SET(vop, win, uv_vir, fb->pitches[1] >> 2); + VOP_WIN_SET(vop, win, uv_vir, DIV_ROUND_UP(fb->pitches[1], 4)); VOP_WIN_SET(vop, win, uv_mst, dma_addr); } -- cgit v1.2.3 From 80c471ea040ad9006ebff6d64221a04e8fa1b7f6 Mon Sep 17 00:00:00 2001 From: Mark yao Date: Mon, 31 Jul 2017 17:49:55 +0800 Subject: drm/rockchip: vop: report error when check resource error The user would be confused while facing a error commit without any error report. Signed-off-by: Mark Yao Reviewed-by: Sandy huang Link: https://patchwork.freedesktop.org/patch/msgid/1501494596-7090-1-git-send-email-mark.yao@rock-chips.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index e205c5cf9019..2900f1410d95 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -679,8 +679,10 @@ static int vop_plane_atomic_check(struct drm_plane *plane, * Src.x1 can be odd when do clip, but yuv plane start point * need align with 2 pixel. */ - if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2)) + if (is_yuv_support(fb->format->format) && ((state->src.x1 >> 16) % 2)) { + DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n"); return -EINVAL; + } return 0; } -- cgit v1.2.3 From 02b6ed44304e458d68e454493ab272f4e3c3c53a Mon Sep 17 00:00:00 2001 From: Tina Zhang Date: Fri, 4 Aug 2017 17:39:41 +0800 Subject: drm/i915/gvt: Initialize MMIO Block with HW state MMIO block with tracked mmio, is introduced for the sake of performance of searching tracked mmio. All the tracked mmio needs to get the initial value from the HW state during vGPU being created. This patch is to initialize the tracked registers in MMIO block with the HW state. v2: Add "Fixes:" line for this patch (Zhenyu) Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks") Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 11 ++++++++++- drivers/gpu/drm/i915/gvt/gvt.h | 12 ++++++++++++ drivers/gpu/drm/i915/gvt/handlers.c | 36 +++++++++++++++++------------------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 5dad9298b2d5..a26c1705430e 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -72,11 +72,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) struct intel_gvt_device_info *info = &gvt->device_info; struct pci_dev *pdev = gvt->dev_priv->drm.pdev; struct intel_gvt_mmio_info *e; + struct gvt_mmio_block *block = gvt->mmio.mmio_block; + int num = gvt->mmio.num_mmio_block; struct gvt_firmware_header *h; void *firmware; void *p; unsigned long size, crc32_start; - int i; + int i, j; int ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size; @@ -105,6 +107,13 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) hash_for_each(gvt->mmio.mmio_info_table, i, e, node) *(u32 *)(p + e->offset) = I915_READ_NOTRACE(_MMIO(e->offset)); + for (i = 0; i < num; i++, block++) { + for (j = 0; j < block->size; j += 4) + *(u32 *)(p + INTEL_GVT_MMIO_OFFSET(block->offset) + j) = + I915_READ_NOTRACE(_MMIO(INTEL_GVT_MMIO_OFFSET( + block->offset) + j)); + } + memcpy(gvt->firmware.mmio, p, info->mmio_size); crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index d96c41aa5aa7..2964a4d01a66 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -195,6 +195,15 @@ struct intel_gvt_fence { unsigned long vgpu_allocated_fence_num; }; +/* Special MMIO blocks. */ +struct gvt_mmio_block { + unsigned int device; + i915_reg_t offset; + unsigned int size; + gvt_mmio_func read; + gvt_mmio_func write; +}; + #define INTEL_GVT_MMIO_HASH_BITS 11 struct intel_gvt_mmio { @@ -214,6 +223,9 @@ struct intel_gvt_mmio { /* This reg could be accessed by unaligned address */ #define F_UNALIGN (1 << 6) + struct gvt_mmio_block *mmio_block; + unsigned int num_mmio_block; + DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS); unsigned int num_tracked_mmio; }; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 17febe830ff6..323664a238f5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2857,31 +2857,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) return 0; } -/* Special MMIO blocks. */ -static struct gvt_mmio_block { - unsigned int device; - i915_reg_t offset; - unsigned int size; - gvt_mmio_func read; - gvt_mmio_func write; -} gvt_mmio_blocks[] = { - {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, - {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, - {D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE, - pvinfo_mmio_read, pvinfo_mmio_write}, - {D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL}, - {D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL}, - {D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL}, -}; - static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt, unsigned int offset) { unsigned long device = intel_gvt_get_device_type(gvt); - struct gvt_mmio_block *block = gvt_mmio_blocks; + struct gvt_mmio_block *block = gvt->mmio.mmio_block; + int num = gvt->mmio.num_mmio_block; int i; - for (i = 0; i < ARRAY_SIZE(gvt_mmio_blocks); i++, block++) { + for (i = 0; i < num; i++, block++) { if (!(device & block->device)) continue; if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) && @@ -2912,6 +2896,17 @@ void intel_gvt_clean_mmio_info(struct intel_gvt *gvt) gvt->mmio.mmio_attribute = NULL; } +/* Special MMIO blocks. */ +static struct gvt_mmio_block mmio_blocks[] = { + {D_SKL_PLUS, _MMIO(CSR_MMIO_START_RANGE), 0x3000, NULL, NULL}, + {D_ALL, _MMIO(MCHBAR_MIRROR_BASE_SNB), 0x40000, NULL, NULL}, + {D_ALL, _MMIO(VGT_PVINFO_PAGE), VGT_PVINFO_SIZE, + pvinfo_mmio_read, pvinfo_mmio_write}, + {D_ALL, LGC_PALETTE(PIPE_A, 0), 1024, NULL, NULL}, + {D_ALL, LGC_PALETTE(PIPE_B, 0), 1024, NULL, NULL}, + {D_ALL, LGC_PALETTE(PIPE_C, 0), 1024, NULL, NULL}, +}; + /** * intel_gvt_setup_mmio_info - setup MMIO information table for GVT device * @gvt: GVT device @@ -2951,6 +2946,9 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt) goto err; } + gvt->mmio.mmio_block = mmio_blocks; + gvt->mmio.num_mmio_block = ARRAY_SIZE(mmio_blocks); + gvt_dbg_mmio("traced %u virtual mmio registers\n", gvt->mmio.num_tracked_mmio); return 0; -- cgit v1.2.3 From 9e48cd4a77d5170433a2e611eeda7accdf96604d Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 24 Jul 2017 11:44:17 +0530 Subject: sparc64: properly name the cpu constants Acked-by: Sam Ravnborg Acked-by: David S. Miller Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- arch/sparc/include/asm/spitfire.h | 14 ++++++++++++++ arch/sparc/kernel/head_64.S | 16 ++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 1d8321c827a8..9cc2afe10ef0 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -51,6 +51,20 @@ #define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff +/* + * The following CPU_ID_xxx constants are used + * to identify the CPU type in the setup phase + * (see head_64.S) + */ +#define CPU_ID_NIAGARA1 ('1') +#define CPU_ID_NIAGARA2 ('2') +#define CPU_ID_NIAGARA3 ('3') +#define CPU_ID_NIAGARA4 ('4') +#define CPU_ID_NIAGARA5 ('5') +#define CPU_ID_M6 ('6') +#define CPU_ID_M7 ('7') +#define CPU_ID_SONOMA1 ('N') + #ifndef __ASSEMBLY__ enum ultra_tlb_layout { diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 41a407328667..ddb5e24adf49 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -424,22 +424,22 @@ EXPORT_SYMBOL(sun4v_chip_type) nop 70: ldub [%g1 + 7], %g2 - cmp %g2, '3' + cmp %g2, CPU_ID_NIAGARA3 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA3, %g4 - cmp %g2, '4' + cmp %g2, CPU_ID_NIAGARA4 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA4, %g4 - cmp %g2, '5' + cmp %g2, CPU_ID_NIAGARA5 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA5, %g4 - cmp %g2, '6' + cmp %g2, CPU_ID_M6 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M6, %g4 - cmp %g2, '7' + cmp %g2, CPU_ID_M7 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 - cmp %g2, 'N' + cmp %g2, CPU_ID_SONOMA1 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_SN, %g4 ba,pt %xcc, 49f @@ -448,10 +448,10 @@ EXPORT_SYMBOL(sun4v_chip_type) 91: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 ldub [%g1 + 17], %g2 - cmp %g2, '1' + cmp %g2, CPU_ID_NIAGARA1 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA1, %g4 - cmp %g2, '2' + cmp %g2, CPU_ID_NIAGARA2 be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA2, %g4 -- cgit v1.2.3 From 7d484acb2f90643de7e242fd47e48c3ebb22df3a Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 24 Jul 2017 11:44:18 +0530 Subject: sparc64: recognize and support sparc M8 cpu type Recognize SPARC-M8 cpu type, hardware caps and cpu distribution map. Signed-off-by: Allen Pais Signed-off-by: David Aldridge Signed-off-by: David S. Miller --- arch/sparc/include/asm/spitfire.h | 2 ++ arch/sparc/kernel/cpu.c | 6 ++++++ arch/sparc/kernel/cpumap.c | 1 + arch/sparc/kernel/head_64.S | 6 ++++++ arch/sparc/kernel/setup_64.c | 15 +++++++++++++-- arch/sparc/mm/init_64.c | 2 ++ 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index 9cc2afe10ef0..1b1286d05069 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -47,6 +47,7 @@ #define SUN4V_CHIP_NIAGARA5 0x05 #define SUN4V_CHIP_SPARC_M6 0x06 #define SUN4V_CHIP_SPARC_M7 0x07 +#define SUN4V_CHIP_SPARC_M8 0x08 #define SUN4V_CHIP_SPARC64X 0x8a #define SUN4V_CHIP_SPARC_SN 0x8b #define SUN4V_CHIP_UNKNOWN 0xff @@ -63,6 +64,7 @@ #define CPU_ID_NIAGARA5 ('5') #define CPU_ID_M6 ('6') #define CPU_ID_M7 ('7') +#define CPU_ID_M8 ('8') #define CPU_ID_SONOMA1 ('N') #ifndef __ASSEMBLY__ diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 493e023a468a..ef4f18f7a674 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -506,6 +506,12 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "sparc-m7"; break; + case SUN4V_CHIP_SPARC_M8: + sparc_cpu_type = "SPARC-M8"; + sparc_fpu_type = "SPARC-M8 integrated FPU"; + sparc_pmu_type = "sparc-m8"; + break; + case SUN4V_CHIP_SPARC_SN: sparc_cpu_type = "SPARC-SN"; sparc_fpu_type = "SPARC-SN integrated FPU"; diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index 45c820e1cba5..90d550bbfeef 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -328,6 +328,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) case SUN4V_CHIP_NIAGARA5: case SUN4V_CHIP_SPARC_M6: case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: case SUN4V_CHIP_SPARC64X: rover_inc_table = niagara_iterate_method; diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index ddb5e24adf49..78e0211753d2 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -439,6 +439,9 @@ EXPORT_SYMBOL(sun4v_chip_type) cmp %g2, CPU_ID_M7 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_M7, %g4 + cmp %g2, CPU_ID_M8 + be,pt %xcc, 5f + mov SUN4V_CHIP_SPARC_M8, %g4 cmp %g2, CPU_ID_SONOMA1 be,pt %xcc, 5f mov SUN4V_CHIP_SPARC_SN, %g4 @@ -600,6 +603,9 @@ niagara_tlb_fixup: be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_M7 + be,pt %xcc, niagara4_patch + nop + cmp %g1, SUN4V_CHIP_SPARC_M8 be,pt %xcc, niagara4_patch nop cmp %g1, SUN4V_CHIP_SPARC_SN diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 4d9c3e13c150..150ee7d4b059 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -288,10 +288,17 @@ static void __init sun4v_patch(void) sun4v_patch_2insn_range(&__sun4v_2insn_patch, &__sun4v_2insn_patch_end); - if (sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || - sun4v_chip_type == SUN4V_CHIP_SPARC_SN) + + switch (sun4v_chip_type) { + case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: + case SUN4V_CHIP_SPARC_SN: sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, &__sun_m7_2insn_patch_end); + break; + default: + break; + } sun4v_hvapi_init(); } @@ -529,6 +536,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_BLKINIT; @@ -538,6 +546,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= HWCAP_SPARC_N2; @@ -568,6 +577,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 | @@ -578,6 +588,7 @@ static void __init init_sparc64_elf_hwcap(void) sun4v_chip_type == SUN4V_CHIP_NIAGARA5 || sun4v_chip_type == SUN4V_CHIP_SPARC_M6 || sun4v_chip_type == SUN4V_CHIP_SPARC_M7 || + sun4v_chip_type == SUN4V_CHIP_SPARC_M8 || sun4v_chip_type == SUN4V_CHIP_SPARC_SN || sun4v_chip_type == SUN4V_CHIP_SPARC64X) cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC | diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index fed73f14aa49..c24f4bfc3b84 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2161,6 +2161,7 @@ static void __init sun4v_linear_pte_xor_finalize(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: pagecv_flag = 0x00; break; @@ -2313,6 +2314,7 @@ void __init paging_init(void) */ switch (sun4v_chip_type) { case SUN4V_CHIP_SPARC_M7: + case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: page_cache4v_flag = _PAGE_CP_4V; break; -- cgit v1.2.3 From fdaccf74fe83ab3438df014efd55788a1dd414b5 Mon Sep 17 00:00:00 2001 From: Vijay Kumar Date: Fri, 28 Jul 2017 19:29:32 -0600 Subject: sparc64: Increase max_phys_bits to 51 and VA bits to 53 for M8. On M8 chips, use a max_phys_bits value of 51. Also, M8 supports VA bits up to 54 bits. However, for now restrict VA bits to 53 due to 4-level pagetable limitation. Signed-off-by: Vijay Kumar Reviewed-by: Bob Picco Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index c24f4bfc3b84..afa0099f3748 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1944,12 +1944,22 @@ static void __init setup_page_offset(void) break; case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC_SN: - default: /* M7 and later support 52-bit virtual addresses. */ sparc64_va_hole_top = 0xfff8000000000000UL; sparc64_va_hole_bottom = 0x0008000000000000UL; max_phys_bits = 49; break; + case SUN4V_CHIP_SPARC_M8: + default: + /* M8 and later support 54-bit virtual addresses. + * However, restricting M8 and above VA bits to 53 + * as 4-level page table cannot support more than + * 53 VA bits. + */ + sparc64_va_hole_top = 0xfff0000000000000UL; + sparc64_va_hole_bottom = 0x0010000000000000UL; + max_phys_bits = 51; + break; } } -- cgit v1.2.3 From 17b334a876fe121abbd6634ace83ca58deea4bc5 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 4 Aug 2017 14:12:29 +0200 Subject: mlxsw: spectrum_switchdev: Don't warn about valid situations Some operations in the bridge driver such as MDB deletion are preformed in an atomic context and thus deferred to a process context by the switchdev infrastructure. Therefore, by the time the operation is performed by the underlying device driver it's possible the bridge port context is already gone. This is especially true for removal flows, but theoretically can also be invoked during addition. Remove the warnings in such situations and return normally. Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN") Fixes: 3922285d96e7 ("net: bridge: Add support for offloading port attributes") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 656b2d3f1bee..73d54df89578 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -626,8 +626,8 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, orig_dev); - if (WARN_ON(!bridge_port)) - return -EINVAL; + if (!bridge_port) + return 0; err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, MLXSW_SP_FLOOD_TYPE_UC, @@ -711,8 +711,8 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp_port->mlxsw_sp->bridge, orig_dev); - if (WARN_ON(!bridge_port)) - return -EINVAL; + if (!bridge_port) + return 0; if (!bridge_port->bridge_device->multicast_enabled) return 0; @@ -1283,15 +1283,15 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); - if (WARN_ON(!bridge_port)) - return -EINVAL; + if (!bridge_port) + return 0; bridge_device = bridge_port->bridge_device; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, bridge_device, mdb->vid); - if (WARN_ON(!mlxsw_sp_port_vlan)) - return -EINVAL; + if (!mlxsw_sp_port_vlan) + return 0; fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); @@ -1407,15 +1407,15 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, int err = 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); - if (WARN_ON(!bridge_port)) - return -EINVAL; + if (!bridge_port) + return 0; bridge_device = bridge_port->bridge_device; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_bridge(mlxsw_sp_port, bridge_device, mdb->vid); - if (WARN_ON(!mlxsw_sp_port_vlan)) - return -EINVAL; + if (!mlxsw_sp_port_vlan) + return 0; fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); -- cgit v1.2.3 From 852cfeed0ebe93a1c1e0c153c04a57221f72bd7e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 4 Aug 2017 14:12:30 +0200 Subject: mlxsw: spectrum_switchdev: Release multicast groups during fini Each multicast group (MID) stores a bitmap of ports to which a packet should be forwarded to in case an MDB entry associated with the MID is hit. Since the initial introduction of IGMP snooping in commit 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") the driver didn't correctly free these multicast groups upon ungraceful situations such as the removal of the upper bridge device or module removal. The correct way to fix this is to associate each MID with the bridge ports member in it and then drop the reference in case the bridge port is destroyed, but this will result in a lot more code and will be fixed in net-next. For now, upon module removal, traverse the MID list and release each one. Fixes: 3a49b4fde2a1 ("mlxsw: Adding layer 2 multicast support") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 73d54df89578..5eb1606765c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1974,6 +1974,17 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) } +static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mid *mid, *tmp; + + list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) { + list_del(&mid->list); + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + kfree(mid); + } +} + int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge; @@ -1996,7 +2007,7 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - WARN_ON(!list_empty(&mlxsw_sp->bridge->mids_list)); + mlxsw_sp_mids_fini(mlxsw_sp); WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); kfree(mlxsw_sp->bridge); } -- cgit v1.2.3 From b0a0c2566f28e71e5e32121992ac8060cec75510 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Aug 2017 14:20:54 +0200 Subject: bpf, s390: fix jit branch offset related to ldimm64 While testing some other work that required JIT modifications, I run into test_bpf causing a hang when JIT enabled on s390. The problematic test case was the one from ddc665a4bb4b (bpf, arm64: fix jit branch offset related to ldimm64), and turns out that we do have a similar issue on s390 as well. In bpf_jit_prog() we update next instruction address after returning from bpf_jit_insn() with an insn_count. bpf_jit_insn() returns either -1 in case of error (e.g. unsupported insn), 1 or 2. The latter is only the case for ldimm64 due to spanning 2 insns, however, next address is only set to i + 1 not taking actual insn_count into account, thus fix is to use insn_count instead of 1. bpf_jit_enable in mode 2 provides also disasm on s390: Before fix: 000003ff800349b6: a7f40003 brc 15,3ff800349bc ; target 000003ff800349ba: 0000 unknown 000003ff800349bc: e3b0f0700024 stg %r11,112(%r15) 000003ff800349c2: e3e0f0880024 stg %r14,136(%r15) 000003ff800349c8: 0db0 basr %r11,%r0 000003ff800349ca: c0ef00000000 llilf %r14,0 000003ff800349d0: e320b0360004 lg %r2,54(%r11) 000003ff800349d6: e330b03e0004 lg %r3,62(%r11) 000003ff800349dc: ec23ffeda065 clgrj %r2,%r3,10,3ff800349b6 ; jmp 000003ff800349e2: e3e0b0460004 lg %r14,70(%r11) 000003ff800349e8: e3e0b04e0004 lg %r14,78(%r11) 000003ff800349ee: b904002e lgr %r2,%r14 000003ff800349f2: e3b0f0700004 lg %r11,112(%r15) 000003ff800349f8: e3e0f0880004 lg %r14,136(%r15) 000003ff800349fe: 07fe bcr 15,%r14 After fix: 000003ff80ef3db4: a7f40003 brc 15,3ff80ef3dba 000003ff80ef3db8: 0000 unknown 000003ff80ef3dba: e3b0f0700024 stg %r11,112(%r15) 000003ff80ef3dc0: e3e0f0880024 stg %r14,136(%r15) 000003ff80ef3dc6: 0db0 basr %r11,%r0 000003ff80ef3dc8: c0ef00000000 llilf %r14,0 000003ff80ef3dce: e320b0360004 lg %r2,54(%r11) 000003ff80ef3dd4: e330b03e0004 lg %r3,62(%r11) 000003ff80ef3dda: ec230006a065 clgrj %r2,%r3,10,3ff80ef3de6 ; jmp 000003ff80ef3de0: e3e0b0460004 lg %r14,70(%r11) 000003ff80ef3de6: e3e0b04e0004 lg %r14,78(%r11) ; target 000003ff80ef3dec: b904002e lgr %r2,%r14 000003ff80ef3df0: e3b0f0700004 lg %r11,112(%r15) 000003ff80ef3df6: e3e0f0880004 lg %r14,136(%r15) 000003ff80ef3dfc: 07fe bcr 15,%r14 test_bpf.ko suite runs fine after the fix. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Tested-by: Michael Holzheu Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 01c6fbc3e85b..1803797fc885 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1253,7 +1253,8 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) return -1; - jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + /* Next instruction address */ + jit->addrs[i + insn_count] = jit->prg; } bpf_jit_epilogue(jit); -- cgit v1.2.3 From bad1926dd2f60bbb4af5223ace3a7b34a0219a66 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Aug 2017 14:20:55 +0200 Subject: bpf, s390: fix build for libbpf and selftest suite The BPF feature test as well as libbpf is missing the __NR_bpf define for s390 and currently refuses to compile (selftest suite depends on libbpf as well). Similar issue was fixed some time ago via b0c47807d31d ("bpf: Add sparc support to tools and samples."), just do the same and add definitions. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/build/feature/test-bpf.c | 2 ++ tools/lib/bpf/bpf.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index 7598361ef1f1..da2172ff9662 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -11,6 +11,8 @@ # define __NR_bpf 280 # elif defined(__sparc__) # define __NR_bpf 349 +# elif defined(__s390__) +# define __NR_bpf 351 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 256f571f2ab5..e5bbb090bf88 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -39,6 +39,8 @@ # define __NR_bpf 280 # elif defined(__sparc__) # define __NR_bpf 349 +# elif defined(__s390__) +# define __NR_bpf 351 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif -- cgit v1.2.3 From b6bd53f9c4e825fca82fe1392157c78443c814ab Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 3 Aug 2017 17:10:12 -0700 Subject: MIPS: Add missing file for eBPF JIT. Inexplicably, commit f381bf6d82f0 ("MIPS: Add support for eBPF JIT.") lost a file somewhere on its path to Linus' tree. Add back the missing ebpf_jit.c so that we can build with CONFIG_BPF_JIT selected. This version of ebpf_jit.c is identical to the original except for two minor change need to resolve conflicts with changes merged from the BPF branch: A) Set prog->jited_len = image_size; B) Use BPF_TAIL_CALL instead of BPF_CALL | BPF_X Fixes: f381bf6d82f0 ("MIPS: Add support for eBPF JIT.") Signed-off-by: David Daney Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/mips/net/ebpf_jit.c | 1950 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1950 insertions(+) create mode 100644 arch/mips/net/ebpf_jit.c diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c new file mode 100644 index 000000000000..3f87b96da5c4 --- /dev/null +++ b/arch/mips/net/ebpf_jit.c @@ -0,0 +1,1950 @@ +/* + * Just-In-Time compiler for eBPF filters on MIPS + * + * Copyright (c) 2017 Cavium, Inc. + * + * Based on code from: + * + * Copyright (c) 2014 Imagination Technologies Ltd. + * Author: Markos Chandras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2 of the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Registers used by JIT */ +#define MIPS_R_ZERO 0 +#define MIPS_R_AT 1 +#define MIPS_R_V0 2 /* BPF_R0 */ +#define MIPS_R_V1 3 +#define MIPS_R_A0 4 /* BPF_R1 */ +#define MIPS_R_A1 5 /* BPF_R2 */ +#define MIPS_R_A2 6 /* BPF_R3 */ +#define MIPS_R_A3 7 /* BPF_R4 */ +#define MIPS_R_A4 8 /* BPF_R5 */ +#define MIPS_R_T4 12 /* BPF_AX */ +#define MIPS_R_T5 13 +#define MIPS_R_T6 14 +#define MIPS_R_T7 15 +#define MIPS_R_S0 16 /* BPF_R6 */ +#define MIPS_R_S1 17 /* BPF_R7 */ +#define MIPS_R_S2 18 /* BPF_R8 */ +#define MIPS_R_S3 19 /* BPF_R9 */ +#define MIPS_R_S4 20 /* BPF_TCC */ +#define MIPS_R_S5 21 +#define MIPS_R_S6 22 +#define MIPS_R_S7 23 +#define MIPS_R_T8 24 +#define MIPS_R_T9 25 +#define MIPS_R_SP 29 +#define MIPS_R_RA 31 + +/* eBPF flags */ +#define EBPF_SAVE_S0 BIT(0) +#define EBPF_SAVE_S1 BIT(1) +#define EBPF_SAVE_S2 BIT(2) +#define EBPF_SAVE_S3 BIT(3) +#define EBPF_SAVE_S4 BIT(4) +#define EBPF_SAVE_RA BIT(5) +#define EBPF_SEEN_FP BIT(6) +#define EBPF_SEEN_TC BIT(7) +#define EBPF_TCC_IN_V1 BIT(8) + +/* + * For the mips64 ISA, we need to track the value range or type for + * each JIT register. The BPF machine requires zero extended 32-bit + * values, but the mips64 ISA requires sign extended 32-bit values. + * At each point in the BPF program we track the state of every + * register so that we can zero extend or sign extend as the BPF + * semantics require. + */ +enum reg_val_type { + /* uninitialized */ + REG_UNKNOWN, + /* not known to be 32-bit compatible. */ + REG_64BIT, + /* 32-bit compatible, no truncation needed for 64-bit ops. */ + REG_64BIT_32BIT, + /* 32-bit compatible, need truncation for 64-bit ops. */ + REG_32BIT, + /* 32-bit zero extended. */ + REG_32BIT_ZERO_EX, + /* 32-bit no sign/zero extension needed. */ + REG_32BIT_POS +}; + +/* + * high bit of offsets indicates if long branch conversion done at + * this insn. + */ +#define OFFSETS_B_CONV BIT(31) + +/** + * struct jit_ctx - JIT context + * @skf: The sk_filter + * @stack_size: eBPF stack size + * @tmp_offset: eBPF $sp offset to 8-byte temporary memory + * @idx: Instruction index + * @flags: JIT flags + * @offsets: Instruction offsets + * @target: Memory location for the compiled filter + * @reg_val_types Packed enum reg_val_type for each register. + */ +struct jit_ctx { + const struct bpf_prog *skf; + int stack_size; + int tmp_offset; + u32 idx; + u32 flags; + u32 *offsets; + u32 *target; + u64 *reg_val_types; + unsigned int long_b_conversion:1; + unsigned int gen_b_offsets:1; +}; + +static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type) +{ + *rvt &= ~(7ull << (reg * 3)); + *rvt |= ((u64)type << (reg * 3)); +} + +static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, + int index, int reg) +{ + return (ctx->reg_val_types[index] >> (reg * 3)) & 7; +} + +/* Simply emit the instruction if the JIT memory space has been allocated */ +#define emit_instr(ctx, func, ...) \ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + uasm_i_##func(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ +} while (0) + +static unsigned int j_target(struct jit_ctx *ctx, int target_idx) +{ + unsigned long target_va, base_va; + unsigned int r; + + if (!ctx->target) + return 0; + + base_va = (unsigned long)ctx->target; + target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV); + + if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful)) + return (unsigned int)-1; + r = target_va & 0x0ffffffful; + return r; +} + +/* Compute the immediate value for PC-relative branches. */ +static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) +{ + if (!ctx->gen_b_offsets) + return 0; + + /* + * We want a pc-relative branch. tgt is the instruction offset + * we want to jump to. + + * Branch on MIPS: + * I: target_offset <- sign_extend(offset) + * I+1: PC += target_offset (delay slot) + * + * ctx->idx currently points to the branch instruction + * but the offset is added to the delay slot so we need + * to subtract 4. + */ + return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) - + (ctx->idx * 4) - 4; +} + +int bpf_jit_enable __read_mostly; + +enum which_ebpf_reg { + src_reg, + src_reg_no_fp, + dst_reg, + dst_reg_fp_ok +}; + +/* + * For eBPF, the register mapping naturally falls out of the + * requirements of eBPF and the MIPS n64 ABI. We don't maintain a + * separate frame pointer, so BPF_REG_10 relative accesses are + * adjusted to be $sp relative. + */ +int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn, + enum which_ebpf_reg w) +{ + int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ? + insn->src_reg : insn->dst_reg; + + switch (ebpf_reg) { + case BPF_REG_0: + return MIPS_R_V0; + case BPF_REG_1: + return MIPS_R_A0; + case BPF_REG_2: + return MIPS_R_A1; + case BPF_REG_3: + return MIPS_R_A2; + case BPF_REG_4: + return MIPS_R_A3; + case BPF_REG_5: + return MIPS_R_A4; + case BPF_REG_6: + ctx->flags |= EBPF_SAVE_S0; + return MIPS_R_S0; + case BPF_REG_7: + ctx->flags |= EBPF_SAVE_S1; + return MIPS_R_S1; + case BPF_REG_8: + ctx->flags |= EBPF_SAVE_S2; + return MIPS_R_S2; + case BPF_REG_9: + ctx->flags |= EBPF_SAVE_S3; + return MIPS_R_S3; + case BPF_REG_10: + if (w == dst_reg || w == src_reg_no_fp) + goto bad_reg; + ctx->flags |= EBPF_SEEN_FP; + /* + * Needs special handling, return something that + * cannot be clobbered just in case. + */ + return MIPS_R_ZERO; + case BPF_REG_AX: + return MIPS_R_T4; + default: +bad_reg: + WARN(1, "Illegal bpf reg: %d\n", ebpf_reg); + return -EINVAL; + } +} +/* + * eBPF stack frame will be something like: + * + * Entry $sp ------> +--------------------------------+ + * | $ra (optional) | + * +--------------------------------+ + * | $s0 (optional) | + * +--------------------------------+ + * | $s1 (optional) | + * +--------------------------------+ + * | $s2 (optional) | + * +--------------------------------+ + * | $s3 (optional) | + * +--------------------------------+ + * | $s4 (optional) | + * +--------------------------------+ + * | tmp-storage (if $ra saved) | + * $sp + tmp_offset --> +--------------------------------+ <--BPF_REG_10 + * | BPF_REG_10 relative storage | + * | MAX_BPF_STACK (optional) | + * | . | + * | . | + * | . | + * $sp --------> +--------------------------------+ + * + * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized + * area is not allocated. + */ +static int gen_int_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0; + int store_offset; + int locals_size; + + if (ctx->flags & EBPF_SAVE_RA) + /* + * If RA we are doing a function call and may need + * extra 8-byte tmp area. + */ + stack_adjust += 16; + if (ctx->flags & EBPF_SAVE_S0) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S1) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S2) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S3) + stack_adjust += 8; + if (ctx->flags & EBPF_SAVE_S4) + stack_adjust += 8; + + BUILD_BUG_ON(MAX_BPF_STACK & 7); + locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; + + stack_adjust += locals_size; + ctx->tmp_offset = locals_size; + + ctx->stack_size = stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in $v1 from the caller. + */ + emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust); + else + return 0; + + store_offset = stack_adjust - 8; + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + + if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) + emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); + + return 0; +} + +static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) +{ + const struct bpf_prog *prog = ctx->skf; + int stack_adjust = ctx->stack_size; + int store_offset = stack_adjust - 8; + int r0 = MIPS_R_V0; + + if (dest_reg == MIPS_R_RA && + get_reg_val_type(ctx, prog->len, BPF_REG_0) == REG_32BIT_ZERO_EX) + /* Don't let zero extended value escape. */ + emit_instr(ctx, sll, r0, r0, 0); + + if (ctx->flags & EBPF_SAVE_RA) { + emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S0) { + emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S1) { + emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S2) { + emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S3) { + emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= 8; + } + if (ctx->flags & EBPF_SAVE_S4) { + emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= 8; + } + emit_instr(ctx, jr, dest_reg); + + if (stack_adjust) + emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust); + else + emit_instr(ctx, nop); + + return 0; +} + +static void gen_imm_to_reg(const struct bpf_insn *insn, int reg, + struct jit_ctx *ctx) +{ + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm); + } else { + int lower = (s16)(insn->imm & 0xffff); + int upper = insn->imm - lower; + + emit_instr(ctx, lui, reg, upper >> 16); + emit_instr(ctx, addiu, reg, reg, lower); + } + +} + +static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int idx) +{ + int upper_bound, lower_bound; + int dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + + if (dst < 0) + return dst; + + switch (BPF_OP(insn->code)) { + case BPF_MOV: + case BPF_ADD: + upper_bound = S16_MAX; + lower_bound = S16_MIN; + break; + case BPF_SUB: + upper_bound = -(int)S16_MIN; + lower_bound = -(int)S16_MAX; + break; + case BPF_AND: + case BPF_OR: + case BPF_XOR: + upper_bound = 0xffff; + lower_bound = 0; + break; + case BPF_RSH: + case BPF_LSH: + case BPF_ARSH: + /* Shift amounts are truncated, no need for bounds */ + upper_bound = S32_MAX; + lower_bound = S32_MIN; + break; + default: + return -EINVAL; + } + + /* + * Immediate move clobbers the register, so no sign/zero + * extension needed. + */ + if (BPF_CLASS(insn->code) == BPF_ALU64 && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + /* BPF_ALU | BPF_LSH doesn't need separate sign extension */ + if (BPF_CLASS(insn->code) == BPF_ALU && + BPF_OP(insn->code) != BPF_LSH && + BPF_OP(insn->code) != BPF_MOV && + get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT) + emit_instr(ctx, sll, dst, dst, 0); + + if (insn->imm >= lower_bound && insn->imm <= upper_bound) { + /* single insn immediate case */ + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_MOV: + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, andi, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, ori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xori, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddiu, dst, dst, insn->imm); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, daddiu, dst, dst, -insn->imm); + break; + case BPF_ALU64 | BPF_RSH: + emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_RSH: + emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_LSH: + emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_LSH: + emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU64 | BPF_ARSH: + emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f); + break; + case BPF_ALU | BPF_ARSH: + emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f); + break; + case BPF_ALU | BPF_MOV: + emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addiu, dst, dst, insn->imm); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, addiu, dst, dst, -insn->imm); + break; + default: + return -EINVAL; + } + } else { + /* multi insn immediate case */ + if (BPF_OP(insn->code) == BPF_MOV) { + gen_imm_to_reg(insn, dst, ctx); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) { + case BPF_ALU64 | BPF_AND: + case BPF_ALU | BPF_AND: + emit_instr(ctx, and, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_OR: + case BPF_ALU | BPF_OR: + emit_instr(ctx, or, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_XOR: + case BPF_ALU | BPF_XOR: + emit_instr(ctx, xor, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_ADD: + emit_instr(ctx, daddu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU64 | BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_ADD: + emit_instr(ctx, addu, dst, dst, MIPS_R_AT); + break; + case BPF_ALU | BPF_SUB: + emit_instr(ctx, subu, dst, dst, MIPS_R_AT); + break; + default: + return -EINVAL; + } + } + } + + return 0; +} + +static void * __must_check +ool_skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return skb_header_pointer(skb, offset, len, buffer); +} + +static int size_to_len(const struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + return 1; + case BPF_H: + return 2; + case BPF_W: + return 4; + case BPF_DW: + return 8; + } + return 0; +} + +static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) +{ + if (value >= 0xffffffffffff8000ull || value < 0x8000ull) { + emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, (int)value); + } else if (value >= 0xffffffff80000000ull || + (value < 0x80000000 && value > 0xffff)) { + emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16)); + emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff)); + } else { + int i; + bool seen_part = false; + int needed_shift = 0; + + for (i = 0; i < 4; i++) { + u64 part = (value >> (16 * (3 - i))) & 0xffff; + + if (seen_part && needed_shift > 0 && (part || i == 3)) { + emit_instr(ctx, dsll_safe, dst, dst, needed_shift); + needed_shift = 0; + } + if (part) { + if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) { + emit_instr(ctx, lui, dst, (s32)(s16)part); + needed_shift = -16; + } else { + emit_instr(ctx, ori, dst, + seen_part ? dst : MIPS_R_ZERO, + (unsigned int)part); + } + seen_part = true; + } + if (seen_part) + needed_shift += 16; + } + } +} + +static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) +{ + int off, b_off; + + ctx->flags |= EBPF_SEEN_TC; + /* + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_instr(ctx, lwu, MIPS_R_T5, off, MIPS_R_A1); + emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_T5, MIPS_R_A2); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* + * if (--TCC < 0) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, daddiu, MIPS_R_T5, + (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, bltz, MIPS_R_T5, b_off); + /* + * prog = array->ptrs[index]; + * if (prog == NULL) + * goto out; + */ + /* Delay slot */ + emit_instr(ctx, dsll, MIPS_R_T8, MIPS_R_A2, 3); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, MIPS_R_A1); + off = offsetof(struct bpf_array, ptrs); + emit_instr(ctx, ld, MIPS_R_AT, off, MIPS_R_T8); + b_off = b_imm(this_idx + 1, ctx); + emit_instr(ctx, beq, MIPS_R_AT, MIPS_R_ZERO, b_off); + /* Delay slot */ + emit_instr(ctx, nop); + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_instr(ctx, ld, MIPS_R_T9, off, MIPS_R_AT); + /* All systems are go... propagate TCC */ + emit_instr(ctx, daddu, MIPS_R_V1, MIPS_R_T5, MIPS_R_ZERO); + /* Skip first instruction (TCC initialization) */ + emit_instr(ctx, daddiu, MIPS_R_T9, MIPS_R_T9, 4); + return build_int_epilogue(ctx, MIPS_R_T9); +} + +static bool use_bbit_insns(void) +{ + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return true; + default: + return false; + } +} + +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + +/* Returns the number of insn slots consumed. */ +static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + int this_idx, int exit_idx) +{ + int src, dst, r, td, ts, mem_off, b_off; + bool need_swap, did_move, cmp_eq; + unsigned int target; + u64 t64; + s64 t64s; + + switch (insn->code) { + case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */ + case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */ + case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */ + r = gen_imm_insn(insn, ctx, this_idx); + if (r < 0) + return r; + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, dmultu, MIPS_R_AT, dst); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + if (insn->imm == 1) /* Mult by 1 is a nop */ + break; + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, multu, dst, MIPS_R_AT); + emit_instr(ctx, mflo, dst); + break; + case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst); + break; + case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */ + case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, divu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + if (insn->imm == 0) { /* Div by zero */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO); + } + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm == 1) { + /* div by 1 is a nop, mod by 1 is zero */ + if (BPF_OP(insn->code) == BPF_MOD) + emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO); + break; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, ddivu, dst, MIPS_R_AT); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */ + case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + did_move = false; + if (insn->src_reg == BPF_REG_10) { + if (BPF_OP(insn->code) == BPF_MOV) { + emit_instr(ctx, daddiu, dst, MIPS_R_SP, MAX_BPF_STACK); + did_move = true; + } else { + emit_instr(ctx, daddiu, MIPS_R_AT, MIPS_R_SP, MAX_BPF_STACK); + src = MIPS_R_AT; + } + } else if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, daddu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, dsubu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, dmultu, dst, src); + emit_instr(ctx, mflo, dst); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, ddivu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, dsllv, dst, dst, src); + break; + case BPF_RSH: + emit_instr(ctx, dsrlv, dst, dst, src); + break; + case BPF_ARSH: + emit_instr(ctx, dsrav, dst, dst, src); + break; + default: + pr_err("ALU64_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */ + case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */ + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (td == REG_64BIT || td == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + did_move = false; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_64BIT || ts == REG_32BIT_ZERO_EX) { + int tmp_reg = MIPS_R_AT; + + if (BPF_OP(insn->code) == BPF_MOV) { + tmp_reg = dst; + did_move = true; + } + /* sign extend */ + emit_instr(ctx, sll, tmp_reg, src, 0); + src = MIPS_R_AT; + } + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (!did_move) + emit_instr(ctx, addu, dst, src, MIPS_R_ZERO); + break; + case BPF_ADD: + emit_instr(ctx, addu, dst, dst, src); + break; + case BPF_SUB: + emit_instr(ctx, subu, dst, dst, src); + break; + case BPF_XOR: + emit_instr(ctx, xor, dst, dst, src); + break; + case BPF_OR: + emit_instr(ctx, or, dst, dst, src); + break; + case BPF_AND: + emit_instr(ctx, and, dst, dst, src); + break; + case BPF_MUL: + emit_instr(ctx, mul, dst, dst, src); + break; + case BPF_DIV: + case BPF_MOD: + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off); + emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src); + emit_instr(ctx, divu, dst, src); + if (BPF_OP(insn->code) == BPF_DIV) + emit_instr(ctx, mflo, dst); + else + emit_instr(ctx, mfhi, dst); + break; + case BPF_LSH: + emit_instr(ctx, sllv, dst, dst, src); + break; + case BPF_RSH: + emit_instr(ctx, srlv, dst, dst, src); + break; + default: + pr_err("ALU_REG NOT HANDLED\n"); + return -EINVAL; + } + break; + case BPF_JMP | BPF_EXIT: + if (this_idx + 1 < exit_idx) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + if (insn->imm == 0) { + src = MIPS_R_ZERO; + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + src = MIPS_R_AT; + } + goto jeq_common; + case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */ + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (src < 0 || dst < 0) + return -EINVAL; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (td == REG_32BIT && ts != REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } else if (ts == REG_32BIT && td != REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, dst, 0); + dst = MIPS_R_AT; + } + if (BPF_OP(insn->code) == BPF_JSET) { + emit_instr(ctx, and, MIPS_R_AT, dst, src); + cmp_eq = false; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JSGT) { + emit_instr(ctx, dsubu, MIPS_R_AT, dst, src); + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, blez, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bgtz, MIPS_R_AT, b_off); + emit_instr(ctx, nop); + break; + } else if (BPF_OP(insn->code) == BPF_JSGE) { + emit_instr(ctx, slt, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGT) { + /* dst or src could be AT */ + emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + /* SP known to be non-zero, movz becomes boolean not */ + emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8); + emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else if (BPF_OP(insn->code) == BPF_JGE) { + emit_instr(ctx, sltu, MIPS_R_AT, dst, src); + cmp_eq = true; + dst = MIPS_R_AT; + src = MIPS_R_ZERO; + } else { /* JNE/JEQ case */ + cmp_eq = (BPF_OP(insn->code) == BPF_JEQ); + } +jeq_common: + /* + * If the next insn is EXIT and we are jumping arround + * only it, invert the sense of the compare and + * conditionally jump to the exit. Poor man's branch + * chaining. + */ + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, bne, dst, src, b_off); + else + emit_instr(ctx, beq, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + cmp_eq = !cmp_eq; + b_off = 4 * 3; + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + } + + if (cmp_eq) + emit_instr(ctx, beq, dst, src, b_off); + else + emit_instr(ctx, bne, dst, src, b_off); + emit_instr(ctx, nop); + if (ctx->offsets[this_idx] & OFFSETS_B_CONV) { + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } + break; + case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */ + case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */ + cmp_eq = (BPF_OP(insn->code) == BPF_JSGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (insn->imm == 0) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bltz, dst, b_off); + else + emit_instr(ctx, blez, dst, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + if (cmp_eq) + emit_instr(ctx, bgez, dst, b_off); + else + emit_instr(ctx, bgtz, dst, b_off); + emit_instr(ctx, nop); + break; + } + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = insn->imm + (cmp_eq ? 0 : 1); + if (t64s >= S16_MIN && t64s <= S16_MAX) { + emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + cmp_eq = (BPF_OP(insn->code) == BPF_JGE); + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + /* + * only "LT" compare available, so we must use imm + 1 + * to generate "GT" + */ + t64s = (u64)(u32)(insn->imm) + (cmp_eq ? 0 : 1); + if (t64s >= 0 && t64s <= S16_MAX) { + emit_instr(ctx, sltiu, MIPS_R_AT, dst, (int)t64s); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + } + emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s); + emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = true; + goto jeq_common; + + case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */ + dst = ebpf_to_mips_reg(ctx, insn, dst_reg_fp_ok); + if (dst < 0) + return dst; + + if (use_bbit_insns() && hweight32((u32)insn->imm) == 1) { + if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) { + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + return 2; /* We consumed the exit. */ + } + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off); + emit_instr(ctx, nop); + break; + } + t64 = (u32)insn->imm; + emit_const_to_reg(ctx, MIPS_R_AT, t64); + emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT); + src = MIPS_R_AT; + dst = MIPS_R_ZERO; + cmp_eq = false; + goto jeq_common; + + case BPF_JMP | BPF_JA: + /* + * Prefer relative branch for easier debugging, but + * fall back if needed. + */ + b_off = b_imm(this_idx + insn->off + 1, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, this_idx + insn->off + 1); + if (target == (unsigned int)-1) + return -E2BIG; + emit_instr(ctx, j, target); + } else { + emit_instr(ctx, b, b_off); + } + emit_instr(ctx, nop); + break; + case BPF_LD | BPF_DW | BPF_IMM: + if (insn->src_reg != 0) + return -EINVAL; + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32); + emit_const_to_reg(ctx, dst, t64); + return 2; /* Double slot insn */ + + case BPF_JMP | BPF_CALL: + ctx->flags |= EBPF_SAVE_RA; + t64s = (s64)insn->imm + (s64)__bpf_call_base; + emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot */ + emit_instr(ctx, nop); + break; + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(ctx, this_idx)) + return -EINVAL; + break; + + case BPF_LD | BPF_B | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_DW | BPF_ABS: + ctx->flags |= EBPF_SAVE_RA; + + gen_imm_to_reg(insn, MIPS_R_A1, ctx); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + + if (insn->imm < 0) { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)bpf_internal_load_pointer_neg_helper); + } else { + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + } + goto ld_skb_common; + + case BPF_LD | BPF_B | BPF_IND: + case BPF_LD | BPF_H | BPF_IND: + case BPF_LD | BPF_W | BPF_IND: + case BPF_LD | BPF_DW | BPF_IND: + ctx->flags |= EBPF_SAVE_RA; + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + ts = get_reg_val_type(ctx, this_idx, insn->src_reg); + if (ts == REG_32BIT_ZERO_EX) { + /* sign extend */ + emit_instr(ctx, sll, MIPS_R_A1, src, 0); + src = MIPS_R_A1; + } + if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) { + emit_instr(ctx, daddiu, MIPS_R_A1, src, insn->imm); + } else { + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + emit_instr(ctx, daddu, MIPS_R_A1, MIPS_R_AT, src); + } + /* truncate to 32-bit int */ + emit_instr(ctx, sll, MIPS_R_A1, MIPS_R_A1, 0); + emit_instr(ctx, daddiu, MIPS_R_A3, MIPS_R_SP, ctx->tmp_offset); + emit_instr(ctx, slt, MIPS_R_AT, MIPS_R_A1, MIPS_R_ZERO); + + emit_const_to_reg(ctx, MIPS_R_T8, (u64)bpf_internal_load_pointer_neg_helper); + emit_const_to_reg(ctx, MIPS_R_T9, (u64)ool_skb_header_pointer); + emit_instr(ctx, addiu, MIPS_R_A2, MIPS_R_ZERO, size_to_len(insn)); + emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_T8, MIPS_R_AT); + +ld_skb_common: + emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); + /* delay slot move */ + emit_instr(ctx, daddu, MIPS_R_A0, MIPS_R_S0, MIPS_R_ZERO); + + /* Check the error value */ + b_off = b_imm(exit_idx, ctx); + if (is_bad_offset(b_off)) { + target = j_target(ctx, exit_idx); + if (target == (unsigned int)-1) + return -E2BIG; + + if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) { + ctx->offsets[this_idx] |= OFFSETS_B_CONV; + ctx->long_b_conversion = 1; + } + emit_instr(ctx, bne, MIPS_R_V0, MIPS_R_ZERO, 4 * 3); + emit_instr(ctx, nop); + emit_instr(ctx, j, target); + emit_instr(ctx, nop); + } else { + emit_instr(ctx, beq, MIPS_R_V0, MIPS_R_ZERO, b_off); + emit_instr(ctx, nop); + } + +#ifdef __BIG_ENDIAN + need_swap = false; +#else + need_swap = true; +#endif + dst = MIPS_R_V0; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, 0, MIPS_R_V0); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, 0, MIPS_R_V0); + if (need_swap) + emit_instr(ctx, wsbh, dst, dst); + break; + case BPF_W: + emit_instr(ctx, lw, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + break; + case BPF_DW: + emit_instr(ctx, ld, dst, 0, MIPS_R_V0); + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + break; + } + + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU | BPF_END | BPF_FROM_LE: + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + td = get_reg_val_type(ctx, this_idx, insn->dst_reg); + if (insn->imm == 64 && td == REG_32BIT) + emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32); + + if (insn->imm != 64 && + (td == REG_64BIT || td == REG_32BIT_ZERO_EX)) { + /* sign extend */ + emit_instr(ctx, sll, dst, dst, 0); + } + +#ifdef __BIG_ENDIAN + need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE); +#else + need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE); +#endif + if (insn->imm == 16) { + if (need_swap) + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, andi, dst, dst, 0xffff); + } else if (insn->imm == 32) { + if (need_swap) { + emit_instr(ctx, wsbh, dst, dst); + emit_instr(ctx, rotr, dst, dst, 16); + } + } else { /* 64-bit*/ + if (need_swap) { + emit_instr(ctx, dsbh, dst, dst); + emit_instr(ctx, dshd, dst, dst); + } + } + break; + + case BPF_ST | BPF_B | BPF_MEM: + case BPF_ST | BPF_H | BPF_MEM: + case BPF_ST | BPF_W | BPF_MEM: + case BPF_ST | BPF_DW | BPF_MEM: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + gen_imm_to_reg(insn, MIPS_R_AT, ctx); + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst); + break; + case BPF_DW: + emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst); + break; + } + break; + + case BPF_LDX | BPF_B | BPF_MEM: + case BPF_LDX | BPF_H | BPF_MEM: + case BPF_LDX | BPF_W | BPF_MEM: + case BPF_LDX | BPF_DW | BPF_MEM: + if (insn->src_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + src = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return src; + mem_off = insn->off; + } + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, lbu, dst, mem_off, src); + break; + case BPF_H: + emit_instr(ctx, lhu, dst, mem_off, src); + break; + case BPF_W: + emit_instr(ctx, lw, dst, mem_off, src); + break; + case BPF_DW: + emit_instr(ctx, ld, dst, mem_off, src); + break; + } + break; + + case BPF_STX | BPF_B | BPF_MEM: + case BPF_STX | BPF_H | BPF_MEM: + case BPF_STX | BPF_W | BPF_MEM: + case BPF_STX | BPF_DW | BPF_MEM: + case BPF_STX | BPF_W | BPF_XADD: + case BPF_STX | BPF_DW | BPF_XADD: + if (insn->dst_reg == BPF_REG_10) { + ctx->flags |= EBPF_SEEN_FP; + dst = MIPS_R_SP; + mem_off = insn->off + MAX_BPF_STACK; + } else { + dst = ebpf_to_mips_reg(ctx, insn, dst_reg); + if (dst < 0) + return dst; + mem_off = insn->off; + } + src = ebpf_to_mips_reg(ctx, insn, src_reg_no_fp); + if (src < 0) + return dst; + if (BPF_MODE(insn->code) == BPF_XADD) { + switch (BPF_SIZE(insn->code)) { + case BPF_W: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, sll, MIPS_R_AT, src, 0); + src = MIPS_R_AT; + } + emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst); + /* + * On failure back up to LL (-4 + * instructions of 4 bytes each + */ + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src); + emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst); + emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4); + emit_instr(ctx, nop); + break; + } + } else { /* BPF_MEM */ + switch (BPF_SIZE(insn->code)) { + case BPF_B: + emit_instr(ctx, sb, src, mem_off, dst); + break; + case BPF_H: + emit_instr(ctx, sh, src, mem_off, dst); + break; + case BPF_W: + emit_instr(ctx, sw, src, mem_off, dst); + break; + case BPF_DW: + if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { + emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO); + emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32); + src = MIPS_R_AT; + } + emit_instr(ctx, sd, src, mem_off, dst); + break; + } + } + break; + + default: + pr_err("NOT HANDLED %d - (%02x)\n", + this_idx, (unsigned int)insn->code); + return -EINVAL; + } + return 1; +} + +#define RVT_VISITED_MASK 0xc000000000000000ull +#define RVT_FALL_THROUGH 0x4000000000000000ull +#define RVT_BRANCH_TAKEN 0x8000000000000000ull +#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) + +static int build_int_body(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + int i, r; + + for (i = 0; i < prog->len; ) { + insn = prog->insnsi + i; + if ((ctx->reg_val_types[i] & RVT_VISITED_MASK) == 0) { + /* dead instruction, don't emit it. */ + i++; + continue; + } + + if (ctx->target == NULL) + ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4); + + r = build_one_insn(insn, ctx, i, prog->len); + if (r < 0) + return r; + i += r; + } + /* epilogue offset */ + if (ctx->target == NULL) + ctx->offsets[i] = ctx->idx * 4; + + /* + * All exits have an offset of the epilogue, some offsets may + * not have been set due to banch-around threading, so set + * them now. + */ + if (ctx->target == NULL) + for (i = 0; i < prog->len; i++) { + insn = prog->insnsi + i; + if (insn->code == (BPF_JMP | BPF_EXIT)) + ctx->offsets[i] = ctx->idx * 4; + } + return 0; +} + +/* return the last idx processed, or negative for error */ +static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt, + int start_idx, bool follow_taken) +{ + const struct bpf_prog *prog = ctx->skf; + const struct bpf_insn *insn; + u64 exit_rvt = initial_rvt; + u64 *rvt = ctx->reg_val_types; + int idx; + int reg; + + for (idx = start_idx; idx < prog->len; idx++) { + rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; + insn = prog->insnsi + idx; + switch (BPF_CLASS(insn->code)) { + case BPF_ALU: + switch (BPF_OP(insn->code)) { + case BPF_ADD: + case BPF_SUB: + case BPF_MUL: + case BPF_DIV: + case BPF_OR: + case BPF_AND: + case BPF_LSH: + case BPF_RSH: + case BPF_NEG: + case BPF_MOD: + case BPF_XOR: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + case BPF_MOV: + if (BPF_SRC(insn->code)) { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } else { + /* IMM to REG move*/ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + } + break; + case BPF_END: + if (insn->imm == 64) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + else if (insn->imm == 32) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + else /* insn->imm == 16 */ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_ALU64: + switch (BPF_OP(insn->code)) { + case BPF_MOV: + if (BPF_SRC(insn->code)) { + /* REG to REG move*/ + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } else { + /* IMM to REG move*/ + if (insn->imm >= 0) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + } + break; + default: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LD: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + if (BPF_MODE(insn->code) == BPF_IMM) { + s64 val; + + val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); + if (val > 0 && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + else if (val >= S32_MIN && val <= S32_MAX) + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + rvt[idx] |= RVT_DONE; + idx++; + } else { + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + } + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + if (BPF_MODE(insn->code) == BPF_IMM) + set_reg_val_type(&exit_rvt, insn->dst_reg, + insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT); + else + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_LDX: + switch (BPF_SIZE(insn->code)) { + case BPF_DW: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT); + break; + case BPF_B: + case BPF_H: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS); + break; + case BPF_W: + set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT); + break; + } + rvt[idx] |= RVT_DONE; + break; + case BPF_JMP: + switch (BPF_OP(insn->code)) { + case BPF_EXIT: + rvt[idx] = RVT_DONE | exit_rvt; + rvt[prog->len] = exit_rvt; + return idx; + case BPF_JA: + rvt[idx] |= RVT_DONE; + idx += insn->off; + break; + case BPF_JEQ: + case BPF_JGT: + case BPF_JGE: + case BPF_JSET: + case BPF_JNE: + case BPF_JSGT: + case BPF_JSGE: + if (follow_taken) { + rvt[idx] |= RVT_BRANCH_TAKEN; + idx += insn->off; + follow_taken = false; + } else { + rvt[idx] |= RVT_FALL_THROUGH; + } + break; + case BPF_CALL: + set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); + /* Upon call return, argument registers are clobbered. */ + for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + rvt[idx] |= RVT_DONE; + break; + default: + WARN(1, "Unhandled BPF_JMP case.\n"); + rvt[idx] |= RVT_DONE; + break; + } + break; + default: + rvt[idx] |= RVT_DONE; + break; + } + } + return idx; +} + +/* + * Track the value range (i.e. 32-bit vs. 64-bit) of each register at + * each eBPF insn. This allows unneeded sign and zero extension + * operations to be omitted. + * + * Doesn't handle yet confluence of control paths with conflicting + * ranges, but it is good enough for most sane code. + */ +static int reg_val_propagate(struct jit_ctx *ctx) +{ + const struct bpf_prog *prog = ctx->skf; + u64 exit_rvt; + int reg; + int i; + + /* + * 11 registers * 3 bits/reg leaves top bits free for other + * uses. Bit-62..63 used to see if we have visited an insn. + */ + exit_rvt = 0; + + /* Upon entry, argument registers are 64-bit. */ + for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++) + set_reg_val_type(&exit_rvt, reg, REG_64BIT); + + /* + * First follow all conditional branches on the fall-through + * edge of control flow.. + */ + reg_val_propagate_range(ctx, exit_rvt, 0, false); +restart_search: + /* + * Then repeatedly find the first conditional branch where + * both edges of control flow have not been taken, and follow + * the branch taken edge. We will end up restarting the + * search once per conditional branch insn. + */ + for (i = 0; i < prog->len; i++) { + u64 rvt = ctx->reg_val_types[i]; + + if ((rvt & RVT_VISITED_MASK) == RVT_DONE || + (rvt & RVT_VISITED_MASK) == 0) + continue; + if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) { + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true); + } else { /* RVT_BRANCH_TAKEN */ + WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n"); + reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false); + } + goto restart_search; + } + /* + * Eventually all conditional branches have been followed on + * both branches and we are done. Any insn that has not been + * visited at this point is dead. + */ + + return 0; +} + +static void jit_fill_hole(void *area, unsigned int size) +{ + u32 *p; + + /* We are guaranteed to have aligned memory. */ + for (p = area; size >= sizeof(u32); size -= sizeof(u32)) + uasm_i_break(&p, BRK_BUG); /* Increments p */ +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + struct bpf_prog *orig_prog = prog; + bool tmp_blinded = false; + struct bpf_prog *tmp; + struct bpf_binary_header *header = NULL; + struct jit_ctx ctx; + unsigned int image_size; + u8 *image_ptr; + + if (!bpf_jit_enable || !cpu_has_mips64r2) + return prog; + + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + memset(&ctx, 0, sizeof(ctx)); + + ctx.offsets = kcalloc(prog->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); + if (ctx.offsets == NULL) + goto out_err; + + ctx.reg_val_types = kcalloc(prog->len + 1, sizeof(*ctx.reg_val_types), GFP_KERNEL); + if (ctx.reg_val_types == NULL) + goto out_err; + + ctx.skf = prog; + + if (reg_val_propagate(&ctx)) + goto out_err; + + /* + * First pass discovers used resources and instruction offsets + * assuming short branches are used. + */ + if (build_int_body(&ctx)) + goto out_err; + + /* + * If no calls are made (EBPF_SAVE_RA), then tail call count + * in $v1, else we must save in n$s4. + */ + if (ctx.flags & EBPF_SEEN_TC) { + if (ctx.flags & EBPF_SAVE_RA) + ctx.flags |= EBPF_SAVE_S4; + else + ctx.flags |= EBPF_TCC_IN_V1; + } + + /* + * Second pass generates offsets, if any branches are out of + * range a jump-around long sequence is generated, and we have + * to try again from the beginning to generate the new + * offsets. This is done until no additional conversions are + * necessary. + */ + do { + ctx.idx = 0; + ctx.gen_b_offsets = 1; + ctx.long_b_conversion = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + } while (ctx.long_b_conversion); + + image_size = 4 * ctx.idx; + + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) + goto out_err; + + ctx.target = (u32 *)image_ptr; + + /* Third pass generates the code */ + ctx.idx = 0; + if (gen_int_prologue(&ctx)) + goto out_err; + if (build_int_body(&ctx)) + goto out_err; + if (build_int_epilogue(&ctx, MIPS_R_RA)) + goto out_err; + + /* Update the icache */ + flush_icache_range((unsigned long)ctx.target, + (unsigned long)(ctx.target + ctx.idx * sizeof(u32))); + + if (bpf_jit_enable > 1) + /* Dump JIT code */ + bpf_jit_dump(prog->len, image_size, 2, ctx.target); + + bpf_jit_binary_lock_ro(header); + prog->bpf_func = (void *)ctx.target; + prog->jited = 1; + prog->jited_len = image_size; +out_normal: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + kfree(ctx.offsets); + kfree(ctx.reg_val_types); + + return prog; + +out_err: + prog = orig_prog; + if (header) + bpf_jit_binary_free(header); + goto out_normal; +} -- cgit v1.2.3 From 5fff41e1f89d93feef9833c49a415dc337af5a99 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 1 Aug 2017 09:41:34 +0300 Subject: IB/core: Fix race condition in resolving IP to MAC Currently while resolving IP address to MAC address single delayed work is used for resolving multiple such resolve requests. This singled work is essentially performs two tasks. (a) any retry needed to resolve and (b) it executes the callback function for all completed requests While work is executing callbacks, any new work scheduled on for this workqueue is lost because workqueue has completed looking at all pending requests and now looking at callbacks, but work is still under execution. Any further retry to look at pending requests in process_req() after executing callbacks would lead to similar race condition (may be reduce the probably further but doesn't eliminate it). Retrying to enqueue work that from queue_req() context is not something rest of the kernel modules have followed. Therefore fix in this patch utilizes kernel facility to enqueue multiple work items to a workqueue. This ensures that no such requests gets lost in synchronization. Request list is still maintained so that rdma_cancel_addr() can unlink the request and get the completion with error sooner. Neighbour update event handling continues to be handled in same way as before. Additionally process_req() work entry cancels any pending work for a request that gets completed while processing those requests. Originally ib_addr was ST workqueue, but it became MT work queue with patch of [1]. This patch again makes it similar to ST so that neighbour update events handler work item doesn't race with other work items. In one such below trace, (though on 4.5 based kernel) it can be seen that process_req() never executed the callback, which is likely for an event that was schedule by queue_req() when previous callback was getting executed by workqueue. [] schedule+0x3e/0x90 [] schedule_timeout+0x1b5/0x210 [] ? ip_route_output_flow+0x27/0x70 [] ? addr_resolve+0x149/0x1b0 [ib_addr] [] wait_for_completion+0x10f/0x170 [] ? try_to_wake_up+0x210/0x210 [] ? rdma_copy_addr+0xa0/0xa0 [ib_addr] [] rdma_addr_find_l2_eth_by_grh+0x1d0/0x278 [ib_addr] [] ? sub_alloc+0x77/0x1c0 [] ib_init_ah_from_wc+0x3a7/0x5a0 [ib_core] [] cm_req_handler+0xea/0x580 [ib_cm] [] ? __switch_to+0x212/0x5e0 [] cm_work_handler+0x6d/0x150 [ib_cm] [] process_one_work+0x151/0x4b0 [] worker_thread+0x120/0x480 [] ? __schedule+0x30b/0x890 [] ? process_one_work+0x4b0/0x4b0 [] ? process_one_work+0x4b0/0x4b0 [] kthread+0xce/0xf0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x42/0x70 [] ? kthread_freezable_should_stop+0x70/0x70 INFO: task kworker/u144:1:156520 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. kworker/u144:1 D ffff883ffe1d7600 0 156520 2 0x00000080 Workqueue: ib_addr process_req [ib_addr] ffff883f446fbbd8 0000000000000046 ffff881f95280000 ffff881ff24de200 ffff883f66120000 ffff883f446f8008 ffff881f95280000 ffff883f6f9208c4 ffff883f6f9208c8 00000000ffffffff ffff883f446fbbf8 ffffffff816b0dde [1] http://lkml.iu.edu/hypermail/linux/kernel/1608.1/05834.html Signed-off-by: Parav Pandit Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 62 ++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 01236cef7bfb..437522ca97b4 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -61,6 +61,7 @@ struct addr_req { void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; + struct delayed_work work; int status; u32 seq; }; @@ -295,7 +296,7 @@ int rdma_translate_ip(const struct sockaddr *addr, } EXPORT_SYMBOL(rdma_translate_ip); -static void set_timeout(unsigned long time) +static void set_timeout(struct delayed_work *delayed_work, unsigned long time) { unsigned long delay; @@ -303,7 +304,7 @@ static void set_timeout(unsigned long time) if ((long)delay < 0) delay = 0; - mod_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, delayed_work, delay); } static void queue_req(struct addr_req *req) @@ -318,8 +319,7 @@ static void queue_req(struct addr_req *req) list_add(&req->list, &temp_req->list); - if (req_list.next == &req->list) - set_timeout(req->timeout); + set_timeout(&req->work, req->timeout); mutex_unlock(&lock); } @@ -574,6 +574,37 @@ static int addr_resolve(struct sockaddr *src_in, return ret; } +static void process_one_req(struct work_struct *_work) +{ + struct addr_req *req; + struct sockaddr *src_in, *dst_in; + + mutex_lock(&lock); + req = container_of(_work, struct addr_req, work.work); + + if (req->status == -ENODATA) { + src_in = (struct sockaddr *)&req->src_addr; + dst_in = (struct sockaddr *)&req->dst_addr; + req->status = addr_resolve(src_in, dst_in, req->addr, + true, req->seq); + if (req->status && time_after_eq(jiffies, req->timeout)) { + req->status = -ETIMEDOUT; + } else if (req->status == -ENODATA) { + /* requeue the work for retrying again */ + set_timeout(&req->work, req->timeout); + mutex_unlock(&lock); + return; + } + } + list_del(&req->list); + mutex_unlock(&lock); + + req->callback(req->status, (struct sockaddr *)&req->src_addr, + req->addr, req->context); + put_client(req->client); + kfree(req); +} + static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; @@ -591,20 +622,23 @@ static void process_req(struct work_struct *work) true, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; - else if (req->status == -ENODATA) + else if (req->status == -ENODATA) { + set_timeout(&req->work, req->timeout); continue; + } } list_move_tail(&req->list, &done_list); } - if (!list_empty(&req_list)) { - req = list_entry(req_list.next, struct addr_req, list); - set_timeout(req->timeout); - } mutex_unlock(&lock); list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); + /* It is safe to cancel other work items from this work item + * because at a time there can be only one work item running + * with this single threaded work queue. + */ + cancel_delayed_work(&req->work); req->callback(req->status, (struct sockaddr *) &req->src_addr, req->addr, req->context); put_client(req->client); @@ -647,6 +681,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->context = context; req->client = client; atomic_inc(&client->refcount); + INIT_DELAYED_WORK(&req->work, process_one_req); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); req->status = addr_resolve(src_in, dst_in, addr, true, req->seq); @@ -701,7 +736,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) req->status = -ECANCELED; req->timeout = jiffies; list_move(&req->list, &req_list); - set_timeout(req->timeout); + set_timeout(&req->work, req->timeout); break; } } @@ -807,9 +842,8 @@ static int netevent_callback(struct notifier_block *self, unsigned long event, if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; - if (neigh->nud_state & NUD_VALID) { - set_timeout(jiffies); - } + if (neigh->nud_state & NUD_VALID) + set_timeout(&work, jiffies); } return 0; } @@ -820,7 +854,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); + addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM); if (!addr_wq) return -ENOMEM; -- cgit v1.2.3 From f7a6cb7b38c6845b26aaa8bbdf519ff6e3090831 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:35 +0300 Subject: RDMA/uverbs: Prevent leak of reserved field initialize to zero the response structure to prevent the leakage of "resp.reserved" field. drivers/infiniband/core/uverbs_cmd.c:1178 ib_uverbs_resize_cq() warn: check that 'resp.reserved' doesn't leak information Fixes: 33b9b3ee9709 ("IB: Add userspace support for resizing CQs") Signed-off-by: Leon Romanovsky Reviewed-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2c98533a0203..c551d2b275fd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1153,7 +1153,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file, int out_len) { struct ib_uverbs_resize_cq cmd; - struct ib_uverbs_resize_cq_resp resp; + struct ib_uverbs_resize_cq_resp resp = {}; struct ib_udata udata; struct ib_cq *cq; int ret = -EINVAL; -- cgit v1.2.3 From efdd6f53b10aead0f5cf19a93dd3eb268ac0d991 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 1 Aug 2017 09:41:36 +0300 Subject: IB/uverbs: Fix device cleanup Uverbs device should be cleaned up only when there is no potential usage of. As part of ib_uverbs_remove_one which might be triggered upon reset flow the device reference count is decreased as expected and leave the final cleanup to the FDs that were opened. Current code increases reference count upon opening a new command FD and decreases it upon closing the file. The event FD is opened internally and rely on the command FD by taking on it a reference count. In case that the command FD was closed and just later the event FD we may ensure that the device resources as of srcu are still alive as they are still in use. Fixing the above by moving the reference count decreasing to the place where the command FD is really freed instead of doing that when it was just closed. fixes: 036b10635739 ("IB/uverbs: Enable device removal when there are active user space applications") Signed-off-by: Yishai Hadas Reviewed-by: Matan Barak Reviewed-by: Jason Gunthorpe Tested-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3d2609608f58..c023e2c81b8f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -250,6 +250,7 @@ void ib_uverbs_release_file(struct kref *ref) if (atomic_dec_and_test(&file->device->refcount)) ib_uverbs_comp_dev(file->device); + kobject_put(&file->device->kobj); kfree(file); } @@ -917,7 +918,6 @@ err: static int ib_uverbs_close(struct inode *inode, struct file *filp) { struct ib_uverbs_file *file = filp->private_data; - struct ib_uverbs_device *dev = file->device; mutex_lock(&file->cleanup_mutex); if (file->ucontext) { @@ -939,7 +939,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp) ib_uverbs_release_async_event_file); kref_put(&file->ref, ib_uverbs_release_file); - kobject_put(&dev->kobj); return 0; } -- cgit v1.2.3 From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 1 Aug 2017 09:41:37 +0300 Subject: RDMA/mlx5: Fix existence check for extended address vector The extended address vector is the highest bit in be32 variable, but it was compared with the lowest. This patch fixes the endianness of that check and removes already declared define. Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support") Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/odp.c | 2 +- include/linux/mlx5/qp.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index ae0746754008..3d701c7a4c91 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler( if (qp->ibqp.qp_type != IB_QPT_RC) { av = *wqe; - if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT)) + if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); else *wqe += sizeof(struct mlx5_base_av); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 6f41270d80c0..f378dc0e7eaf 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg { #define MLX5_WQE_CTRL_OPCODE_MASK 0xff #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8 -#define MLX5_WQE_AV_EXT 0x80000000 enum { MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4, -- cgit v1.2.3 From aaf83aecd294fd61db6b34051f718f3e7ea34c22 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 3 Aug 2017 15:43:14 +0200 Subject: xgene: Always get clk source, but ignore if it's missing for SGMII ports Even the driver doesn't do anything with the clk source for SGMII ports it needs to be enabled by doing a devm_clk_get(), if there is a clk source in DT. Fixes: 0db01097cabd ('xgene: Don't fail probe, if there is no clk resource for SGMII interfaces') Signed-off-by: Thomas Bogendoerfer Tested-by: Laura Abbott Acked-by: Iyappan Subramanian Tested-by: Will Deacon Signed-off-by: David S. Miller --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 86058a9f3417..1d307f2def2d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1785,9 +1785,9 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) xgene_enet_gpiod_get(pdata); - if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) { - pdata->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdata->clk)) { + pdata->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(pdata->clk)) { + if (pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) { /* Abort if the clock is defined but couldn't be * retrived. Always abort if the clock is missing on * DT system as the driver can't cope with this case. -- cgit v1.2.3 From 5db465f235e74293e285e1fa924a55e52ba52a98 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Aug 2017 11:12:08 +0300 Subject: IB/hns: checking for IS_ERR() instead of NULL The hns_roce_v1_create_lp_qp() returns NULL on error, not error pointers. Fixes: bfcc681bd09d ("IB/hns: Fix the bug when free mr") Signed-off-by: Dan Carpenter Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 23fad6d96944..2540b65e242c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -733,7 +733,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) continue; free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd); - if (IS_ERR(free_mr->mr_free_qp[i])) { + if (!free_mr->mr_free_qp[i]) { dev_err(dev, "Create loop qp failed!\n"); goto create_lp_qp_failed; } -- cgit v1.2.3 From ea7bd56fa309d10a41b1041827a63c0746c47554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 2 Aug 2017 12:37:16 -0700 Subject: xfs: Fix leak of discard bio The bio describing discard operation is allocated by __blkdev_issue_discard() which returns us a reference to it. That reference is never released and thus we leak this bio. Drop the bio reference once it completes in xlog_discard_endio(). CC: stable@vger.kernel.org Fixes: 4560e78f40cb55bd2ea8f1ef4001c5baa88531c7 Signed-off-by: Jan Kara Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_log_cil.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index fbe72b134bef..43aa42a3a5d3 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -539,6 +539,7 @@ xlog_discard_endio( INIT_WORK(&ctx->discard_endio_work, xlog_discard_endio_work); queue_work(xfs_discard_wq, &ctx->discard_endio_work); + bio_put(bio); } static void -- cgit v1.2.3 From 56bdf855e676f1f2ed7033f288f57dfd315725ba Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 3 Aug 2017 13:19:13 -0700 Subject: xfs: Fix per-inode DAX flag inheritance According to the commit that implemented per-inode DAX flag: commit 58f88ca2df72 ("xfs: introduce per-inode DAX enablement") the flag is supposed to act as "inherit flag". Currently this only works in the situations where parent directory already has a flag in di_flags set, otherwise inheritance does not work. This is because setting the XFS_DIFLAG2_DAX flag is done in a wrong branch designated for di_flags, not di_flags2. Fix this by moving the code to branch designated for setting di_flags2, which does test for flags in di_flags2. Fixes: 58f88ca2df72 ("xfs: introduce per-inode DAX enablement") Signed-off-by: Lukas Czerner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ceef77c0416a..ff48f0096810 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -874,7 +874,6 @@ xfs_ialloc( case S_IFREG: case S_IFDIR: if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) { - uint64_t di_flags2 = 0; uint di_flags = 0; if (S_ISDIR(mode)) { @@ -911,20 +910,23 @@ xfs_ialloc( di_flags |= XFS_DIFLAG_NODEFRAG; if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM) di_flags |= XFS_DIFLAG_FILESTREAM; - if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) - di_flags2 |= XFS_DIFLAG2_DAX; ip->i_d.di_flags |= di_flags; - ip->i_d.di_flags2 |= di_flags2; } if (pip && (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) && pip->i_d.di_version == 3 && ip->i_d.di_version == 3) { + uint64_t di_flags2 = 0; + if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) { - ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_d.di_cowextsize = pip->i_d.di_cowextsize; } + if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) + di_flags2 |= XFS_DIFLAG2_DAX; + + ip->i_d.di_flags2 |= di_flags2; } /* FALLTHROUGH */ case S_IFLNK: -- cgit v1.2.3 From 2c460621bb2e6baf8a475c407cdb29029b2497ac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 Aug 2017 22:24:41 +0200 Subject: bpf: fix byte order test in test_verifier We really must check with #if __BYTE_ORDER == XYZ instead of just presence of #ifdef __LITTLE_ENDIAN. I noticed that when actually running this on big endian machine, the latter test resolves to true for user space, same for #ifdef __BIG_ENDIAN. E.g., looking at endian.h from libc, both are also defined there, so we really must test this against __BYTE_ORDER instead for proper insns selection. For the kernel, such checks are fine though e.g. see 13da9e200fe4 ("Revert "endian: #define __BYTE_ORDER"") and 415586c9e6d3 ("UAPI: fix endianness conditionals in M32R's asm/stat.h") for some more context, but not for user space. Lets also make sure to properly include endian.h. After that, suite passes for me: ./test_verifier: ELF 64-bit MSB executable, [...] Linux foo 4.13.0-rc3+ #4 SMP Fri Aug 4 06:59:30 EDT 2017 s390x s390x s390x GNU/Linux Before fix: Summary: 505 PASSED, 11 FAILED After fix: Summary: 516 PASSED, 0 FAILED Fixes: 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads") Signed-off-by: Daniel Borkmann Acked-by: Yonghong Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_verifier.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index addea82f76c9..d3ed7324105e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8,6 +8,7 @@ * License as published by the Free Software Foundation. */ +#include #include #include #include @@ -1098,7 +1099,7 @@ static struct bpf_test tests[] = { "check skb->hash byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -1135,7 +1136,7 @@ static struct bpf_test tests[] = { "check skb->hash byte load not permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 3), #else @@ -1244,7 +1245,7 @@ static struct bpf_test tests[] = { "check skb->hash half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash)), #else @@ -1259,7 +1260,7 @@ static struct bpf_test tests[] = { "check skb->hash half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), #else @@ -5422,7 +5423,7 @@ static struct bpf_test tests[] = { "check bpf_perf_event_data->sample_period byte load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -5438,7 +5439,7 @@ static struct bpf_test tests[] = { "check bpf_perf_event_data->sample_period half load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -5454,7 +5455,7 @@ static struct bpf_test tests[] = { "check bpf_perf_event_data->sample_period word load permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_perf_event_data, sample_period)), #else @@ -5481,7 +5482,7 @@ static struct bpf_test tests[] = { "check skb->data half load not permitted", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, data)), #else @@ -5497,7 +5498,7 @@ static struct bpf_test tests[] = { "check skb->tc_classid half load not permitted for lwt prog", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), -#ifdef __LITTLE_ENDIAN +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, tc_classid)), #else -- cgit v1.2.3 From 9e01e2d56db23485a75864b6aeee8e443f024ddb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 2 Aug 2017 12:51:29 -0700 Subject: soc: imx: gpcv2: fix regulator deferred probe If a regulator requests a deferred probe, the power domain gets initialized twice. This leads to a list double add (without list debugging the kernel hangs due to the double add later): WARNING: CPU: 0 PID: 19 at lib/list_debug.c:31 __list_add_valid+0xbc/0xc4 list_add double add: new=c1229754, prev=c12383b4, next=c1229754. Initialize the power domain after we get the regulator. Also do not print an error in case the regulator defers probing. Cc: Fabio Estevam Cc: Andrey Smirnov Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver") Signed-off-by: Stefan Agner Acked-by: Andrey Smirnov Tested-by: Andrey Smirnov Signed-off-by: Shawn Guo --- drivers/soc/imx/gpcv2.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index 3039072911a5..afc7ecc3c187 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -200,16 +200,11 @@ static int imx7_pgc_domain_probe(struct platform_device *pdev) domain->dev = &pdev->dev; - ret = pm_genpd_init(&domain->genpd, NULL, true); - if (ret) { - dev_err(domain->dev, "Failed to init power domain\n"); - return ret; - } - domain->regulator = devm_regulator_get_optional(domain->dev, "power"); if (IS_ERR(domain->regulator)) { if (PTR_ERR(domain->regulator) != -ENODEV) { - dev_err(domain->dev, "Failed to get domain's regulator\n"); + if (PTR_ERR(domain->regulator) != -EPROBE_DEFER) + dev_err(domain->dev, "Failed to get domain's regulator\n"); return PTR_ERR(domain->regulator); } } else { @@ -217,6 +212,12 @@ static int imx7_pgc_domain_probe(struct platform_device *pdev) domain->voltage, domain->voltage); } + ret = pm_genpd_init(&domain->genpd, NULL, true); + if (ret) { + dev_err(domain->dev, "Failed to init power domain\n"); + return ret; + } + ret = of_genpd_add_provider_simple(domain->dev->of_node, &domain->genpd); if (ret) { -- cgit v1.2.3 From 8317562097acec4c9e9750eb91115687931bca35 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Wed, 2 Aug 2017 22:06:11 +0200 Subject: ARM: dts: i.MX25: add ranges to tscadc Add a ranges; line to the tscadc node. This creates a 1:1 mapping between the addresses used by tscadc and those in its child nodes (adc, tsc). Without such a mapping, the reg = ... lines in the tsc and adc nodes do not create a resource. Probing the fsl-imx25-tcq and fsl-imx25-tsadc drivers will then fail since there's no IORESOURCE_MEM. Signed-off-by: Martin Kaiser Fixes: 92f651f39b42 ("ARM: dts: imx25: Add TSC and ADC support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx25.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index dfcc8e00cf1c..0ade3619f3c3 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -297,6 +297,7 @@ #address-cells = <1>; #size-cells = <1>; status = "disabled"; + ranges; adc: adc@50030800 { compatible = "fsl,imx25-gcq"; -- cgit v1.2.3 From aae9d563230f974f2daa7135f911f021b2bba9e6 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Fri, 14 Jul 2017 12:06:59 +0200 Subject: iwlwifi: mvm: Fix a memory leak in an error handling path in 'iwl_mvm_sar_get_wgds_table()' We should free 'wgds.pointer' here as done a few lines above in another error handling path. It was allocated within 'acpi_evaluate_object()'. Fixes: c52030a01ccc ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table") Signed-off-by: Christophe JAILLET Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 79e7a7a285dc..82863e9273eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -1275,8 +1275,10 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm) entry = &wifi_pkg->package.elements[idx++]; if ((entry->type != ACPI_TYPE_INTEGER) || - (entry->integer.value > U8_MAX)) - return -EINVAL; + (entry->integer.value > U8_MAX)) { + ret = -EINVAL; + goto out_free; + } mvm->geo_profiles[i].values[j] = entry->integer.value; } -- cgit v1.2.3 From 978d13d60c34818a41fc35962602bdfa5c03f214 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 4 Aug 2017 23:59:31 -0700 Subject: iscsi-target: Fix iscsi_np reset hung task during parallel delete This patch fixes a bug associated with iscsit_reset_np_thread() that can occur during parallel configfs rmdir of a single iscsi_np used across multiple iscsi-target instances, that would result in hung task(s) similar to below where configfs rmdir process context was blocked indefinately waiting for iscsi_np->np_restart_comp to finish: [ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds. [ 6726.119440] Tainted: G W O 4.1.26-3321 #2 [ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88 0 15550 1 0x00000000 [ 6726.140058] ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08 [ 6726.147593] ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0 [ 6726.155132] ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8 [ 6726.162667] Call Trace: [ 6726.165150] [] schedule+0x32/0x80 [ 6726.170156] [] schedule_timeout+0x214/0x290 [ 6726.176030] [] ? __send_signal+0x52/0x4a0 [ 6726.181728] [] wait_for_completion+0x96/0x100 [ 6726.187774] [] ? wake_up_state+0x10/0x10 [ 6726.193395] [] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod] [ 6726.201278] [] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod] [ 6726.210033] [] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod] [ 6726.218351] [] configfs_write_file+0xaa/0x110 [ 6726.224392] [] vfs_write+0xa4/0x1b0 [ 6726.229576] [] SyS_write+0x41/0xb0 [ 6726.234659] [] system_call_fastpath+0x12/0x71 It would happen because each iscsit_reset_np_thread() sets state to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting for completion on iscsi_np->np_restart_comp. However, if iscsi_np was active processing a login request and more than a single iscsit_reset_np_thread() caller to the same iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np kthread process context in __iscsi_target_login_thread() would flush pending signals and only perform a single completion of np->np_restart_comp before going back to sleep within transport specific iscsit_transport->iscsi_accept_np code. To address this bug, add a iscsi_np->np_reset_count and update __iscsi_target_login_thread() to keep completing np->np_restart_comp until ->np_reset_count has reached zero. Reported-by: Gary Guo Tested-by: Gary Guo Cc: Mike Christie Cc: Hannes Reinecke Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_login.c | 7 +++++-- include/target/iscsi/iscsi_target_core.h | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 12803de99400..5001261f5d69 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -418,6 +418,7 @@ int iscsit_reset_np_thread( return 0; } np->np_thread_state = ISCSI_NP_THREAD_RESET; + atomic_inc(&np->np_reset_count); if (np->np_thread) { spin_unlock_bh(&np->np_thread_lock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index e9bdc8b86e7d..dc13afbd4c88 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1243,9 +1243,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) flush_signals(current); spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); + return 1; } else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) { spin_unlock_bh(&np->np_thread_lock); goto exit; @@ -1278,7 +1280,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) goto exit; } else if (rc < 0) { spin_lock_bh(&np->np_thread_lock); - if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + if (atomic_dec_if_positive(&np->np_reset_count) >= 0) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; spin_unlock_bh(&np->np_thread_lock); complete(&np->np_restart_comp); iscsit_put_transport(conn->conn_transport); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 0ca1fb08805b..fb87d32f5e51 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -786,6 +786,7 @@ struct iscsi_np { int np_sock_type; enum np_thread_state_table np_thread_state; bool enabled; + atomic_t np_reset_count; enum iscsi_timer_flags_table np_login_timer_flags; u32 np_exports; enum np_flags_table np_flags; -- cgit v1.2.3 From 732e49850c5e15231e11a0a464748bcbade5e3c2 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 3 Aug 2017 17:13:54 -0700 Subject: netvsc: fix race on sub channel creation The existing sub channel code did not wait for all the sub-channels to completely initialize. This could lead to race causing crash in napi_netif_del() from bad list. The existing code would send an init message, then wait only for the initial response that the init message was received. It thought it was waiting for sub channels but really the init response did the wakeup. The new code keeps track of the number of open channels and waits until that many are open. Other issues here were: * host might return less sub-channels than was requested. * the new init status is not valid until after init was completed. Fixes: b3e6b82a0099 ("hv_netvsc: Wait for sub-channels to be processed during probe") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc.c | 1 + drivers/net/hyperv/rndis_filter.c | 14 ++++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d6c25580f8dd..12cc64bfcff8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -765,7 +765,8 @@ struct netvsc_device { u32 max_chn; u32 num_chn; - refcount_t sc_offered; + atomic_t open_chn; + wait_queue_head_t subchan_open; struct rndis_device *extension; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 96f90c75d1b7..d18c3326a1f7 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -78,6 +78,7 @@ static struct netvsc_device *alloc_net_device(void) net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; init_completion(&net_device->channel_init_wait); + init_waitqueue_head(&net_device->subchan_open); return net_device; } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 85c00e1c52b6..d6308ffda53e 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1048,8 +1048,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) else netif_napi_del(&nvchan->napi); - if (refcount_dec_and_test(&nvscdev->sc_offered)) - complete(&nvscdev->channel_init_wait); + atomic_inc(&nvscdev->open_chn); + wake_up(&nvscdev->subchan_open); } int rndis_filter_device_add(struct hv_device *dev, @@ -1090,8 +1090,6 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->max_chn = 1; net_device->num_chn = 1; - refcount_set(&net_device->sc_offered, 0); - net_device->extension = rndis_device; rndis_device->ndev = net; @@ -1221,11 +1219,11 @@ int rndis_filter_device_add(struct hv_device *dev, rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, net_device->num_chn); + atomic_set(&net_device->open_chn, 1); num_rss_qs = net_device->num_chn - 1; if (num_rss_qs == 0) return 0; - refcount_set(&net_device->sc_offered, num_rss_qs); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); init_packet = &net_device->channel_init_pkt; @@ -1242,15 +1240,19 @@ int rndis_filter_device_add(struct hv_device *dev, if (ret) goto out; + wait_for_completion(&net_device->channel_init_wait); if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { ret = -ENODEV; goto out; } - wait_for_completion(&net_device->channel_init_wait); net_device->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels; + /* wait for all sub channels to open */ + wait_event(net_device->subchan_open, + atomic_read(&net_device->open_chn) == net_device->num_chn); + /* ignore failues from setting rss parameters, still have channels */ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key, net_device->num_chn); -- cgit v1.2.3 From 8cd7b51ff57c74260b20c97623b0e0d420c22be8 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 2 Aug 2017 10:26:58 +0000 Subject: arm64: renesas: salvator-common: avoid audio_clkout naming conflict clock name of "audio_clkout" is used by Renesas sound driver. This duplicated naming breaks its clock registering/unregistering. Especially, when unbind/bind it can't handle clkout correctly. This patch renames "audio_clkout" to "audio-clkout" to avoid naming conflict. Fixes: 8a8f181d2cfd ("arm64: renesas: salvator-x: use CS2000 as AUDIO_CLK_B") Signed-off-by: Kuninori Morimoto Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/salvator-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index a451996f590a..f903957da504 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -45,7 +45,7 @@ stdout-path = "serial0:115200n8"; }; - audio_clkout: audio_clkout { + audio_clkout: audio-clkout { /* * This is same as <&rcar_sound 0> * but needed to avoid cs2000/rcar_sound probe dead-lock -- cgit v1.2.3 From d6086598d34e1cf9091c7be201f5b2041dc6203e Mon Sep 17 00:00:00 2001 From: Xiong Zhang Date: Wed, 2 Aug 2017 10:31:01 +0800 Subject: drm/i915/gvt: Change the max length of mmio_reg_rw from 4 to 8 When linux guest access mmio with __raw_i915_read64 or __raw_i915_write64, its length is 8 bytes. This fix the linux guest in xengt couldn't boot up as it fail in reading pv_info->magic. Fixes: 65f9f6febf12 ("drm/i915/gvt: Optimize MMIO register handling for some large MMIO blocks") Signed-off-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 323664a238f5..feed9921b3b3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3028,7 +3028,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, gvt_mmio_func func; int ret; - if (WARN_ON(bytes > 4)) + if (WARN_ON(bytes > 8)) return -EINVAL; /* -- cgit v1.2.3 From 5279fc7724ae3a82c9cfe5b09c1fb07ff0e41056 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 24 Jul 2017 11:14:31 +0200 Subject: drm/i915: Fix out-of-bounds array access in bdw_load_gamma_lut bdw_load_gamma_lut is writing beyond the array to the maximum value. The intend of the function is to clamp values > 1 to 1, so write the intended color to the max register. This fixes the following KASAN warning: [ 197.020857] [IGT] kms_pipe_color: executing [ 197.063434] [IGT] kms_pipe_color: starting subtest ctm-0-25-pipe0 [ 197.078989] ================================================================== [ 197.079127] BUG: KASAN: slab-out-of-bounds in bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079188] Read of size 2 at addr ffff8800d38db150 by task kms_pipe_color/1839 [ 197.079208] CPU: 2 PID: 1839 Comm: kms_pipe_color Tainted: G U 4.13.0-rc1-patser+ #5211 [ 197.079215] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 [ 197.079220] Call Trace: [ 197.079230] dump_stack+0x68/0x9e [ 197.079239] print_address_description+0x6f/0x250 [ 197.079251] kasan_report+0x216/0x370 [ 197.079374] ? bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079451] ? gen8_write16+0x4e0/0x4e0 [i915] [ 197.079460] __asan_report_load2_noabort+0x14/0x20 [ 197.079535] bdw_load_gamma_lut.isra.2+0x3b9/0x570 [i915] [ 197.079612] broadwell_load_luts+0x1df/0x550 [i915] [ 197.079690] intel_color_load_luts+0x7b/0x80 [i915] [ 197.079764] intel_begin_crtc_commit+0x138/0x760 [i915] [ 197.079783] drm_atomic_helper_commit_planes_on_crtc+0x1a3/0x820 [drm_kms_helper] [ 197.079859] ? intel_pre_plane_update+0x571/0x580 [i915] [ 197.079937] intel_update_crtc+0x238/0x330 [i915] [ 197.080016] intel_update_crtcs+0x10f/0x210 [i915] [ 197.080092] intel_atomic_commit_tail+0x1552/0x3340 [i915] [ 197.080101] ? _raw_spin_unlock+0x3c/0x40 [ 197.080110] ? __queue_work+0xb40/0xbf0 [ 197.080188] ? skl_update_crtcs+0xc00/0xc00 [i915] [ 197.080195] ? trace_hardirqs_on+0xd/0x10 [ 197.080269] ? intel_atomic_commit_ready+0x128/0x13c [i915] [ 197.080329] ? __i915_sw_fence_complete+0x5b8/0x6d0 [i915] [ 197.080336] ? debug_object_activate+0x39e/0x580 [ 197.080397] ? i915_sw_fence_await+0x30/0x30 [i915] [ 197.080409] ? __might_sleep+0x15b/0x180 [ 197.080483] intel_atomic_commit+0x944/0xa70 [i915] [ 197.080490] ? refcount_dec_and_test+0x11/0x20 [ 197.080567] ? intel_atomic_commit_tail+0x3340/0x3340 [i915] [ 197.080597] ? drm_atomic_crtc_set_property+0x303/0x580 [drm] [ 197.080674] ? intel_atomic_commit_tail+0x3340/0x3340 [i915] [ 197.080704] drm_atomic_commit+0xd7/0xe0 [drm] [ 197.080722] drm_atomic_helper_crtc_set_property+0xec/0x130 [drm_kms_helper] [ 197.080749] drm_mode_crtc_set_obj_prop+0x7d/0xb0 [drm] [ 197.080775] drm_mode_obj_set_property_ioctl+0x50b/0x5d0 [drm] [ 197.080783] ? __might_fault+0x104/0x180 [ 197.080809] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080838] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080861] drm_ioctl_kernel+0x154/0x1a0 [drm] [ 197.080885] drm_ioctl+0x624/0x8f0 [drm] [ 197.080910] ? drm_mode_obj_find_prop_id+0x160/0x160 [drm] [ 197.080934] ? drm_getunique+0x210/0x210 [drm] [ 197.080943] ? __handle_mm_fault+0x1bd0/0x1ce0 [ 197.080949] ? lock_downgrade+0x610/0x610 [ 197.080957] ? __lru_cache_add+0x15a/0x180 [ 197.080967] do_vfs_ioctl+0xd92/0xe40 [ 197.080975] ? ioctl_preallocate+0x1b0/0x1b0 [ 197.080982] ? selinux_capable+0x20/0x20 [ 197.080991] ? __do_page_fault+0x7b7/0x9a0 [ 197.080997] ? lock_downgrade+0x5bb/0x610 [ 197.081007] ? security_file_ioctl+0x57/0x90 [ 197.081016] SyS_ioctl+0x4e/0x80 [ 197.081024] entry_SYSCALL_64_fastpath+0x18/0xad [ 197.081030] RIP: 0033:0x7f61f287a987 [ 197.081035] RSP: 002b:00007fff7d44d188 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 197.081043] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f61f287a987 [ 197.081048] RDX: 00007fff7d44d1c0 RSI: 00000000c01864ba RDI: 0000000000000003 [ 197.081053] RBP: 00007f61f2b3eb00 R08: 0000000000000059 R09: 0000000000000000 [ 197.081058] R10: 0000002ea5c4a290 R11: 0000000000000246 R12: 00007f61f2b3eb58 [ 197.081063] R13: 0000000000001010 R14: 00007f61f2b3eb58 R15: 0000000000002702 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101659 Signed-off-by: Maarten Lankhorst Reported-by: Martin Peres Cc: Martin Peres Fixes: 82cf435b3134 ("drm/i915: Implement color management on bdw/skl/bxt/kbl") Cc: Shashank Sharma Cc: Kiran S Kumar Cc: Kausal Malladi Cc: Lionel Landwerlin Cc: Matt Roper Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.7+ Link: https://patchwork.freedesktop.org/patch/msgid/20170724091431.24251-1-maarten.lankhorst@linux.intel.com Reviewed-by: Lionel Landwerlin (cherry picked from commit 09a92bc8773b4314e02b478e003fe5936ce85adb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_color.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 306c6b06b330..17c4ae7e4e7c 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -398,6 +398,7 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) } /* Program the max register to clamp values > 1.0. */ + i = lut_size - 1; I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), drm_color_lut_extract(lut[i].red, 16)); I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), -- cgit v1.2.3 From 00e06297b351d286199c6cf542ea70b8fbabafee Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Thu, 3 Aug 2017 21:16:15 +0200 Subject: MIPS: mm: remove duplicate "const" qualifier on insn_table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following gcc 7.x build error: arch/mips/mm/uasm-mips.c:51:26: error: duplicate ‘const’ declaration specifier [-Werror=duplicate-decl-specifier] static const struct insn const insn_table[insn_invalid] = { Signed-off-by: Thomas Petazzoni Fixes: ce807d5f67ed ("MIPS: Optimize uasm insn lookup.") Cc: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16926/ Signed-off-by: Ralf Baechle --- arch/mips/mm/uasm-mips.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 3f74f6c1f065..9fea6c6bbf49 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -48,7 +48,7 @@ #include "uasm.c" -static const struct insn const insn_table[insn_invalid] = { +static const struct insn insn_table[insn_invalid] = { [insn_addiu] = {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, [insn_addu] = {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD}, [insn_and] = {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD}, -- cgit v1.2.3 From 6f542ebeaee0ee552a902ce3892220fc22c7ec8e Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Thu, 3 Aug 2017 08:20:22 +0200 Subject: MIPS: Fix race on setting and getting cpu_online_mask While testing cpu hoptlug (cpu down and up in loops) on kernel 4.4, it was observed that occasionally check for cpu online will fail in kernel/cpu.c, _cpu_up: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/tree/kernel/cpu.c?h=v4.4.79#n485 518 /* Arch-specific enabling code. */ 519 ret = __cpu_up(cpu, idle); 520 521 if (ret != 0) 522 goto out_notify; 523 BUG_ON(!cpu_online(cpu)); Reason is race between start_secondary and _cpu_up. cpu_callin_map is set before cpu_online_mask. In __cpu_up, cpu_callin_map is waited for, but cpu online mask is not, resulting in race in which secondary processor started and set cpu_callin_map, but not yet set the online mask,resulting in above BUG being hit. Upstream differs in the area. cpu_online check is in bringup_wait_for_ap, which is after cpu reached AP_ONLINE_IDLE,where secondary passed its start function. Nonetheless, fix makes start_secondary safe and not depending on other locks throughout the code. It protects as well against cpu_online checks put in between sometimes in the future. Fix this by moving completion after all flags are set. Signed-off-by: Matija Glavinic Pecotic Cc: Alexander Sverdlin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16925/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 770d4d1516cb..6bace7695788 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -376,9 +376,6 @@ asmlinkage void start_secondary(void) cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); - complete(&cpu_running); - synchronise_count_slave(cpu); - set_cpu_online(cpu, true); set_cpu_sibling_map(cpu); @@ -386,6 +383,9 @@ asmlinkage void start_secondary(void) calculate_cpu_foreign_map(); + complete(&cpu_running); + synchronise_count_slave(cpu); + /* * irq will be enabled in ->smp_finish(), enabling it too early * is dangerous. -- cgit v1.2.3 From 33a73649acc412848996ae0921d577f33eb54af5 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 2 Aug 2017 17:04:04 +0200 Subject: MIPS: gitignore: ignore generated .c files Add ashldi3.c and bswapsi.c to the list of ignored files. Signed-off-by: Bartosz Golaszewski Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16905/ Signed-off-by: Ralf Baechle --- arch/mips/boot/compressed/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 arch/mips/boot/compressed/.gitignore diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore new file mode 100644 index 000000000000..ebae133f1d00 --- /dev/null +++ b/arch/mips/boot/compressed/.gitignore @@ -0,0 +1,2 @@ +ashldi3.c +bswapsi.c -- cgit v1.2.3 From 81a67e52763d1db6b3200c648d1efa16daddc536 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 2 Aug 2017 12:39:28 -0500 Subject: MIPS: Octeon: Fix broken EDAC driver. Commit "MIPS: Octeon: Remove unused L2C types and macros." broke the the EDAC driver. Bring back 'cvmx-l2d-defs.h' file and the missing types for L2C. Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C types and macros.") Fixes: 15f6847923a8 ("MIPS: Octeon: Remove unused L2C types and macros.") Signed-off-by: Steven J. Hill Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Cc: # 4.12+ Patchwork: https://patchwork.linux-mips.org/patch/16906/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/octeon/cvmx-l2c-defs.h | 37 ++++++++++++++++- arch/mips/include/asm/octeon/cvmx-l2d-defs.h | 60 ++++++++++++++++++++++++++++ arch/mips/include/asm/octeon/cvmx.h | 1 + 3 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 arch/mips/include/asm/octeon/cvmx-l2d-defs.h diff --git a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h index d045973ddb33..3ea84acf1814 100644 --- a/arch/mips/include/asm/octeon/cvmx-l2c-defs.h +++ b/arch/mips/include/asm/octeon/cvmx-l2c-defs.h @@ -33,6 +33,10 @@ #define CVMX_L2C_DBG (CVMX_ADD_IO_SEG(0x0001180080000030ull)) #define CVMX_L2C_CFG (CVMX_ADD_IO_SEG(0x0001180080000000ull)) #define CVMX_L2C_CTL (CVMX_ADD_IO_SEG(0x0001180080800000ull)) +#define CVMX_L2C_ERR_TDTX(block_id) \ + (CVMX_ADD_IO_SEG(0x0001180080A007E0ull) + ((block_id) & 3) * 0x40000ull) +#define CVMX_L2C_ERR_TTGX(block_id) \ + (CVMX_ADD_IO_SEG(0x0001180080A007E8ull) + ((block_id) & 3) * 0x40000ull) #define CVMX_L2C_LCKBASE (CVMX_ADD_IO_SEG(0x0001180080000058ull)) #define CVMX_L2C_LCKOFF (CVMX_ADD_IO_SEG(0x0001180080000060ull)) #define CVMX_L2C_PFCTL (CVMX_ADD_IO_SEG(0x0001180080000090ull)) @@ -66,9 +70,40 @@ ((offset) & 1) * 8) #define CVMX_L2C_WPAR_PPX(offset) (CVMX_ADD_IO_SEG(0x0001180080840000ull) + \ ((offset) & 31) * 8) -#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull)) +union cvmx_l2c_err_tdtx { + uint64_t u64; + struct cvmx_l2c_err_tdtx_s { + __BITFIELD_FIELD(uint64_t dbe:1, + __BITFIELD_FIELD(uint64_t sbe:1, + __BITFIELD_FIELD(uint64_t vdbe:1, + __BITFIELD_FIELD(uint64_t vsbe:1, + __BITFIELD_FIELD(uint64_t syn:10, + __BITFIELD_FIELD(uint64_t reserved_22_49:28, + __BITFIELD_FIELD(uint64_t wayidx:18, + __BITFIELD_FIELD(uint64_t reserved_2_3:2, + __BITFIELD_FIELD(uint64_t type:2, + ;))))))))) + } s; +}; + +union cvmx_l2c_err_ttgx { + uint64_t u64; + struct cvmx_l2c_err_ttgx_s { + __BITFIELD_FIELD(uint64_t dbe:1, + __BITFIELD_FIELD(uint64_t sbe:1, + __BITFIELD_FIELD(uint64_t noway:1, + __BITFIELD_FIELD(uint64_t reserved_56_60:5, + __BITFIELD_FIELD(uint64_t syn:6, + __BITFIELD_FIELD(uint64_t reserved_22_49:28, + __BITFIELD_FIELD(uint64_t wayidx:15, + __BITFIELD_FIELD(uint64_t reserved_2_6:5, + __BITFIELD_FIELD(uint64_t type:2, + ;))))))))) + } s; +}; + union cvmx_l2c_cfg { uint64_t u64; struct cvmx_l2c_cfg_s { diff --git a/arch/mips/include/asm/octeon/cvmx-l2d-defs.h b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h new file mode 100644 index 000000000000..a951ad5d65ad --- /dev/null +++ b/arch/mips/include/asm/octeon/cvmx-l2d-defs.h @@ -0,0 +1,60 @@ +/***********************license start*************** + * Author: Cavium Networks + * + * Contact: support@caviumnetworks.com + * This file is part of the OCTEON SDK + * + * Copyright (c) 2003-2017 Cavium, Inc. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, Version 2, as + * published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, but + * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or + * NONINFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this file; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * or visit http://www.gnu.org/licenses/. + * + * This file may also be available under a different license from Cavium. + * Contact Cavium Networks for more information + ***********************license end**************************************/ + +#ifndef __CVMX_L2D_DEFS_H__ +#define __CVMX_L2D_DEFS_H__ + +#define CVMX_L2D_ERR (CVMX_ADD_IO_SEG(0x0001180080000010ull)) +#define CVMX_L2D_FUS3 (CVMX_ADD_IO_SEG(0x00011800800007B8ull)) + + +union cvmx_l2d_err { + uint64_t u64; + struct cvmx_l2d_err_s { + __BITFIELD_FIELD(uint64_t reserved_6_63:58, + __BITFIELD_FIELD(uint64_t bmhclsel:1, + __BITFIELD_FIELD(uint64_t ded_err:1, + __BITFIELD_FIELD(uint64_t sec_err:1, + __BITFIELD_FIELD(uint64_t ded_intena:1, + __BITFIELD_FIELD(uint64_t sec_intena:1, + __BITFIELD_FIELD(uint64_t ecc_ena:1, + ;))))))) + } s; +}; + +union cvmx_l2d_fus3 { + uint64_t u64; + struct cvmx_l2d_fus3_s { + __BITFIELD_FIELD(uint64_t reserved_40_63:24, + __BITFIELD_FIELD(uint64_t ema_ctl:3, + __BITFIELD_FIELD(uint64_t reserved_34_36:3, + __BITFIELD_FIELD(uint64_t q3fus:34, + ;)))) + } s; +}; + +#endif diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h index 9742202f2a32..e638735cc3ac 100644 --- a/arch/mips/include/asm/octeon/cvmx.h +++ b/arch/mips/include/asm/octeon/cvmx.h @@ -62,6 +62,7 @@ enum cvmx_mips_space { #include #include #include +#include #include #include #include -- cgit v1.2.3 From bed58464303fabdbf126544cbdd95a9e1c2e9949 Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Wed, 2 Aug 2017 12:39:50 -0500 Subject: MIPS: OCTEON: Fix USB platform code breakage. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error when CONFIG_SMP is turned off: CC [M] arch/mips/cavium-octeon/octeon-usb.o arch/mips/cavium-octeon/octeon-usb.c: In function ‘dwc3_octeon_device_init’: arch/mips/cavium-octeon/octeon-usb.c:540:4: error: implicit declaration of function ‘devm_iounmap’ [-Werror=implicit-function-declaration] devm_iounmap(&pdev->dev, base); Signed-off-by: Steven J. Hill Reviewed-by: James Hogan Tested-by: Matt Redfearn Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16907/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/octeon-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index 542be1cd0f32..bfdfaf32d2c4 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -13,9 +13,9 @@ #include #include #include +#include #include -#include /* USB Control Register */ union cvm_usbdrd_uctl_ctl { -- cgit v1.2.3 From ae5b0675942ab30cde96099c68a2290bd1aafcca Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 1 Aug 2017 13:32:57 -0700 Subject: Revert "MIPS: Don't unnecessarily include kmalloc.h into ." Commit 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") claimed that the inclusion of the machine's kmalloc.h from asm/cache.h is unnecessary, but this is not true. Without including kmalloc.h we don't get a definition for ARCH_DMA_MINALIGN, which means we no longer suitably align DMA. Further to this the definition of ARCH_KMALLOC_MINALIGN provided by linux/slab.h ends up being set to the alignment of an unsigned long long value rather than to ARCH_DMA_MINALIGN, which means that buffers allocated using kmalloc may no longer be safely aligned for use with DMA. Fix this by re-adding the include of kmalloc.h in asm/cache.h. This reverts commit 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") Signed-off-by: Paul Burton Fixes: 296e46db0073 ("MIPS: Don't unnecessarily include kmalloc.h into .") Cc: linux-mips@linux-mips.org Cc: stable # v4.12+ Patchwork: https://patchwork.linux-mips.org/patch/16895/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/cache.h b/arch/mips/include/asm/cache.h index fc67947ed658..8b14c2706aa5 100644 --- a/arch/mips/include/asm/cache.h +++ b/arch/mips/include/asm/cache.h @@ -9,6 +9,8 @@ #ifndef _ASM_CACHE_H #define _ASM_CACHE_H +#include + #define L1_CACHE_SHIFT CONFIG_MIPS_L1_CACHE_SHIFT #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -- cgit v1.2.3 From b5fa57ddc4a2492441a1391f07d5c8a282271249 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 3 Aug 2017 17:58:07 +0100 Subject: drm/i915/perf: fix flex eu registers programming We were reserving fewer dwords in the ring than necessary. Indeed we're always writing all registers once, so discard the actual number of registers given by the user and just program the whitelisted ones once. Fixes: 19f81df2859e ("drm/i915/perf: Add OA unit support for Gen 8+") Reported-by: Matthew Auld Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld Cc: # v4.12+ Link: https://patchwork.freedesktop.org/patch/msgid/20170803165812.2373-6-lionel.g.landwerlin@intel.com (cherry picked from commit 01d928e9a1644eb2e28f684905f888e700c7b9dc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_perf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 9cd22f83b0cf..f33d90226704 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1601,11 +1601,11 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req) u32 *cs; int i; - cs = intel_ring_begin(req, n_flex_regs * 2 + 4); + cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = MI_LOAD_REGISTER_IMM(n_flex_regs + 1); + *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1); *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL); *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | -- cgit v1.2.3 From cd82f37a9ddaaafb33d8bc3f44857edbad5d52bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Aug 2017 11:41:35 +0100 Subject: drm/i915/shrinker: Wrap need_resched() inside preempt-disable In order for us to successfully detect the end of a timeslice, preemption must be disabled. Otherwise, inside the loop we may be preempted many times without our noticing, and each time our timeslice will be reset, invalidating need_resched() Reported-by: Joonas Lahtinen Reported-by: Tomi Sarvela Fixes: 290271de34f6 ("drm/i915: Spin for struct_mutex inside shrinker") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: # v4.13-rc1+ Link: https://patchwork.freedesktop.org/patch/msgid/20170804104135.26805-1-chris@chris-wilson.co.uk Tested-by: Joonas Lahtinen Reviewed-by: Joonas Lahtinen (cherry picked from commit 6cb0c6ad9e07f2c7971c4e8e0d9b7ceba151a925) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 1032f98add11..77fb39808131 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -43,16 +43,21 @@ static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock) return true; case MUTEX_TRYLOCK_FAILED: + *unlock = false; + preempt_disable(); do { cpu_relax(); if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - case MUTEX_TRYLOCK_SUCCESS: *unlock = true; - return true; + break; } } while (!need_resched()); + preempt_enable(); + return *unlock; - return false; + case MUTEX_TRYLOCK_SUCCESS: + *unlock = true; + return true; } BUG(); -- cgit v1.2.3 From 1e2ba788787c86f527eca6ffd9adb97d691a810e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 31 May 2017 11:33:55 +0300 Subject: drm/i915: fix backlight invert for non-zero minimum brightness When we started following the backlight minimum brightness in 6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness") we overlooked the brightness invert quirk. Even if we invert the brightness, we need to take the min limit into account. We probably missed this because the invert has only been required on gen4 for proper operation. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101127 Fixes: 6dda730e55f4 ("drm/i915: respect the VBT minimum backlight brightness") Cc: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170531083355.7898-1-jani.nikula@intel.com (cherry picked from commit e9d7486eac949f2a8d121657e536c8abdd4ea088) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_panel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 96c2cbd81869..593349be8b9d 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -469,7 +469,7 @@ static u32 intel_panel_compute_brightness(struct intel_connector *connector, if (i915.invert_brightness > 0 || dev_priv->quirks & QUIRK_INVERT_BRIGHTNESS) { - return panel->backlight.max - val; + return panel->backlight.max - val + panel->backlight.min; } return val; -- cgit v1.2.3 From 44a12806d010944a5727f1dc99123121e3e2c8c6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 7 Aug 2017 21:25:01 +1000 Subject: Revert "powerpc/64: Avoid restore_math call if possible in syscall exit" This reverts commit bc4f65e4cf9d6cc43e0e9ba0b8648cf9201cd55f. As reported by Andreas, this commit is causing unrecoverable SLB misses in the system call exit path: Unrecoverable exception 4100 at c00000000000a1ec Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=2 PowerMac ... CPU: 0 PID: 18626 Comm: rm Not tainted 4.13.0-rc3 #1 task: c00000018335e080 task.stack: c000000139e50000 NIP: c00000000000a1ec LR: c00000000000a118 CTR: 0000000000000000 REGS: c000000139e53bb0 TRAP: 4100 Not tainted (4.13.0-rc3) MSR: 9000000000001030 CR: 24000044 XER: 20000000 SOFTE: 1 GPR00: 0000000000000000 c000000139e53e30 c000000000abb500 fffffffffffffffe GPR04: c0000001eb866298 0000000000000000 0000000000000000 c00000018335e080 GPR08: 900000000000d032 0000000000000000 0000000000000002 fffffffffffff001 GPR12: c000000139e50000 c00000000ffff000 00003fffa8c0dca0 00003fffa8c0dc88 GPR16: 0000000010000000 0000000000000001 00003fffa8c0eaa0 0000000000000000 GPR20: 00003fffa8c27528 00003fffa8c27b00 0000000000000000 0000000000000000 GPR24: 00003fffa8c0d918 00003ffff1b3efa0 00003fffa8c26d68 0000000000000000 GPR28: 00003fffa8c249e8 00003fffa8c263d0 00003fffa8c27550 00003ffff1b3ef10 NIP [c00000000000a1ec] system_call_exit+0xc0/0x21c LR [c00000000000a118] system_call+0x58/0x6c Call Trace: [c000000139e53e30] [c00000000000a118] system_call+0x58/0x6c (unreliable) Instruction dump: 64a51000 7c6300d0 f8a101a0 4bffff9c 3c000000 60000006 780007c6 64000000 60000000 7c004039 4082001c e8ed0170 <88070b78> 88c70b79 7c003214 2c200000 This is caused by us trying to load THREAD_LOAD_FP with MSR_RI=0, and taking an SLB miss on the thread struct. Reported-by: Andreas Schwab Diagnosed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 60 +++++++++++++----------------------------- arch/powerpc/kernel/process.c | 4 --- 2 files changed, 18 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 49d8422767b4..e925c1c99c71 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -223,17 +223,27 @@ system_call_exit: andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- .Lsyscall_exit_work - /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */ - li r7,MSR_FP + andi. r0,r8,MSR_FP + beq 2f #ifdef CONFIG_ALTIVEC - oris r7,r7,MSR_VEC@h + andis. r0,r8,MSR_VEC@h + bne 3f #endif - and r0,r8,r7 - cmpd r0,r7 - bne .Lsyscall_restore_math -.Lsyscall_restore_math_cont: +2: addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_BOOK3S + li r10,MSR_RI + mtmsrd r10,1 /* Restore RI */ +#endif + bl restore_math +#ifdef CONFIG_PPC_BOOK3S + li r11,0 + mtmsrd r11,1 +#endif + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO - cmpld r3,r11 +3: cmpld r3,r11 ld r5,_CCR(r1) bge- .Lsyscall_error .Lsyscall_error_cont: @@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r5,_CCR(r1) b .Lsyscall_error_cont -.Lsyscall_restore_math: - /* - * Some initial tests from restore_math to avoid the heavyweight - * C code entry and MSR manipulations. - */ - LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK) - and. r0,r0,r8 - bne 1f - - ld r7,PACACURRENT(r13) - lbz r0,THREAD+THREAD_LOAD_FP(r7) -#ifdef CONFIG_ALTIVEC - lbz r6,THREAD+THREAD_LOAD_VEC(r7) - add r0,r0,r6 -#endif - cmpdi r0,0 - beq .Lsyscall_restore_math_cont - -1: addi r3,r1,STACK_FRAME_OVERHEAD -#ifdef CONFIG_PPC_BOOK3S - li r10,MSR_RI - mtmsrd r10,1 /* Restore RI */ -#endif - bl restore_math -#ifdef CONFIG_PPC_BOOK3S - li r11,0 - mtmsrd r11,1 -#endif - /* Restore volatiles, reload MSR from updated one */ - ld r8,_MSR(r1) - ld r3,RESULT(r1) - li r11,-MAX_ERRNO - b .Lsyscall_restore_math_cont - /* Traced system call support */ .Lsyscall_dotrace: bl save_nvgprs diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..ec480966f9bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - /* - * Syscall exit makes a similar initial check before branching - * to restore_math. Keep them in synch. - */ if (!msr_tm_active(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; -- cgit v1.2.3 From b399ee28c29c07f6a7ad87dade9148828757e6e9 Mon Sep 17 00:00:00 2001 From: Goran Ferenc Date: Thu, 27 Jul 2017 18:08:47 +0200 Subject: MIPS: VDSO: Fix clobber lists in fallback code paths Extend clobber lists to include all GP registers. Fixes: 0b523a85e134 ("MIPS: VDSO: Add implementation of gettimeofday() fallback") Signed-off-by: Miodrag Dinic Signed-off-by: Goran Ferenc Signed-off-by: Aleksandar Markovic Reviewed-by: James Hogan Cc: Bo Hu Cc: Douglas Leung Cc: James Hogan Cc: Jin Qian Cc: Paul Burton Cc: Petar Jovanovic Cc: Raghu Gandham Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16879/ Signed-off-by: Ralf Baechle --- arch/mips/vdso/gettimeofday.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/mips/vdso/gettimeofday.c b/arch/mips/vdso/gettimeofday.c index 974276e828b2..e2690d7ca4dd 100644 --- a/arch/mips/vdso/gettimeofday.c +++ b/arch/mips/vdso/gettimeofday.c @@ -35,7 +35,8 @@ static __always_inline long gettimeofday_fallback(struct timeval *_tv, " syscall\n" : "=r" (ret), "=r" (error) : "r" (tv), "r" (tz), "r" (nr) - : "memory"); + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); return error ? -ret : ret; } @@ -55,7 +56,8 @@ static __always_inline long clock_gettime_fallback(clockid_t _clkid, " syscall\n" : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) - : "memory"); + : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", + "$14", "$15", "$24", "$25", "hi", "lo", "memory"); return error ? -ret : ret; } -- cgit v1.2.3 From 9ac6e7ccc11e795a6e3eecc1f59346a99e51cd07 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 1 Aug 2017 17:57:19 +0200 Subject: pinctrl: armada-37xx: Fix the pin 23 on south bridge Pin 23 on South bridge does not belong to the rgmii group. It belongs to a separate group which can have 3 functions. Due to this the fix also have to update the way the functions are managed. Until now each groups used NB_FUNCS(which was 2) functions. For the mpp23, 3 functions are available but it is the only group which needs it, so on the loop involving NB_FUNCS an extra test was added to handle only the functions added. The bug was visible with the merge of the commit 07d065abf93d "arm64: dts: marvell: armada-3720-db: Add vqmmc regulator for SD slot", the gpio regulator used the gpio 23, due to this the whole rgmii group was setup to gpio which broke the Ethernet support on the Armada 3720 DB board. Thanks to this patch, the UHS SD cards (which need the vqmmc) _and_ the Ethernet work again. Cc: stable@vger.kernel.org Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support for Armada 37xx") Signed-off-by: Gregory CLEMENT Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index f024e25787fc..c95c76ecc3f7 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -37,7 +37,7 @@ #define IRQ_STATUS 0x10 #define IRQ_WKUP 0x18 -#define NB_FUNCS 2 +#define NB_FUNCS 3 #define GPIO_PER_REG 32 /** @@ -126,6 +126,16 @@ struct armada_37xx_pinctrl { .funcs = {_func1, "gpio"} \ } +#define PIN_GRP_GPIO_3(_name, _start, _nr, _mask, _v1, _v2, _v3, _f1, _f2) \ + { \ + .name = _name, \ + .start_pin = _start, \ + .npins = _nr, \ + .reg_mask = _mask, \ + .val = {_v1, _v2, _v3}, \ + .funcs = {_f1, _f2, "gpio"} \ + } + #define PIN_GRP_EXTRA(_name, _start, _nr, _mask, _v1, _v2, _start2, _nr2, \ _f1, _f2) \ { \ @@ -171,12 +181,13 @@ static struct armada_37xx_pin_group armada_37xx_sb_groups[] = { PIN_GRP_GPIO("usb32_drvvbus0", 0, 1, BIT(0), "drvbus"), PIN_GRP_GPIO("usb2_drvvbus1", 1, 1, BIT(1), "drvbus"), PIN_GRP_GPIO("sdio_sb", 24, 6, BIT(2), "sdio"), - PIN_GRP_EXTRA("rgmii", 6, 12, BIT(3), 0, BIT(3), 23, 1, "mii", "gpio"), + PIN_GRP_GPIO("rgmii", 6, 12, BIT(3), "mii"), PIN_GRP_GPIO("pcie1", 3, 2, BIT(4), "pcie"), PIN_GRP_GPIO("ptp", 20, 3, BIT(5), "ptp"), PIN_GRP("ptp_clk", 21, 1, BIT(6), "ptp", "mii"), PIN_GRP("ptp_trig", 22, 1, BIT(7), "ptp", "mii"), - PIN_GRP("mii_col", 23, 1, BIT(8), "mii", "mii_err"), + PIN_GRP_GPIO_3("mii_col", 23, 1, BIT(8) | BIT(14), 0, BIT(8), BIT(14), + "mii", "mii_err"), }; const struct armada_37xx_pin_data armada_37xx_pin_nb = { @@ -208,7 +219,7 @@ static int armada_37xx_get_func_reg(struct armada_37xx_pin_group *grp, { int f; - for (f = 0; f < NB_FUNCS; f++) + for (f = 0; (f < NB_FUNCS) && grp->funcs[f]; f++) if (!strcmp(grp->funcs[f], func)) return f; @@ -795,7 +806,7 @@ static int armada_37xx_fill_group(struct armada_37xx_pinctrl *info) for (j = 0; j < grp->extra_npins; j++) grp->pins[i+j] = grp->extra_pin + j; - for (f = 0; f < NB_FUNCS; f++) { + for (f = 0; (f < NB_FUNCS) && grp->funcs[f]; f++) { int ret; /* check for unique functions and count groups */ ret = armada_37xx_add_function(info->funcs, &funcsize, @@ -847,7 +858,7 @@ static int armada_37xx_fill_func(struct armada_37xx_pinctrl *info) struct armada_37xx_pin_group *gp = &info->groups[g]; int f; - for (f = 0; f < NB_FUNCS; f++) { + for (f = 0; (f < NB_FUNCS) && gp->funcs[f]; f++) { if (strcmp(gp->funcs[f], name) == 0) { *groups = gp->name; groups++; -- cgit v1.2.3 From 6b67c3906cd74d88da4f6717b4b294bd52fce56a Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 1 Aug 2017 17:57:20 +0200 Subject: pinctrl: armada-37xx: Fix number of pin in south bridge On the south bridge we have pin from to 29, so it gives 30 pins (and not 29). Without this patch the kernel complain with the following traces: cat /sys/kernel/debug/pinctrl/d0018800.pinctrl/pingroups [ 154.530205] armada-37xx-pinctrl d0018800.pinctrl: failed to get pin(29) name [ 154.537567] ------------[ cut here ]------------ [ 154.542348] WARNING: CPU: 1 PID: 1347 at /home/gclement/open/kernel/marvell-mainline-linux/drivers/pinctrl/core.c:1610 pinctrl_groups_show+0x15c/0x1a0 [ 154.555918] Modules linked in: [ 154.558890] CPU: 1 PID: 1347 Comm: cat Tainted: G W 4.13.0-rc1-00001-g19e1b9fa219d #525 [ 154.568316] Hardware name: Marvell Armada 3720 Development Board DB-88F3720-DDR3 (DT) [ 154.576311] task: ffff80001d32d100 task.stack: ffff80001bdc0000 [ 154.583048] PC is at pinctrl_groups_show+0x15c/0x1a0 [ 154.587816] LR is at pinctrl_groups_show+0x148/0x1a0 [ 154.592847] pc : [] lr : [] pstate: 00000145 [ 154.600840] sp : ffff80001bdc3c80 [ 154.604255] x29: ffff80001bdc3c80 x28: 00000000f7750000 [ 154.609825] x27: ffff80001d05d198 x26: 0000000000000009 [ 154.615224] x25: ffff0000089ead20 x24: 0000000000000002 [ 154.620705] x23: ffff000008c8e1d0 x22: ffff80001be55700 [ 154.626187] x21: ffff80001d05d100 x20: 0000000000000005 [ 154.631667] x19: 0000000000000006 x18: 0000000000000010 [ 154.637238] x17: 0000000000000000 x16: ffff0000081fc4b8 [ 154.642726] x15: 0000000000000006 x14: ffff0000899e537f [ 154.648214] x13: ffff0000099e538d x12: 206f742064656c69 [ 154.653613] x11: 6166203a6c727463 x10: 0000000005f5e0ff [ 154.659094] x9 : ffff80001bdc38c0 x8 : 286e697020746567 [ 154.664576] x7 : ffff000008551870 x6 : 000000000000011b [ 154.670146] x5 : 0000000000000000 x4 : 0000000000000000 [ 154.675544] x3 : 0000000000000000 x2 : 0000000000000000 [ 154.681025] x1 : ffff000008c8e1d0 x0 : ffff80001be55700 [ 154.686507] Call trace: [ 154.688668] Exception stack(0xffff80001bdc3ab0 to 0xffff80001bdc3be0) [ 154.695224] 3aa0: 0000000000000006 0001000000000000 [ 154.703310] 3ac0: ffff80001bdc3c80 ffff0000083e3adc ffff80001bdc3bb0 00000000ffffffd8 [ 154.711304] 3ae0: 4554535953425553 6f6674616c703d4d 4349564544006d72 6674616c702b3d45 [ 154.719478] 3b00: 313030643a6d726f 6e69702e30303838 ffff80006c727463 ffff0000089635d8 [ 154.727562] 3b20: ffff80001d1ca0cb ffff000008af0fa4 ffff80001bdc3b40 ffff000008c8e1dc [ 154.735648] 3b40: ffff80001bdc3bc0 ffff000008223174 ffff80001be55700 ffff000008c8e1d0 [ 154.743731] 3b60: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 154.752354] 3b80: 000000000000011b ffff000008551870 286e697020746567 ffff80001bdc38c0 [ 154.760446] 3ba0: 0000000005f5e0ff 6166203a6c727463 206f742064656c69 ffff0000099e538d [ 154.767910] 3bc0: ffff0000899e537f 0000000000000006 ffff0000081fc4b8 0000000000000000 [ 154.776085] [] pinctrl_groups_show+0x15c/0x1a0 [ 154.782823] [] seq_read+0x184/0x460 [ 154.787505] [] full_proxy_read+0x60/0xa8 [ 154.793431] [] __vfs_read+0x1c/0x110 [ 154.799001] [] vfs_read+0x84/0x140 [ 154.803860] [] SyS_read+0x44/0xa0 [ 154.808983] [] el0_svc_naked+0x24/0x28 [ 154.814459] ---[ end trace 4cbb00a92d616b95 ]--- Cc: stable@vger.kernel.org Fixes: 87466ccd9401 ("pinctrl: armada-37xx: Add pin controller support for Armada 37xx") Signed-off-by: Gregory CLEMENT Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-37xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index c95c76ecc3f7..0c6d7812d6fd 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -198,7 +198,7 @@ const struct armada_37xx_pin_data armada_37xx_pin_nb = { }; const struct armada_37xx_pin_data armada_37xx_pin_sb = { - .nr_pins = 29, + .nr_pins = 30, .name = "GPIO2", .groups = armada_37xx_sb_groups, .ngroups = ARRAY_SIZE(armada_37xx_sb_groups), -- cgit v1.2.3 From c017d21147848fe017772764a77a7f32c5b017f9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Jul 2017 15:38:17 -0700 Subject: irqchip: brcmstb-l2: Define an irq_pm_shutdown function The Broadcom STB platforms support S5 and we allow specific hardware wake-up events to take us out of this state. Because we were not defining an irq_pm_shutdown() function pointer, we would not be correctly masking non-wakeup events, which would result in spurious wake-ups from sources that were not explicitly configured for wake-up. Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller") Acked-by: Gregory Fong Signed-off-by: Florian Fainelli Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-brcmstb-l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index bddf169c4b37..b009b916a292 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -189,6 +189,7 @@ static int __init brcmstb_l2_intc_of_init(struct device_node *np, ct->chip.irq_suspend = brcmstb_l2_intc_suspend; ct->chip.irq_resume = brcmstb_l2_intc_resume; + ct->chip.irq_pm_shutdown = brcmstb_l2_intc_suspend; if (data->can_wake) { /* This IRQ chip can wake the system, set all child interrupts -- cgit v1.2.3 From 5d996132d921c391af5f267123eca1a6a3148ecd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 4 Aug 2017 19:26:34 +0300 Subject: pinctrl: intel: merrifield: Correct UART pin lists UART pin lists consist GPIO numbers which is simply wrong. Replace it by pin numbers. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-merrifield.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index 4d4ef42a39b5..86c4b3fab7b0 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -343,9 +343,9 @@ static const struct pinctrl_pin_desc mrfld_pins[] = { static const unsigned int mrfld_sdio_pins[] = { 50, 51, 52, 53, 54, 55, 56 }; static const unsigned int mrfld_spi5_pins[] = { 90, 91, 92, 93, 94, 95, 96 }; -static const unsigned int mrfld_uart0_pins[] = { 124, 125, 126, 127 }; -static const unsigned int mrfld_uart1_pins[] = { 128, 129, 130, 131 }; -static const unsigned int mrfld_uart2_pins[] = { 132, 133, 134, 135 }; +static const unsigned int mrfld_uart0_pins[] = { 115, 116, 117, 118 }; +static const unsigned int mrfld_uart1_pins[] = { 119, 120, 121, 122 }; +static const unsigned int mrfld_uart2_pins[] = { 123, 124, 125, 126 }; static const unsigned int mrfld_pwm0_pins[] = { 144 }; static const unsigned int mrfld_pwm1_pins[] = { 145 }; static const unsigned int mrfld_pwm2_pins[] = { 132 }; -- cgit v1.2.3 From 0cca6c8920ade95e2741b2062cf1397dc546fb0f Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Sun, 6 Aug 2017 16:00:05 +0200 Subject: pinctrl: generic: update references to Documentation/pinctrl.txt Update deprecated references to Documentation/pinctrl.txt since it has been moved to Documentation/driver-api/pinctl.rst. Signed-off-by: Ludovic Desroches Fixes: 5a9b73832e9e ("pinctrl.txt: move it to the driver-api book") Signed-off-by: Linus Walleij --- Documentation/gpio/gpio-legacy.txt | 2 +- MAINTAINERS | 2 +- include/linux/device.h | 2 +- include/linux/pinctrl/pinconf-generic.h | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt index b34fd94f7089..5eacc147ea87 100644 --- a/Documentation/gpio/gpio-legacy.txt +++ b/Documentation/gpio/gpio-legacy.txt @@ -459,7 +459,7 @@ pin controller? This is done by registering "ranges" of pins, which are essentially cross-reference tables. These are described in -Documentation/pinctrl.txt +Documentation/driver-api/pinctl.rst While the pin allocation is totally managed by the pinctrl subsystem, gpio (under gpiolib) is still maintained by gpio drivers. It may happen diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..2c85558aec19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10375,7 +10375,7 @@ L: linux-gpio@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git S: Maintained F: Documentation/devicetree/bindings/pinctrl/ -F: Documentation/pinctrl.txt +F: Documentation/driver-api/pinctl.rst F: drivers/pinctrl/ F: include/linux/pinctrl/ diff --git a/include/linux/device.h b/include/linux/device.h index 723cd54b94da..beabdbc08420 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -843,7 +843,7 @@ struct dev_links_info { * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. * @pins: For device pin management. - * See Documentation/pinctrl.txt for details. + * See Documentation/driver-api/pinctl.rst for details. * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 231d3075815a..e91d1b6a260d 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -81,8 +81,8 @@ * it. * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a * value on the line. Use argument 1 to indicate high level, argument 0 to - * indicate low level. (Please see Documentation/pinctrl.txt, section - * "GPIO mode pitfalls" for a discussion around this parameter.) + * indicate low level. (Please see Documentation/driver-api/pinctl.rst, + * section "GPIO mode pitfalls" for a discussion around this parameter.) * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power * supplies, the argument to this parameter (on a custom format) tells * the driver which alternative power source to use. -- cgit v1.2.3 From 68fe55680d0f3342969f49412fceabb90bdfadba Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 30 Jul 2017 21:28:15 +0100 Subject: MIPS: DEC: Fix an int-handler.S CPU_DADDI_WORKAROUNDS regression Fix a commit 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") regression and remove assembly errors: arch/mips/dec/int-handler.S: Assembler messages: arch/mips/dec/int-handler.S:162: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:163: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:229: Error: Macro used $at after ".set noat" arch/mips/dec/int-handler.S:230: Error: Macro used $at after ".set noat" triggering with with the CPU_DADDI_WORKAROUNDS option set and the DADDIU instruction. This is because with that option in place the instruction becomes a macro, which expands to an LI/DADDU (or actually ADDIU/DADDU) sequence that uses $at as a temporary register. With CPU_DADDI_WORKAROUNDS we only support `-msym32' compilation though, and this is already enforced in arch/mips/Makefile, so choose the 32-bit expansion variant for the supported configurations and then replace the 64-bit variant with #error just in case. Fixes: 3021773c7c3e ("MIPS: DEC: Avoid la pseudo-instruction in delay slots") Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # 4.8+ Patchwork: https://patchwork.linux-mips.org/patch/16893/ Signed-off-by: Ralf Baechle --- arch/mips/dec/int-handler.S | 34 ++++++---------------------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index 1910223a9c02..cea2bb1621e6 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -147,23 +147,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1, cpu_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, cpu_mask_nr_tbl lui t1, %hi(cpu_mask_nr_tbl) addiu t1, %lo(cpu_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, cpu_mask_nr_tbl - .set push - .set noat - lui t1, %highest(cpu_mask_nr_tbl) - lui AT, %hi(cpu_mask_nr_tbl) - daddiu t1, t1, %higher(cpu_mask_nr_tbl) - daddiu AT, AT, %lo(cpu_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 1: lw t2,(t1) nop @@ -214,23 +203,12 @@ * Find irq with highest priority */ # open coded PTR_LA t1,asic_mask_nr_tbl -#if (_MIPS_SZPTR == 32) +#if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) # open coded la t1, asic_mask_nr_tbl lui t1, %hi(asic_mask_nr_tbl) addiu t1, %lo(asic_mask_nr_tbl) - -#endif -#if (_MIPS_SZPTR == 64) - # open coded dla t1, asic_mask_nr_tbl - .set push - .set noat - lui t1, %highest(asic_mask_nr_tbl) - lui AT, %hi(asic_mask_nr_tbl) - daddiu t1, t1, %higher(asic_mask_nr_tbl) - daddiu AT, AT, %lo(asic_mask_nr_tbl) - dsll t1, 32 - daddu t1, t1, AT - .set pop +#else +#error GCC `-msym32' option required for 64-bit DECstation builds #endif 2: lw t2,(t1) nop -- cgit v1.2.3 From 41e327b586762833e48b3703d53312ac32f05f24 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Mon, 7 Aug 2017 21:35:05 +0800 Subject: quota: correct space limit check Currently we compare total space (curspace + rsvspace) with space limit in quota-tools when setting grace time and also in check_bdq(), but we missing rsvspace in somewhere else, correct them. This patch also fix incorrect zero dqb_btime and grace time updating failure when we use rsvspace(e.g. ext4 dalloc feature). Signed-off-by: zhangyi (F) Signed-off-by: Jan Kara --- fs/quota/dquot.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 53a17496c5c5..566e6ef99f07 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1124,6 +1124,10 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) WARN_ON_ONCE(1); dquot->dq_dqb.dqb_rsvspace = 0; } + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_btime = (time64_t) 0; + clear_bit(DQ_BLKS_B, &dquot->dq_flags); } static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) @@ -1145,7 +1149,8 @@ static void dquot_decr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; - if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace <= + dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time64_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1381,14 +1386,18 @@ static int info_idq_free(struct dquot *dquot, qsize_t inodes) static int info_bdq_free(struct dquot *dquot, qsize_t space) { + qsize_t tspace; + + tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace; + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) + tspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; - if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit) + if (tspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; - if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit && - dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit) + if (tspace >= dquot->dq_dqb.dqb_bhardlimit && + tspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } @@ -2681,7 +2690,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di) if (check_blim) { if (!dm->dqb_bsoftlimit || - dm->dqb_curspace < dm->dqb_bsoftlimit) { + dm->dqb_curspace + dm->dqb_rsvspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } else if (!(di->d_fieldmask & QC_SPC_TIMER)) -- cgit v1.2.3 From f9ea3225ddafa269cf1f6b495d132c26fde93903 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 7 Aug 2017 10:16:36 +0200 Subject: bpf: fix selftest/bpf/test_pkt_md_access on s390x Commit 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads") introduced new eBPF test cases. One of them (test_pkt_md_access.c) fails on s390x. The BPF verifier error message is: [root@s8360046 bpf]# ./test_progs test_pkt_access:PASS:ipv4 349 nsec test_pkt_access:PASS:ipv6 212 nsec [....] libbpf: load bpf program failed: Permission denied libbpf: -- BEGIN DUMP LOG --- libbpf: 0: (71) r2 = *(u8 *)(r1 +0) invalid bpf_context access off=0 size=1 libbpf: -- END LOG -- libbpf: failed to load program 'test1' libbpf: failed to load object './test_pkt_md_access.o' Summary: 29 PASSED, 1 FAILED [root@s8360046 bpf]# This is caused by a byte endianness issue. S390x is a big endian architecture. Pointer access to the lowest byte or halfword of a four byte value need to add an offset. On little endian architectures this offset is not needed. Fix this and use the same approach as the originator used for other files (for example test_verifier.c) in his original commit. With this fix the test program test_progs succeeds on s390x: [root@s8360046 bpf]# ./test_progs test_pkt_access:PASS:ipv4 236 nsec test_pkt_access:PASS:ipv6 217 nsec test_xdp:PASS:ipv4 3624 nsec test_xdp:PASS:ipv6 1722 nsec test_l4lb:PASS:ipv4 926 nsec test_l4lb:PASS:ipv6 1322 nsec test_tcp_estats:PASS: 0 nsec test_bpf_obj_id:PASS:get-fd-by-notexist-prog-id 0 nsec test_bpf_obj_id:PASS:get-fd-by-notexist-map-id 0 nsec test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec test_bpf_obj_id:PASS:get-prog-info(fd) 0 nsec test_bpf_obj_id:PASS:get-map-info(fd) 0 nsec test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec test_bpf_obj_id:PASS:get-prog-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-prog-info(next_id->fd) 0 nsec test_bpf_obj_id:PASS:check total prog id found by get_next_id 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec test_bpf_obj_id:PASS:get-map-fd(next_id) 0 nsec test_bpf_obj_id:PASS:check get-map-info(next_id->fd) 0 nsec test_bpf_obj_id:PASS:check total map id found by get_next_id 0 nsec test_pkt_md_access:PASS: 277 nsec Summary: 30 PASSED, 0 FAILED [root@s8360046 bpf]# Fixes: 18f3d6be6be1 ("selftests/bpf: Add test cases to test narrower ctx field loads") Signed-off-by: Thomas Richter Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_pkt_md_access.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c index 71729d47eb85..7956302ecdf2 100644 --- a/tools/testing/selftests/bpf/test_pkt_md_access.c +++ b/tools/testing/selftests/bpf/test_pkt_md_access.c @@ -12,12 +12,23 @@ int _version SEC("version") = 1; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define TEST_FIELD(TYPE, FIELD, MASK) \ { \ TYPE tmp = *(volatile TYPE *)&skb->FIELD; \ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \ return TC_ACT_SHOT; \ } +#else +#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b)) +#define TEST_FIELD(TYPE, FIELD, MASK) \ + { \ + TYPE tmp = *((volatile TYPE *)&skb->FIELD + \ + TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \ + if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \ + return TC_ACT_SHOT; \ + } +#endif SEC("test1") int process(struct __sk_buff *skb) -- cgit v1.2.3 From 22889dbbd98a0e3390e9120074c39c6e5a3fea5e Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Mon, 7 Aug 2017 09:50:14 +0100 Subject: asix: Add rx->ax_skb = NULL after usbnet_skb_return() In asix_rx_fixup_internal() there is a risk that rx->ax_skb gets reused after passing the Ethernet frame into the network stack via usbnet_skb_return(). The risks include: a) asynchronously freeing rx->ax_skb after passing the netdev buffer to the NAPI layer which might corrupt the backlog queue. b) erroneously reusing rx->ax_skb such as calling skb_put_data() multiple times which causes writing off the end of the netdev buffer. Therefore add a defensive rx->ax_skb = NULL after usbnet_skb_return() so that it is not possible to free rx->ax_skb or to apply skb_put_data() too many times. Signed-off-by: Dean Jenkins Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7847436c441e..6983b6bd8cf9 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -168,8 +168,10 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, if (rx->ax_skb) { skb_put_data(rx->ax_skb, skb->data + offset, copy_length); - if (!rx->remaining) + if (!rx->remaining) { usbnet_skb_return(dev, rx->ax_skb); + rx->ax_skb = NULL; + } } offset += (copy_length + 1) & 0xfffe; -- cgit v1.2.3 From 960eb4eeaa47a3a5061a4e47e28411e85840ab2c Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Mon, 7 Aug 2017 09:50:15 +0100 Subject: asix: Ensure asix_rx_fixup_info members are all reset There is a risk that the members of the structure asix_rx_fixup_info become unsynchronised leading to the possibility of a malfunction. For example, rx->split_head was not being set to false after an error was detected so potentially could cause a malformed 32-bit Data header word to be formed. Therefore add function reset_asix_rx_fixup_info() to reset all the members of asix_rx_fixup_info so that future processing will start with known initial conditions. Also, if (skb->len != offset) becomes true then call reset_asix_rx_fixup_info() so that the processing of the next URB starts with known initial conditions. Without the call, the check does nothing which potentially could lead to a malfunction when the next URB is processed. In addition, for robustness, call reset_asix_rx_fixup_info() before every error path's "return 0". This ensures that the next URB is processed from known initial conditions. Signed-off-by: Dean Jenkins Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 6983b6bd8cf9..fda74f33e3ec 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -75,6 +75,27 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, value, index, data, size); } +static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) +{ + /* Reset the variables that have a lifetime outside of + * asix_rx_fixup_internal() so that future processing starts from a + * known set of initial conditions. + */ + + if (rx->ax_skb) { + /* Discard any incomplete Ethernet frame in the netdev buffer */ + kfree_skb(rx->ax_skb); + rx->ax_skb = NULL; + } + + /* Assume the Data header 32-bit word is at the start of the current + * or next URB socket buffer so reset all the state variables. + */ + rx->remaining = 0; + rx->split_head = false; + rx->header = 0; +} + int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, struct asix_rx_fixup_info *rx) { @@ -99,15 +120,7 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Data Header synchronisation was lost, remaining %d\n", rx->remaining); - if (rx->ax_skb) { - kfree_skb(rx->ax_skb); - rx->ax_skb = NULL; - /* Discard the incomplete netdev Ethernet frame - * and assume the Data header is at the start of - * the current URB socket buffer. - */ - } - rx->remaining = 0; + reset_asix_rx_fixup_info(rx); } } @@ -139,11 +152,13 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Bad Header Length 0x%x, offset %d\n", rx->header, offset); + reset_asix_rx_fixup_info(rx); return 0; } if (size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) { netdev_dbg(dev->net, "asix_rx_fixup() Bad RX Length %d\n", size); + reset_asix_rx_fixup_info(rx); return 0; } @@ -180,6 +195,7 @@ int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, if (skb->len != offset) { netdev_err(dev->net, "asix_rx_fixup() Bad SKB Length %d, %d\n", skb->len, offset); + reset_asix_rx_fixup_info(rx); return 0; } -- cgit v1.2.3 From d0c8f338ab41438bdf8472cb4209d4ab54d439d5 Mon Sep 17 00:00:00 2001 From: Dean Jenkins Date: Mon, 7 Aug 2017 09:50:16 +0100 Subject: asix: Fix small memory leak in ax88772_unbind() When Ethernet frames span mulitple URBs, the netdev buffer memory pointed to by the asix_rx_fixup_info structure remains allocated during the time gap between the 2 executions of asix_rx_fixup_internal(). This means that if ax88772_unbind() is called within this time gap to free the memory of the parent private data structure then a memory leak of the part filled netdev buffer memory will occur. Therefore, create a new function asix_rx_fixup_common_free() to free the memory of the netdev buffer and add a call to asix_rx_fixup_common_free() from inside ax88772_unbind(). Consequently when an unbind occurs part way through receiving an Ethernet frame, the netdev buffer memory that is holding part of the received Ethernet frame will now be freed. Signed-off-by: Dean Jenkins Signed-off-by: David S. Miller --- drivers/net/usb/asix.h | 1 + drivers/net/usb/asix_common.c | 15 +++++++++++++++ drivers/net/usb/asix_devices.c | 1 + 3 files changed, 17 insertions(+) diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index d1092421aaa7..9a4171b90947 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -209,6 +209,7 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, struct asix_rx_fixup_info *rx); int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb); +void asix_rx_fixup_common_free(struct asix_common_private *dp); struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index fda74f33e3ec..522d2900cd1d 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -210,6 +210,21 @@ int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb) return asix_rx_fixup_internal(dev, skb, rx); } +void asix_rx_fixup_common_free(struct asix_common_private *dp) +{ + struct asix_rx_fixup_info *rx; + + if (!dp) + return; + + rx = &dp->rx_fixup_info; + + if (rx->ax_skb) { + kfree_skb(rx->ax_skb); + rx->ax_skb = NULL; + } +} + struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index a3aa0a27dfe5..b2ff88e69a81 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -764,6 +764,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) static void ax88772_unbind(struct usbnet *dev, struct usb_interface *intf) { + asix_rx_fixup_common_free(dev->driver_priv); kfree(dev->driver_priv); } -- cgit v1.2.3 From ec2c6726322f0d270bab477e4904bf9496f70ee5 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 7 Aug 2017 13:28:39 +0200 Subject: s390/qeth: fix L3 next-hop in xmit qeth hdr On L3, the qeth_hdr struct needs to be filled with the next-hop IP address. The current code accesses rtable->rt_gateway without checking that rtable is a valid address. The accidental access to a lowcore area results in a random next-hop address in the qeth_hdr. rtable (or more precisely, skb_dst(skb)) can be NULL in rare cases (for instance together with AF_PACKET sockets). This patch adds the missing NULL-ptr checks. Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Fixes: 87e7597b5a3 qeth: Move away from using neighbour entries in qeth_l3_fill_header() Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 8975cd321390..d42e758518ed 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2512,7 +2512,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, struct rtable *rt = (struct rtable *) dst; __be32 *pkey = &ip_hdr(skb)->daddr; - if (rt->rt_gateway) + if (rt && rt->rt_gateway) pkey = &rt->rt_gateway; /* IPv4 */ @@ -2523,7 +2523,7 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, struct rt6_info *rt = (struct rt6_info *) dst; struct in6_addr *pkey = &ipv6_hdr(skb)->daddr; - if (!ipv6_addr_any(&rt->rt6i_gateway)) + if (rt && !ipv6_addr_any(&rt->rt6i_gateway)) pkey = &rt->rt6i_gateway; /* IPv6 */ -- cgit v1.2.3 From b925ef37b0a152b0c06aa43bc9204d0116f676d7 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 7 Aug 2017 15:54:14 +0300 Subject: hysdn: fix to a race condition in put_log_buffer The synchronization type that was used earlier to guard the loop that deletes unused log buffers may lead to a situation that prevents any thread from going through the loop. The patch deletes previously used synchronization mechanism and moves the loop under the spin_lock so the similar cases won't be feasible in the future. Found by by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Volkov Signed-off-by: David S. Miller --- drivers/isdn/hysdn/hysdn_proclog.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/isdn/hysdn/hysdn_proclog.c b/drivers/isdn/hysdn/hysdn_proclog.c index 7b5fd8fb1761..aaca0b3d662e 100644 --- a/drivers/isdn/hysdn/hysdn_proclog.c +++ b/drivers/isdn/hysdn/hysdn_proclog.c @@ -44,7 +44,6 @@ struct procdata { char log_name[15]; /* log filename */ struct log_data *log_head, *log_tail; /* head and tail for queue */ int if_used; /* open count for interface */ - int volatile del_lock; /* lock for delete operations */ unsigned char logtmp[LOG_MAX_LINELEN]; wait_queue_head_t rd_queue; }; @@ -102,7 +101,6 @@ put_log_buffer(hysdn_card *card, char *cp) { struct log_data *ib; struct procdata *pd = card->proclog; - int i; unsigned long flags; if (!pd) @@ -126,21 +124,21 @@ put_log_buffer(hysdn_card *card, char *cp) else pd->log_tail->next = ib; /* follows existing messages */ pd->log_tail = ib; /* new tail */ - i = pd->del_lock++; /* get lock state */ - spin_unlock_irqrestore(&card->hysdn_lock, flags); /* delete old entrys */ - if (!i) - while (pd->log_head->next) { - if ((pd->log_head->usage_cnt <= 0) && - (pd->log_head->next->usage_cnt <= 0)) { - ib = pd->log_head; - pd->log_head = pd->log_head->next; - kfree(ib); - } else - break; - } /* pd->log_head->next */ - pd->del_lock--; /* release lock level */ + while (pd->log_head->next) { + if ((pd->log_head->usage_cnt <= 0) && + (pd->log_head->next->usage_cnt <= 0)) { + ib = pd->log_head; + pd->log_head = pd->log_head->next; + kfree(ib); + } else { + break; + } + } /* pd->log_head->next */ + + spin_unlock_irqrestore(&card->hysdn_lock, flags); + wake_up_interruptible(&(pd->rd_queue)); /* announce new entry */ } /* put_log_buffer */ -- cgit v1.2.3 From 0a9c40cea70d3f2dfa401bc259b318cd9aec8613 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 3 Aug 2017 09:57:12 -0700 Subject: test_kmod: fix kmod.sh by making it executable We had just forgotten to do this. Without this if we run the following we get a permission denied: sudo make -C tools/testing/selftests/kmod/ run_tests make: Entering directory '/home/mcgrof/linux-next/tools/testing/selftests/kmod' /bin/sh: ./kmod.sh: Permission denied selftests: kmod.sh [FAIL] /home/mcgrof/linux-next/tools/testing/selftests/kmod make: Leaving directory '/home/mcgrof/linux-next/tools/testing/selftests/kmod Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Luis R. Rodriguez Signed-off-by: Shuah Khan --- tools/testing/selftests/kmod/kmod.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/kmod/kmod.sh diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh old mode 100644 new mode 100755 -- cgit v1.2.3 From 8b0949d407431559f7c80a02f5382f5e1c77b8d1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 3 Aug 2017 09:57:13 -0700 Subject: test_sysctl: fix sysctl.sh by making it executable We had just forogtten to do this. Without this the following test fails: $ sudo make -C tools/testing/selftests/sysctl/ run_tests make: Entering directory '/home/mcgrof/linux-next/tools/testing/selftests/sysctl' /bin/sh: ./sysctl.sh: Permission denied selftests: sysctl.sh [FAIL] /home/mcgrof/linux-next/tools/testing/selftests/sysctl make: Leaving directory '/home/mcgrof/linux-next/tools/testing/selftests/sysctl' Fixes: 64b671204afd71 ("test_sysctl: add generic script to expand on tests") Signed-off-by: Luis R. Rodriguez Signed-off-by: Shuah Khan --- tools/testing/selftests/sysctl/sysctl.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/sysctl/sysctl.sh diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh old mode 100644 new mode 100755 -- cgit v1.2.3 From eb2a6b800c2d1336cce6709d48c42753a611c07b Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 7 Aug 2017 00:00:17 +0200 Subject: qed: Fix a memory allocation failure test in 'qed_mcp_cmd_init()' We allocate 'p_info->mfw_mb_cur' and 'p_info->mfw_mb_shadow' but we check 'p_info->mfw_mb_addr' instead of 'p_info->mfw_mb_cur'. 'p_info->mfw_mb_addr' is never 0, because it is initiliazed a few lines above in 'qed_load_mcp_offsets()'. Update the test and check the result of the 2 'kzalloc()' instead. Signed-off-by: Christophe JAILLET Acked-by: Tomer Tayar Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 9da91045d167..3eb241657368 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -253,7 +253,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32); p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL); p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL); - if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr) + if (!p_info->mfw_mb_cur || !p_info->mfw_mb_shadow) goto err; return 0; -- cgit v1.2.3 From 21da5332327b6d183bd93336ecf29c70bc609b7b Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 26 Jul 2017 08:41:08 +0100 Subject: MIPS: Introduce cpu_tcache_line_size There exist macros to return the cache line size of the L1 dcache and L2 scache but there is currently no macro for the L3 tcache. Add this macro which will be used by the following patch "MIPS: PCI: Fix smp_processor_id() in preemptible" Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/16871/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu-features.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 8baa9033b181..721b698bfe3c 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -428,6 +428,9 @@ #ifndef cpu_scache_line_size #define cpu_scache_line_size() cpu_data[0].scache.linesz #endif +#ifndef cpu_tcache_line_size +#define cpu_tcache_line_size() cpu_data[0].tcache.linesz +#endif #ifndef cpu_hwrena_impl_bits #define cpu_hwrena_impl_bits 0 -- cgit v1.2.3 From 735302665c353d6756e7fa2a2cf41b039299f732 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 26 Jul 2017 08:41:09 +0100 Subject: MIPS: PCI: Fix smp_processor_id() in preemptible Commit 1c3c5eab1715 ("sched/core: Enable might_sleep() and smp_processor_id() checks early") enables checks for might_sleep() and smp_processor_id() being used in preemptible code earlier in the boot than before. This results in a new BUG from pcibios_set_cache_line_size(). BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is pcibios_set_cache_line_size+0x10/0x70 CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc1-00007-g3ce3e4ba4275 #615 Stack: 0000000000000000 ffffffff81189694 0000000000000000 ffffffff81822318 000000000000004e 0000000000000001 800000000e20bd08 20c49ba5e3540000 0000000000000000 0000000000000000 ffffffff818d0000 0000000000000000 0000000000000000 ffffffff81189328 ffffffff818ce692 0000000000000000 0000000000000000 ffffffff81189bc8 ffffffff818d0000 0000000000000000 ffffffff81828907 ffffffff81769970 800000020ec78d80 ffffffff818c7b48 0000000000000001 0000000000000001 ffffffff818652b0 ffffffff81896268 ffffffff818c0000 800000020ec7fb40 800000020ec7fc58 ffffffff81684cac 0000000000000000 ffffffff8118ab50 0000000000000030 ffffffff81769970 0000000000000001 ffffffff81122a58 0000000000000000 0000000000000000 ... Call Trace: [] show_stack+0x90/0xb0 [] dump_stack+0xac/0xf0 [] check_preemption_disabled+0x120/0x128 [] pcibios_set_cache_line_size+0x10/0x70 [] do_one_initcall+0x48/0x140 [] kernel_init_freeable+0x194/0x24c [] kernel_init+0x14/0x118 [] ret_from_kernel_thread+0x14/0x1c Fix this by using the cpu_*cache_line_size() macros instead. These macros are the "proper" way to determine the CPU cache sizes. This makes use of the newly added cpu_tcache_line_size. Fixes: 1c3c5eab1715 ("sched/core: Enable might_sleep() and smp_processor_id() checks early") Signed-off-by: Matt Redfearn Suggested-by: James Hogan Reviewed-by: James Hogan Signed-off-by: Ralf Baechle --- arch/mips/pci/pci.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index bd67ac74fe2d..9632436d74d7 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -28,16 +28,15 @@ EXPORT_SYMBOL(PCIBIOS_MIN_MEM); static int __init pcibios_set_cache_line_size(void) { - struct cpuinfo_mips *c = ¤t_cpu_data; unsigned int lsize; /* * Set PCI cacheline size to that of the highest level in the * cache hierarchy. */ - lsize = c->dcache.linesz; - lsize = c->scache.linesz ? : lsize; - lsize = c->tcache.linesz ? : lsize; + lsize = cpu_dcache_line_size(); + lsize = cpu_scache_line_size() ? : lsize; + lsize = cpu_tcache_line_size() ? : lsize; BUG_ON(!lsize); -- cgit v1.2.3 From ac2b21157a3104ad2daa21c65e6cc73604edba0b Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Tue, 8 Aug 2017 10:48:15 +0530 Subject: mmc: host: omap_hsmmc: Add CMD23 capability to omap_hsmmc driver omap_hsmmc driver always relied on CMD12 to stop transmission. However if CMD12 is not issued at the correct timing, the card will indicate a out of range error. With certain cards in some of the DRA7 based boards, -EIO error is observed. By Adding CMD23 capability, the MMC core will send MMC_SET_BLOCK_COUNT command before MMC_READ_MULTIPLE_BLOCK/MMC_WRITE_MULTIPLE_BLOCK commands. commit a04e6bae9e6f12 ("mmc: core: check also R1 response for stop commands") exposed this bug in omap_hsmmc driver. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Ulf Hansson --- drivers/mmc/host/omap_hsmmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 04ff3c97a535..2ab4788d021f 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -2086,7 +2086,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev) mmc->max_seg_size = mmc->max_req_size; mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | - MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE; + MMC_CAP_WAIT_WHILE_BUSY | MMC_CAP_ERASE | MMC_CAP_CMD23; mmc->caps |= mmc_pdata(host)->caps; if (mmc->caps & MMC_CAP_8_BIT_DATA) -- cgit v1.2.3 From 785a12afdb4a52903447fd890633c82fdda4b6f7 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 8 Aug 2017 14:13:15 +0530 Subject: powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails Currently, we use the opal call opal_slw_set_reg() to inform the Sleep-Winkle Engine (SLW) to restore the contents of some of the Hypervisor state on wakeup from deep idle states that lose full hypervisor context (characterized by the flag OPAL_PM_LOSE_FULL_CONTEXT). However, the current code has a bug in that if opal_slw_set_reg() fails, we don't disable the use of these deep states (winkle on POWER8, stop4 onwards on POWER9). This patch fixes this bug by ensuring that if programing the sleep-winkle engine to restore the hypervisor states in pnv_save_sprs_for_deep_states() fails, then we exclude such states by clearing the OPAL_PM_LOSE_FULL_CONTEXT flag from supported_cpuidle_states. As a result POWER8 will be prevented from using winkle for CPU-Hotplug, and POWER9 will put the offlined CPUs to the default stop state when available. Further, we ensure in the initialization of the cpuidle-powernv driver to only include those states whose flags are present in supported_cpuidle_states, thereby skipping OPAL_PM_LOSE_FULL_CONTEXT states when they have been disabled due to stop-api failure. Fixes: 1e1601b38e6 ("powerpc/powernv/idle: Restore SPRs for deep idle states via stop API.") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 41 ++++++++++++++++++++++++++++++++--- drivers/cpuidle/cpuidle-powernv.c | 10 +++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..a553aeea7af6 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE; */ static u64 pnv_deepest_stop_psscr_val; static u64 pnv_deepest_stop_psscr_mask; +static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; static int pnv_save_sprs_for_deep_states(void) @@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void) update_subcore_sibling_mask(); - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) - pnv_save_sprs_for_deep_states(); + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + int rc = pnv_save_sprs_for_deep_states(); + + if (likely(!rc)) + return; + + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available. + */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = + pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = + pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } + } } u32 pnv_get_supported_cpuidle_states(void) @@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) pnv_deepest_stop_psscr_val; srr1 = power9_idle_stop(psscr); - } else if (idle_states & OPAL_PM_WINKLE_ENABLED) { + } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && + (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { srr1 = power7_idle_insn(PNV_THREAD_WINKLE); } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { @@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, max_residency_ns = residency_ns[i]; pnv_deepest_stop_psscr_val = psscr_val[i]; pnv_deepest_stop_psscr_mask = psscr_mask[i]; + pnv_deepest_stop_flag = flags[i]; deepest_stop_found = true; } diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 37b0698b7193..42896a67aeae 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len, return -1; } +extern u32 pnv_get_supported_cpuidle_states(void); static int powernv_add_idle_states(void) { struct device_node *power_mgt; @@ -248,6 +249,8 @@ static int powernv_add_idle_states(void) const char *names[CPUIDLE_STATE_MAX]; u32 has_stop_states = 0; int i, rc; + u32 supported_flags = pnv_get_supported_cpuidle_states(); + /* Currently we have snooze statically defined */ @@ -362,6 +365,13 @@ static int powernv_add_idle_states(void) for (i = 0; i < dt_idle_states; i++) { unsigned int exit_latency, target_residency; bool stops_timebase = false; + + /* + * Skip the platform idle state whose flag isn't in + * the supported_cpuidle_states flag mask. + */ + if ((flags[i] & supported_flags) != flags[i]) + continue; /* * If an idle state has exit latency beyond * POWERNV_THRESHOLD_LATENCY_NS then don't use it -- cgit v1.2.3 From 527f10285bc3f700407bf3aab0ea7b3b9f9338da Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Aug 2017 16:18:04 -0700 Subject: MIPS: Prevent building MT support for microMIPS kernels We don't currently support the MT ASE for microMIPS kernels, and there are no CPUs currently in existence that use both. They can however both be enabled in Kconfig, resulting in build failures such as: AS arch/mips/kernel/cps-vec.o arch/mips/kernel/cps-vec.S: Assembler messages: arch/mips/kernel/cps-vec.S:242: Warning: the 32-bit microMIPS architecture does not support the `mt' extension arch/mips/kernel/cps-vec.S:276: Error: unrecognized opcode `mttc0 $13,$2,2' arch/mips/kernel/cps-vec.S:282: Error: unrecognized opcode `mttc0 $8,$1,2' arch/mips/kernel/cps-vec.S:285: Error: unrecognized opcode `mttc0 $0,$2,1' ... Fix this by preventing MT from being enabled when targeting microMIPS. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16951/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8dd20358464f..48d91d5be4e9 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2260,7 +2260,7 @@ config CPU_R4K_CACHE_TLB config MIPS_MT_SMP bool "MIPS MT SMP support (1 TC on each available VPE)" - depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 + depends on SYS_SUPPORTS_MULTITHREADING && !CPU_MIPSR6 && !CPU_MICROMIPS select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select SYNC_R4K -- cgit v1.2.3 From 5fc9484f5e41b239d1e7a123219e53f333e43ba5 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 7 Aug 2017 16:16:47 -0700 Subject: MIPS: Set ISA bit in entry-y for microMIPS kernels When building a kernel for the microMIPS ISA, ensure that the ISA bit (ie. bit 0) in the entry address is set. Otherwise we may include an entry address in images which bootloaders will jump to as MIPS32 code. I originally tried using "objdump -f" to obtain the entry address, which works for microMIPS but it always outputs a 32 bit address for a 32 bit ELF whilst nm will sign extend to 64 bit. That matters for systems where we might want to run a MIPS32 kernel on a MIPS64 CPU & load it with a MIPS64 bootloader, which would then jump to a non-canonical (non-sign-extended) address. This works in all cases as it only changes the behaviour for microMIPS kernels, but isn't the prettiest solution. A possible alternative would be to write a custom tool to just extract, sign extend & print the entry point of an ELF executable. I'm open to feedback if that would be preferred. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16950/ Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 04343625b929..bc2708c9ada4 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -243,8 +243,21 @@ include arch/mips/Kbuild.platforms ifdef CONFIG_PHYSICAL_START load-y = $(CONFIG_PHYSICAL_START) endif -entry-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ + +entry-noisa-y = 0x$(shell $(NM) vmlinux 2>/dev/null \ | grep "\bkernel_entry\b" | cut -f1 -d \ ) +ifdef CONFIG_CPU_MICROMIPS + # + # Set the ISA bit, since the kernel_entry symbol in the ELF will have it + # clear which would lead to images containing addresses which bootloaders may + # jump to as MIPS32 code. + # + entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \ + $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \ + $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y))))))))) +else + entry-y = $(entry-noisa-y) +endif cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic drivers-$(CONFIG_PCI) += arch/mips/pci/ -- cgit v1.2.3 From d6f756e09f01ea7a0efbbcef269a1e384a35d824 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Tue, 25 Jul 2017 14:33:36 +0200 Subject: drm/etnaviv: Fix off-by-one error in reloc checking A relocation pointing to the last four bytes of a buffer can legitimately happen in the case of small vertex buffers. CC: stable@vger.kernel.org #4.9+ Signed-off-by: Wladimir J. van der Laan Reviewed-by: Philipp Zabel Reviewed-by: Christian Gmeiner Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 5bd93169dac2..6463fc2c736f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -270,8 +270,8 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream, if (ret) return ret; - if (r->reloc_offset >= bo->obj->base.size - sizeof(*ptr)) { - DRM_ERROR("relocation %u outside object", i); + if (r->reloc_offset > bo->obj->base.size - sizeof(*ptr)) { + DRM_ERROR("relocation %u outside object\n", i); return -EINVAL; } -- cgit v1.2.3 From 33182d15c6bf182f7ae32a66ea4a547d979cd6d7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Aug 2017 16:56:36 +1000 Subject: md: always clear ->safemode when md_check_recovery gets the mddev lock. If ->safemode == 1, md_check_recovery() will try to get the mddev lock and perform various other checks. If mddev->in_sync is zero, it will call set_in_sync, and clear ->safemode. However if mddev->in_sync is not zero, ->safemode will not be cleared. When md_check_recovery() drops the mddev lock, the thread is woken up again. Normally it would just check if there was anything else to do, find nothing, and go to sleep. However as ->safemode was not cleared, it will take the mddev lock again, then wake itself up when unlocking. This results in an infinite loop, repeatedly calling md_check_recovery(), which RCU or the soft-lockup detector will eventually complain about. Prior to commit 4ad23a976413 ("MD: use per-cpu counter for writes_pending"), safemode would only be set to one when the writes_pending counter reached zero, and would be cleared again when writes_pending is incremented. Since that patch, safemode is set more freely, but is not reliably cleared. So in md_check_recovery() clear ->safemode before checking ->in_sync. Fixes: 4ad23a976413 ("MD: use per-cpu counter for writes_pending") Cc: stable@vger.kernel.org (4.12+) Reported-by: Dominik Brodowski Reported-by: David R Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index c99634612fc4..d84aceede1cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8656,6 +8656,9 @@ void md_check_recovery(struct mddev *mddev) if (mddev_trylock(mddev)) { int spares = 0; + if (mddev->safemode == 1) + mddev->safemode = 0; + if (mddev->ro) { struct md_rdev *rdev; if (!mddev->external && mddev->in_sync) -- cgit v1.2.3 From 81fe48e9aa00bdd509bd3c37a76d1132da6b9f09 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Aug 2017 16:56:36 +1000 Subject: md: fix test in md_write_start() md_write_start() needs to clear the in_sync flag is it is set, or if there might be a race with set_in_sync() such that the later will set it very soon. In the later case it is sufficient to take the spinlock to synchronize with set_in_sync(), and then set the flag if needed. The current test is incorrect. It should be: if "flag is set" or "race is possible" "flag is set" is trivially "mddev->in_sync". "race is possible" should be tested by "mddev->sync_checkers". If sync_checkers is 0, then there can be no race. set_in_sync() will wait in percpu_ref_switch_to_atomic_sync() for an RCU grace period, and as md_write_start() holds the rcu_read_lock(), set_in_sync() will be sure ot see the update to writes_pending. If sync_checkers is > 0, there could be race. If md_write_start() happened entirely between if (!mddev->in_sync && percpu_ref_is_zero(&mddev->writes_pending)) { and mddev->in_sync = 1; in set_in_sync(), then it would not see that is_sync had been set, and set_in_sync() would not see that writes_pending had been incremented. This bug means that in_sync is sometimes not set when it should be. Consequently there is a small chance that the array will be marked as "clean" when in fact it is inconsistent. Fixes: 4ad23a976413 ("MD: use per-cpu counter for writes_pending") cc: stable@vger.kernel.org (v4.12+) Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index d84aceede1cb..e4ba95f6cd59 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7996,7 +7996,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi) if (mddev->safemode == 1) mddev->safemode = 0; /* sync_checkers is always 0 when writes_pending is in per-cpu mode */ - if (mddev->in_sync || !mddev->sync_checkers) { + if (mddev->in_sync || mddev->sync_checkers) { spin_lock(&mddev->lock); if (mddev->in_sync) { mddev->in_sync = 0; -- cgit v1.2.3 From b44886c54a999771060371c3a05d5fedfc7e2102 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 31 Jul 2017 14:52:26 -0700 Subject: md/r5cache: call mddev_lock/unlock() in r5c_journal_mode_set In r5c_journal_mode_set(), it is necessary to call mddev_lock() before accessing conf and conf->log. Otherwise, the conf->log may change (and become NULL). Shaohua: fix unlock in failure cases Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index bfa1e907c472..ce98414a6e34 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -2540,23 +2540,32 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page) */ int r5c_journal_mode_set(struct mddev *mddev, int mode) { - struct r5conf *conf = mddev->private; - struct r5l_log *log = conf->log; - - if (!log) - return -ENODEV; + struct r5conf *conf; + int err; if (mode < R5C_JOURNAL_MODE_WRITE_THROUGH || mode > R5C_JOURNAL_MODE_WRITE_BACK) return -EINVAL; + err = mddev_lock(mddev); + if (err) + return err; + conf = mddev->private; + if (!conf || !conf->log) { + mddev_unlock(mddev); + return -ENODEV; + } + if (raid5_calc_degraded(conf) > 0 && - mode == R5C_JOURNAL_MODE_WRITE_BACK) + mode == R5C_JOURNAL_MODE_WRITE_BACK) { + mddev_unlock(mddev); return -EINVAL; + } mddev_suspend(mddev); conf->log->r5c_journal_mode = mode; mddev_resume(mddev); + mddev_unlock(mddev); pr_debug("md/raid:%s: setting r5c cache mode to %d: %s\n", mdname(mddev), mode, r5c_journal_mode_str[mode]); -- cgit v1.2.3 From a9501d742127e613d744e29814e9532bacb147e8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 3 Aug 2017 10:03:17 -0700 Subject: md/r5cache: fix io_unit handling in r5l_log_endio() In r5l_log_endio(), once log->io_list_lock is released, the io unit may be accessed (or even freed) by other threads. Current code doesn't handle the io_unit properly, which leads to potential race conditions. This patch solves this race condition by: 1. Add a pending_stripe count flush_payload. Multiple flush_payloads are counted as only one pending_stripe. Flag has_flush_payload is added to show whether the io unit has flush_payload; 2. In r5l_log_endio(), check flags has_null_flush and has_flush_payload with log->io_list_lock held. After the lock is released, this IO unit is only accessed when we know the pending_stripe counter cannot be zeroed by other threads. Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index ce98414a6e34..2dcbafa8e66c 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -236,9 +236,10 @@ struct r5l_io_unit { bool need_split_bio; struct bio *split_bio; - unsigned int has_flush:1; /* include flush request */ - unsigned int has_fua:1; /* include fua request */ - unsigned int has_null_flush:1; /* include empty flush request */ + unsigned int has_flush:1; /* include flush request */ + unsigned int has_fua:1; /* include fua request */ + unsigned int has_null_flush:1; /* include null flush request */ + unsigned int has_flush_payload:1; /* include flush payload */ /* * io isn't sent yet, flush/fua request can only be submitted till it's * the first IO in running_ios list @@ -571,6 +572,8 @@ static void r5l_log_endio(struct bio *bio) struct r5l_io_unit *io_deferred; struct r5l_log *log = io->log; unsigned long flags; + bool has_null_flush; + bool has_flush_payload; if (bio->bi_status) md_error(log->rdev->mddev, log->rdev); @@ -580,6 +583,16 @@ static void r5l_log_endio(struct bio *bio) spin_lock_irqsave(&log->io_list_lock, flags); __r5l_set_io_unit_state(io, IO_UNIT_IO_END); + + /* + * if the io doesn't not have null_flush or flush payload, + * it is not safe to access it after releasing io_list_lock. + * Therefore, it is necessary to check the condition with + * the lock held. + */ + has_null_flush = io->has_null_flush; + has_flush_payload = io->has_flush_payload; + if (log->need_cache_flush && !list_empty(&io->stripe_list)) r5l_move_to_end_ios(log); else @@ -600,19 +613,23 @@ static void r5l_log_endio(struct bio *bio) if (log->need_cache_flush) md_wakeup_thread(log->rdev->mddev->thread); - if (io->has_null_flush) { + /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */ + if (has_null_flush) { struct bio *bi; WARN_ON(bio_list_empty(&io->flush_barriers)); while ((bi = bio_list_pop(&io->flush_barriers)) != NULL) { bio_endio(bi); - atomic_dec(&io->pending_stripe); + if (atomic_dec_and_test(&io->pending_stripe)) { + __r5l_stripe_write_finished(io); + return; + } } } - - /* finish flush only io_unit and PAYLOAD_FLUSH only io_unit */ - if (atomic_read(&io->pending_stripe) == 0) - __r5l_stripe_write_finished(io); + /* decrease pending_stripe for flush payload */ + if (has_flush_payload) + if (atomic_dec_and_test(&io->pending_stripe)) + __r5l_stripe_write_finished(io); } static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io) @@ -881,6 +898,11 @@ static void r5l_append_flush_payload(struct r5l_log *log, sector_t sect) payload->size = cpu_to_le32(sizeof(__le64)); payload->flush_stripes[0] = cpu_to_le64(sect); io->meta_offset += meta_size; + /* multiple flush payloads count as one pending_stripe */ + if (!io->has_flush_payload) { + io->has_flush_payload = 1; + atomic_inc(&io->pending_stripe); + } mutex_unlock(&log->io_mutex); } -- cgit v1.2.3 From be37aa4b993bd5d4191f76a7bd43be33f987b972 Mon Sep 17 00:00:00 2001 From: Michael Hernandez Date: Mon, 31 Jul 2017 14:45:10 -0700 Subject: scsi: qla2xxx: Fix system crash while triggering FW dump This patch fixes system hang/crash while firmware dump is attempted with Block MQ enabled in qla2xxx driver. Fix is to remove check in fw dump template entries for existing request and response queues so that full buffer size is calculated during template size calculation. Following stack trace is seen during firmware dump capture process [ 694.390588] qla2xxx [0000:81:00.0]-5003:11: ISP System Error - mbx1=4b1fh mbx2=10h mbx3=2ah mbx7=0h. [ 694.402336] BUG: unable to handle kernel paging request at ffffc90008c7b000 [ 694.402372] IP: memcpy_erms+0x6/0x10 [ 694.402386] PGD 105f01a067 [ 694.402386] PUD 85f89c067 [ 694.402398] PMD 10490cb067 [ 694.402409] PTE 0 [ 694.402421] [ 694.402437] Oops: 0002 [#1] PREEMPT SMP [ 694.402452] Modules linked in: netconsole configfs qla2xxx scsi_transport_fc nvme_fc nvme_fabrics bnep bluetooth rfkill xt_tcpudp unix_diag xt_multiport ip6table_filter ip6_tables iptable_filter ip_tables x_tables af_packet iscsi_ibft iscsi_boot_sysfs xfs libcrc32c ipmi_ssif sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass igb crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel iTCO_wdt aes_x86_64 crypto_simd ptp iTCO_vendor_support glue_helper cryptd lpc_ich joydev i2c_i801 pcspkr ioatdma mei_me pps_core tpm_tis mei mfd_core acpi_power_meter tpm_tis_core ipmi_si ipmi_devintf tpm ipmi_msghandler shpchp wmi dca button acpi_pad btrfs xor uas usb_storage hid_generic usbhid raid6_pq crc32c_intel ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect [ 694.402692] sysimgblt fb_sys_fops xhci_pci ttm ehci_pci sr_mod xhci_hcd cdrom ehci_hcd drm usbcore sg [ 694.402730] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-1-default+ #19 [ 694.402753] Hardware name: Supermicro X10DRi/X10DRi, BIOS 1.1a 10/16/2015 [ 694.402776] task: ffffffff81c0e4c0 task.stack: ffffffff81c00000 [ 694.402798] RIP: 0010:memcpy_erms+0x6/0x10 [ 694.402813] RSP: 0018:ffff88085fc03cd0 EFLAGS: 00210006 [ 694.402832] RAX: ffffc90008c7ae0c RBX: 0000000000000004 RCX: 000000000001fe0c [ 694.402856] RDX: 0000000000020000 RSI: ffff8810332c01f4 RDI: ffffc90008c7b000 [ 694.402879] RBP: ffff88085fc03d18 R08: 0000000000020000 R09: 0000000000279e0a [ 694.402903] R10: 0000000000000000 R11: f000000000000000 R12: ffff88085fc03d80 [ 694.402927] R13: ffffc90008a01000 R14: ffffc90008a056d4 R15: ffff881052ef17e0 [ 694.402951] FS: 0000000000000000(0000) GS:ffff88085fc00000(0000) knlGS:0000000000000000 [ 694.402977] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 694.403012] CR2: ffffc90008c7b000 CR3: 0000000001c09000 CR4: 00000000001406f0 [ 694.403036] Call Trace: [ 694.403047] [ 694.403072] ? qla27xx_fwdt_entry_t263+0x18e/0x380 [qla2xxx] [ 694.403099] qla27xx_walk_template+0x9d/0x1a0 [qla2xxx] [ 694.403124] qla27xx_fwdump+0x1f3/0x272 [qla2xxx] [ 694.403149] qla2x00_async_event+0xb08/0x1a50 [qla2xxx] [ 694.403169] ? enqueue_task_fair+0xa2/0x9d0 Signed-off-by: Mike Hernandez Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_tmpl.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index 33142610882f..b18646d6057f 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -401,9 +401,6 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; - if (!test_bit(i, vha->hw->req_qid_map)) - continue; - if (req || !buf) { length = req ? req->length : REQUEST_ENTRY_CNT_24XX; @@ -418,9 +415,6 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; - if (!test_bit(i, vha->hw->rsp_qid_map)) - continue; - if (rsp || !buf) { length = rsp ? rsp->length : RESPONSE_ENTRY_CNT_MQ; @@ -660,9 +654,6 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; - if (!test_bit(i, vha->hw->req_qid_map)) - continue; - if (req || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); @@ -675,9 +666,6 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; - if (!test_bit(i, vha->hw->rsp_qid_map)) - continue; - if (rsp || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); -- cgit v1.2.3 From 180efde0a3f43dbe533e4be203c2918793482d4e Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Tue, 1 Aug 2017 14:42:54 +0200 Subject: scsi: st: fix blk_get_queue usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If blk_queue_get() in st_probe fails, disk->queue must not be set to SDp->request_queue, as that would result in put_disk() dropping a not taken reference. Thus, disk->queue should be set only after a successful blk_queue_get(). Fixes: 2b5bebccd282 ("st: Take additional queue ref in st_probe") Signed-off-by: Bodo Stroesser Acked-by: Shirish Pargaonkar Signed-off-by: Hannes Reinecke Reviewed-by: Ewan D. Milne Acked-by: Kai Mäkisara Signed-off-by: Martin K. Petersen --- drivers/scsi/st.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 8e5013d9cad4..94e402ed30f6 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4299,11 +4299,11 @@ static int st_probe(struct device *dev) kref_init(&tpnt->kref); tpnt->disk = disk; disk->private_data = &tpnt->driver; - disk->queue = SDp->request_queue; /* SCSI tape doesn't register this gendisk via add_disk(). Manually * take queue reference that release_disk() expects. */ - if (!blk_get_queue(disk->queue)) + if (!blk_get_queue(SDp->request_queue)) goto out_put_disk; + disk->queue = SDp->request_queue; tpnt->driver = &st_template; tpnt->device = SDp; -- cgit v1.2.3 From b0e17a9b0df29590c45dfb296f541270a5941f41 Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 1 Aug 2017 10:21:30 -0500 Subject: scsi: ipr: Fix scsi-mq lockdep issue Fixes the following lockdep warning that can occur when scsi-mq is enabled with ipr due to ipr calling scsi_unblock_requests from irq context. The fix is to move the call to scsi_unblock_requests to ipr's existing workqueue. stack backtrace: CPU: 28 PID: 0 Comm: swapper/28 Not tainted 4.13.0-rc2-gcc6x-gf74c89b #1 Call Trace: [c000001fffe97550] [c000000000b50818] dump_stack+0xe8/0x160 (unreliable) [c000001fffe97590] [c0000000001586d0] print_usage_bug+0x2d0/0x390 [c000001fffe97640] [c000000000158f34] mark_lock+0x7a4/0x8e0 [c000001fffe976f0] [c00000000015a000] __lock_acquire+0x6a0/0x1a70 [c000001fffe97860] [c00000000015befc] lock_acquire+0xec/0x2e0 [c000001fffe97930] [c000000000b71514] _raw_spin_lock+0x44/0x70 [c000001fffe97960] [c0000000005b60f4] blk_mq_sched_dispatch_requests+0xa4/0x2a0 [c000001fffe979c0] [c0000000005acac0] __blk_mq_run_hw_queue+0x100/0x2c0 [c000001fffe97a00] [c0000000005ad478] __blk_mq_delay_run_hw_queue+0x118/0x130 [c000001fffe97a40] [c0000000005ad61c] blk_mq_start_hw_queues+0x6c/0xa0 [c000001fffe97a80] [c000000000797aac] scsi_kick_queue+0x2c/0x60 [c000001fffe97aa0] [c000000000797cf0] scsi_run_queue+0x210/0x360 [c000001fffe97b10] [c00000000079b888] scsi_run_host_queues+0x48/0x80 [c000001fffe97b40] [c0000000007b6090] ipr_ioa_bringdown_done+0x70/0x1e0 [c000001fffe97bc0] [c0000000007bc860] ipr_reset_ioa_job+0x80/0xf0 [c000001fffe97bf0] [c0000000007b4d50] ipr_reset_timer_done+0xd0/0x100 [c000001fffe97c30] [c0000000001937bc] call_timer_fn+0xdc/0x4b0 [c000001fffe97cf0] [c000000000193d08] expire_timers+0x178/0x330 [c000001fffe97d60] [c0000000001940c8] run_timer_softirq+0xb8/0x120 [c000001fffe97de0] [c000000000b726a8] __do_softirq+0x168/0x6d8 [c000001fffe97ef0] [c0000000000df2c8] irq_exit+0x108/0x150 [c000001fffe97f10] [c000000000017bf4] __do_irq+0x2a4/0x4a0 [c000001fffe97f90] [c00000000002da50] call_do_irq+0x14/0x24 [c0000007fad93aa0] [c000000000017e8c] do_IRQ+0x9c/0x140 [c0000007fad93af0] [c000000000008b98] hardware_interrupt_common+0x138/0x140 Reported-by: Michael Ellerman Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 33 +++++++++++++++++++-------------- drivers/scsi/ipr.h | 2 ++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index b0c68d24db01..da5bdbdcce52 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3351,6 +3351,16 @@ static void ipr_worker_thread(struct work_struct *work) return; } + if (ioa_cfg->scsi_unblock) { + ioa_cfg->scsi_unblock = 0; + ioa_cfg->scsi_blocked = 0; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->scsi_blocked) + scsi_block_requests(ioa_cfg->host); + } + if (!ioa_cfg->scan_enabled) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; @@ -7211,9 +7221,8 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { ipr_trace; - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + ioa_cfg->scsi_unblock = 1; + schedule_work(&ioa_cfg->work_q); } ioa_cfg->in_reset_reload = 0; @@ -7287,13 +7296,7 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); - spin_unlock(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock(ioa_cfg->host->host_lock); - - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) - scsi_block_requests(ioa_cfg->host); - + ioa_cfg->scsi_unblock = 1; schedule_work(&ioa_cfg->work_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -9249,8 +9252,11 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, spin_unlock(&ioa_cfg->hrrq[i]._lock); } wmb(); - if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + ioa_cfg->scsi_unblock = 0; + ioa_cfg->scsi_blocked = 1; scsi_block_requests(ioa_cfg->host); + } ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); ioa_cfg->reset_cmd = ipr_cmd; @@ -9306,9 +9312,8 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, wake_up_all(&ioa_cfg->reset_wait_q); if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + ioa_cfg->scsi_unblock = 1; + schedule_work(&ioa_cfg->work_q); } return; } else { diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index e98a87a65335..c7f0e9e3cd7d 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1488,6 +1488,8 @@ struct ipr_ioa_cfg { u8 cfg_locked:1; u8 clear_isr:1; u8 probe_done:1; + u8 scsi_unblock:1; + u8 scsi_blocked:1; u8 revid; -- cgit v1.2.3 From 424f727b94132a4193af401dd823c44612d1d59f Mon Sep 17 00:00:00 2001 From: Brian King Date: Tue, 1 Aug 2017 13:45:36 -0500 Subject: scsi: ses: Fix wrong page error If a SES device returns an error on a requested diagnostic page, we are currently printing an error indicating the wrong page was received. Fix this up to simply return a failure and only check the returned page when the diagnostic page buffer was populated by the device. Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ses.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index f1cdf32d7514..8927f9f54ad9 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -99,7 +99,7 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, NULL, SES_TIMEOUT, SES_RETRIES, NULL); - if (unlikely(!ret)) + if (unlikely(ret)) return ret; recv_page_code = ((unsigned char *)buf)[0]; -- cgit v1.2.3 From 51d96dc2e2dc2cf9b81cf976cc93c51ba3ac2f92 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 8 Aug 2017 18:28:41 +0200 Subject: random: fix warning message on ia64 and parisc Fix the warning message on the parisc and IA64 architectures to show the correct function name of the caller by using %pS instead of %pF. The message is printed with the value of _RET_IP_ which calls __builtin_return_address(0) and as such returns the IP address caller instead of pointer to a function descriptor of the caller. The effect of this patch is visible on the parisc and ia64 architectures only since those are the ones which use function descriptors while on all others %pS and %pF will behave the same. Cc: Theodore Ts'o Cc: Jason A. Donenfeld Signed-off-by: Helge Deller Fixes: eecabf567422 ("random: suppress spammy warnings about unseeded randomness") Fixes: d06bfd1989fe ("random: warn when kernel uses unseeded randomness") Signed-off-by: Linus Torvalds --- drivers/char/random.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index afa3ce7d3e72..8ad92707e45f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1492,7 +1492,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, #ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM print_once = true; #endif - pr_notice("random: %s called from %pF with crng_init=%d\n", + pr_notice("random: %s called from %pS with crng_init=%d\n", func_name, caller, crng_init); } -- cgit v1.2.3 From 92ddd95919466de5d34f3cb43635da9a7f9ab814 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Tue, 8 Aug 2017 18:54:01 +0800 Subject: mmc: mmc: correct the logic for setting HS400ES signal voltage Change the default err value to -EINVAL, make sure the card only has type EXT_CSD_CARD_TYPE_HS400_1_8V also do the signal voltage setting when select hs400es mode. Fixes: commit 1720d3545b77 ("mmc: core: switch to 1V8 or 1V2 for hs400es mode") Cc: Signed-off-by: Haibo Chen Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 4ffea14b7eb6..2bae69e39544 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1289,7 +1289,7 @@ out_err: static int mmc_select_hs400es(struct mmc_card *card) { struct mmc_host *host = card->host; - int err = 0; + int err = -EINVAL; u8 val; if (!(host->caps & MMC_CAP_8_BIT_DATA)) { -- cgit v1.2.3 From a1ffc2d25ae98878f37445d223b3344686b5c822 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Tue, 25 Jul 2017 14:53:42 +0200 Subject: MAINTAINERS: greybus: Fix typo s/LOOBACK/LOOPBACK Fixes: f47e07bc5f1a5c48 ("Fix up MAINTAINERS file problems") Cc: Joe Perches Signed-off-by: Sedat Dilek Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 44cb004c765d..01609e368bc7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5834,7 +5834,7 @@ F: drivers/staging/greybus/spi.c F: drivers/staging/greybus/spilib.c F: drivers/staging/greybus/spilib.h -GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS +GREYBUS LOOPBACK/TIME PROTOCOLS DRIVERS M: Bryan O'Donoghue S: Maintained F: drivers/staging/greybus/loopback.c -- cgit v1.2.3 From 6209ef67883ae6407cceb557b9ec4b2117d91697 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 2 Aug 2017 10:57:45 -0700 Subject: MAINTAINERS: openbmc mailing list is moderated The openbmc mailing list is moderated for non-subscribers. Signed-off-by: Randy Dunlap Acked-by: Brendan Higgins Cc: Benjamin Herrenschmidt Cc: Joel Stanley Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 01609e368bc7..3c419022ed93 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1161,7 +1161,7 @@ M: Brendan Higgins R: Benjamin Herrenschmidt R: Joel Stanley L: linux-i2c@vger.kernel.org -L: openbmc@lists.ozlabs.org +L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Maintained F: drivers/irqchip/irq-aspeed-i2c-ic.c F: drivers/i2c/busses/i2c-aspeed.c -- cgit v1.2.3 From 6f7d98ec445b61f8f416fedfe607cb4f1653a8bc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 4 Aug 2017 21:45:48 -0700 Subject: get_maintainer: Prepare for separate MAINTAINERS files Allow for MAINTAINERS to become a directory and if it is, read all the files in the directory for maintained sections. Optionally look for all files named MAINTAINERS in directories excluding the .git directory by using --find-maintainer-files. This optional feature adds ~.3 seconds of CPU on an Intel i5-6200 with an SSD. Miscellanea: - Create a read_maintainer_file subroutine from the existing code - Test only the existence of MAINTAINERS, not whether it's a file Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 91 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 3bd5f4f30235..bc443201d3ef 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -18,6 +18,7 @@ my $V = '0.26'; use Getopt::Long qw(:config no_auto_abbrev); use Cwd; +use File::Find; my $cur_path = fastgetcwd() . '/'; my $lk_path = "./"; @@ -58,6 +59,7 @@ my $from_filename = 0; my $pattern_depth = 0; my $version = 0; my $help = 0; +my $find_maintainer_files = 0; my $vcs_used = 0; @@ -249,6 +251,7 @@ if (!GetOptions( 'sections!' => \$sections, 'fe|file-emails!' => \$file_emails, 'f|file' => \$from_filename, + 'find-maintainer-files' => \$find_maintainer_files, 'v|version' => \$version, 'h|help|usage' => \$help, )) { @@ -307,36 +310,74 @@ if (!top_of_kernel_tree($lk_path)) { my @typevalue = (); my %keyword_hash; +my @mfiles = (); -open (my $maint, '<', "${lk_path}MAINTAINERS") - or die "$P: Can't open MAINTAINERS: $!\n"; -while (<$maint>) { - my $line = $_; - - if ($line =~ m/^([A-Z]):\s*(.*)/) { - my $type = $1; - my $value = $2; - - ##Filename pattern matching - if ($type eq "F" || $type eq "X") { - $value =~ s@\.@\\\.@g; ##Convert . to \. - $value =~ s/\*/\.\*/g; ##Convert * to .* - $value =~ s/\?/\./g; ##Convert ? to . - ##if pattern is a directory and it lacks a trailing slash, add one - if ((-d $value)) { - $value =~ s@([^/])$@$1/@; +sub read_maintainer_file { + my ($file) = @_; + + open (my $maint, '<', "$file") + or die "$P: Can't open MAINTAINERS file '$file': $!\n"; + while (<$maint>) { + my $line = $_; + + if ($line =~ m/^([A-Z]):\s*(.*)/) { + my $type = $1; + my $value = $2; + + ##Filename pattern matching + if ($type eq "F" || $type eq "X") { + $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*/\.\*/g; ##Convert * to .* + $value =~ s/\?/\./g; ##Convert ? to . + ##if pattern is a directory and it lacks a trailing slash, add one + if ((-d $value)) { + $value =~ s@([^/])$@$1/@; + } + } elsif ($type eq "K") { + $keyword_hash{@typevalue} = $value; } - } elsif ($type eq "K") { - $keyword_hash{@typevalue} = $value; + push(@typevalue, "$type:$value"); + } elsif (!(/^\s*$/ || /^\s*\#/)) { + $line =~ s/\n$//g; + push(@typevalue, $line); } - push(@typevalue, "$type:$value"); - } elsif (!/^(\s)*$/) { - $line =~ s/\n$//g; - push(@typevalue, $line); } + close($maint); +} + +sub find_is_maintainer_file { + my ($file) = $_; + return if ($file !~ m@/MAINTAINERS$@); + $file = $File::Find::name; + return if (! -f $file); + push(@mfiles, $file); } -close($maint); +sub find_ignore_git { + return grep { $_ !~ /^\.git$/; } @_; +} + +if (-d "${lk_path}MAINTAINERS") { + opendir(DIR, "${lk_path}MAINTAINERS") or die $!; + my @files = readdir(DIR); + closedir(DIR); + foreach my $file (@files) { + push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./); + } +} + +if ($find_maintainer_files) { + find( { wanted => \&find_is_maintainer_file, + preprocess => \&find_ignore_git, + no_chdir => 1, + }, "${lk_path}"); +} else { + push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS"; +} + +foreach my $file (@mfiles) { + read_maintainer_file("$file"); +} # # Read mail address map @@ -873,7 +914,7 @@ sub top_of_kernel_tree { if ( (-f "${lk_path}COPYING") && (-f "${lk_path}CREDITS") && (-f "${lk_path}Kbuild") - && (-f "${lk_path}MAINTAINERS") + && (-e "${lk_path}MAINTAINERS") && (-f "${lk_path}Makefile") && (-f "${lk_path}README") && (-d "${lk_path}Documentation") -- cgit v1.2.3 From 61f741645a354d91fece7b9cbb2f3f3587db8b8a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 5 Aug 2017 18:45:47 -0700 Subject: parse-maintainers: Add section pattern sorting Section [A-Z]: patterns are not currently in any required sorting order. Add a specific sorting sequence to MAINTAINERS entries. Sort F: and X: patterns in alphabetic order. The preferred section ordering is: SECTION HEADER M: Maintainers R: Reviewers P: Named persons without email addresses L: Mailing list addresses S: Status of this section (Supported, Maintained, Orphan, etc...) W: Any relevant URLs T: Source code control type (git, quilt, etc) Q: Patchwork patch acceptance queue site B: Bug tracking URIs C: Chat URIs F: Files with wildcard patterns (alphabetic ordered) X: Excluded files with wildcard patterns (alphabetic ordered) N: Files with regex patterns K: Keyword regexes in source code for maintainership identification Miscellaneous perl neatening: - Rename %map to %hash, map has a different meaning in perl - Avoid using \& and local variables for function indirection - Use return for a little c like clarity - Use c-like function call style instead of &function Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- scripts/parse-maintainers.pl | 70 +++++++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 21 deletions(-) diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl index a0fe34349b24..5c8e0504c67e 100644 --- a/scripts/parse-maintainers.pl +++ b/scripts/parse-maintainers.pl @@ -2,9 +2,9 @@ use strict; -my %map; +my %hash; -# sort comparison function +# sort comparison functions sub by_category($$) { my ($a, $b) = @_; @@ -15,20 +15,47 @@ sub by_category($$) { $a =~ s/THE REST/ZZZZZZ/g; $b =~ s/THE REST/ZZZZZZ/g; - $a cmp $b; + return $a cmp $b; +} + +sub by_pattern($$) { + my ($a, $b) = @_; + my $preferred_order = 'MRPLSWTQBCFXNK'; + + my $a1 = uc(substr($a, 0, 1)); + my $b1 = uc(substr($b, 0, 1)); + + my $a_index = index($preferred_order, $a1); + my $b_index = index($preferred_order, $b1); + + $a_index = 1000 if ($a_index == -1); + $b_index = 1000 if ($b_index == -1); + + if (($a1 =~ /^F$/ && $b1 =~ /^F$/) || + ($a1 =~ /^X$/ && $b1 =~ /^X$/)) { + return $a cmp $b; + } + + if ($a_index < $b_index) { + return -1; + } elsif ($a_index == $b_index) { + return 0; + } else { + return 1; + } } sub alpha_output { - my $key; - my $sort_method = \&by_category; - my $sep = ""; - - foreach $key (sort $sort_method keys %map) { - if ($key ne " ") { - print $sep . $key . "\n"; - $sep = "\n"; - } - print $map{$key}; + foreach my $key (sort by_category keys %hash) { + if ($key eq " ") { + chomp $hash{$key}; + print $hash{$key}; + } else { + print "\n" . $key . "\n"; + foreach my $pattern (sort by_pattern split('\n', $hash{$key})) { + print($pattern . "\n"); + } + } } } @@ -42,7 +69,7 @@ sub trim { sub file_input { my $lastline = ""; my $case = " "; - $map{$case} = ""; + $hash{$case} = ""; while (<>) { my $line = $_; @@ -51,27 +78,28 @@ sub file_input { if ($line =~ m/^([A-Z]):\s*(.*)/) { $line = $1 . ":\t" . trim($2) . "\n"; if ($lastline eq "") { - $map{$case} = $map{$case} . $line; + $hash{$case} = $hash{$case} . $line; next; } $case = trim($lastline); - exists $map{$case} and die "Header '$case' already exists"; - $map{$case} = $line; + exists $hash{$case} and die "Header '$case' already exists"; + $hash{$case} = $line; $lastline = ""; next; } if ($case eq " ") { - $map{$case} = $map{$case} . $lastline; + $hash{$case} = $hash{$case} . $lastline; $lastline = $line; next; } trim($lastline) eq "" or die ("Odd non-pattern line '$lastline' for '$case'"); $lastline = $line; } - $map{$case} = $map{$case} . $lastline; + $hash{$case} = $hash{$case} . $lastline; } -&file_input; -&alpha_output; +file_input(); +alpha_output(); + exit(0); -- cgit v1.2.3 From fe9090301fed202a80a6113534efaa0d9184543b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 5 Aug 2017 18:45:48 -0700 Subject: parse-maintainers: Use perl hash references and specific filenames Instead of reading STDIN and writing STDOUT, use specific filenames of MAINTAINERS and MAINTAINERS.new. Use hash references instead of global hash %hash so future modifications can read and write specific hashes to split up MAINTAINERS into multiple files using a script. Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- scripts/parse-maintainers.pl | 57 ++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl index 5c8e0504c67e..c286154a2b68 100644 --- a/scripts/parse-maintainers.pl +++ b/scripts/parse-maintainers.pl @@ -2,7 +2,7 @@ use strict; -my %hash; +my $P = $0; # sort comparison functions sub by_category($$) { @@ -45,61 +45,72 @@ sub by_pattern($$) { } } +sub trim { + my $s = shift; + $s =~ s/\s+$//; + $s =~ s/^\s+//; + return $s; +} + sub alpha_output { - foreach my $key (sort by_category keys %hash) { + my ($hashref, $filename) = (@_); + + open(my $file, '>', "$filename") or die "$P: $filename: open failed - $!\n"; + foreach my $key (sort by_category keys %$hashref) { if ($key eq " ") { - chomp $hash{$key}; - print $hash{$key}; + chomp $$hashref{$key}; + print $file $$hashref{$key}; } else { - print "\n" . $key . "\n"; - foreach my $pattern (sort by_pattern split('\n', $hash{$key})) { - print($pattern . "\n"); + print $file "\n" . $key . "\n"; + foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) { + print $file ($pattern . "\n"); } } } -} - -sub trim { - my $s = shift; - $s =~ s/\s+$//; - $s =~ s/^\s+//; - return $s; + close($file); } sub file_input { + my ($hashref, $filename) = (@_); + my $lastline = ""; my $case = " "; - $hash{$case} = ""; + $$hashref{$case} = ""; + + open(my $file, '<', "$filename") or die "$P: $filename: open failed - $!\n"; - while (<>) { + while (<$file>) { my $line = $_; # Pattern line? if ($line =~ m/^([A-Z]):\s*(.*)/) { $line = $1 . ":\t" . trim($2) . "\n"; if ($lastline eq "") { - $hash{$case} = $hash{$case} . $line; + $$hashref{$case} = $$hashref{$case} . $line; next; } $case = trim($lastline); - exists $hash{$case} and die "Header '$case' already exists"; - $hash{$case} = $line; + exists $$hashref{$case} and die "Header '$case' already exists"; + $$hashref{$case} = $line; $lastline = ""; next; } if ($case eq " ") { - $hash{$case} = $hash{$case} . $lastline; + $$hashref{$case} = $$hashref{$case} . $lastline; $lastline = $line; next; } trim($lastline) eq "" or die ("Odd non-pattern line '$lastline' for '$case'"); $lastline = $line; } - $hash{$case} = $hash{$case} . $lastline; + $$hashref{$case} = $$hashref{$case} . $lastline; + close($file); } -file_input(); -alpha_output(); +my %hash; + +file_input(\%hash, "MAINTAINERS"); +alpha_output(\%hash, "MAINTAINERS.new"); exit(0); -- cgit v1.2.3 From b95c29a20f070babfea92ab8f741ec94695617d3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 5 Aug 2017 18:45:49 -0700 Subject: parse-maintainers: Move matching sections from MAINTAINERS Allow any number of command line arguments to match either the section header or the section contents and create new files. Create MAINTAINERS.new and SECTION.new. This allows scripting of the movement of various sections from MAINTAINERS. Signed-off-by: Joe Perches Signed-off-by: Linus Torvalds --- scripts/parse-maintainers.pl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl index c286154a2b68..e40b53db7f9f 100644 --- a/scripts/parse-maintainers.pl +++ b/scripts/parse-maintainers.pl @@ -109,8 +109,20 @@ sub file_input { } my %hash; +my %new_hash; file_input(\%hash, "MAINTAINERS"); + +foreach my $type (@ARGV) { + foreach my $key (keys %hash) { + if ($key =~ /$type/ || $hash{$key} =~ /$type/) { + $new_hash{$key} = $hash{$key}; + delete $hash{$key}; + } + } +} + alpha_output(\%hash, "MAINTAINERS.new"); +alpha_output(\%new_hash, "SECTION.new"); exit(0); -- cgit v1.2.3 From 1feb26162bee7b2f110facfec71b5c7bdbc7d14d Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 1 Aug 2017 16:25:01 -0400 Subject: nfs/flexfiles: fix leak of nfs4_ff_ds_version arrays The client was freeing the nfs4_ff_layout_ds, but not the contained nfs4_ff_ds_version array. Signed-off-by: Weston Andros Adamson Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 6df7a0cf5660..f32c58bbe556 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -32,6 +32,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { nfs4_print_deviceid(&mirror_ds->id_node.deviceid); nfs4_pnfs_ds_put(mirror_ds->ds); + kfree(mirror_ds->ds_versions); kfree_rcu(mirror_ds, id_node.rcu); } -- cgit v1.2.3 From 1899bd57570a3e610db574b57d1e7e66378aa908 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 12 Jul 2017 12:09:22 +0200 Subject: drm/exynos: forbid creating framebuffers from too small GEM buffers Add a check if the framebuffer described by the provided drm_mode_fb_cmd2 structure fits into provided GEM buffers. Without this check it is possible to create a framebuffer object from a small buffer and set it to the hardware, what results in displaying system memory outside the allocated GEM buffer. Signed-off-by: Marek Szyprowski Reviewed-by: Tobias Jakobi Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index d48fd7c918f8..73217c281c9a 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -145,13 +145,19 @@ static struct drm_framebuffer * exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { + const struct drm_format_info *info = drm_get_format_info(dev, mode_cmd); struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; struct drm_gem_object *obj; struct drm_framebuffer *fb; int i; int ret; - for (i = 0; i < drm_format_num_planes(mode_cmd->pixel_format); i++) { + for (i = 0; i < info->num_planes; i++) { + unsigned int height = (i == 0) ? mode_cmd->height : + DIV_ROUND_UP(mode_cmd->height, info->vsub); + unsigned long size = height * mode_cmd->pitches[i] + + mode_cmd->offsets[i]; + obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[i]); if (!obj) { DRM_ERROR("failed to lookup gem object\n"); @@ -160,6 +166,12 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, } exynos_gem[i] = to_exynos_gem(obj); + + if (size > exynos_gem[i]->size) { + i++; + ret = -EINVAL; + goto err; + } } fb = exynos_drm_framebuffer_init(dev, mode_cmd, exynos_gem, i); -- cgit v1.2.3 From e718fe450e616227b74d27a233cdf37b4df0c82b Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 3 Aug 2017 22:54:48 +0200 Subject: net/mlx4_en: don't set CHECKSUM_COMPLETE on SCTP packets if the NIC fails to validate the checksum on TCP/UDP, and validation of IP checksum is successful, the driver subtracts the pseudo-header checksum from the value obtained by the hardware and sets CHECKSUM_COMPLETE. Don't do that if protocol is IPPROTO_SCTP, otherwise CRC32c validation fails. V2: don't test MLX4_CQE_STATUS_IPV6 if MLX4_CQE_STATUS_IPV4 is set Reported-by: Shuang Li Fixes: f8c6455bb04b ("net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE") Signed-off-by: Davide Caratti Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 436f7689a032..bf1638044a7a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -574,16 +574,21 @@ static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum, * header, the HW adds it. To address that, we are subtracting the pseudo * header checksum from the checksum value provided by the HW. */ -static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, - struct iphdr *iph) +static int get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, + struct iphdr *iph) { __u16 length_for_csum = 0; __wsum csum_pseudo_header = 0; + __u8 ipproto = iph->protocol; + + if (unlikely(ipproto == IPPROTO_SCTP)) + return -1; length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2)); csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr, - length_for_csum, iph->protocol, 0); + length_for_csum, ipproto, 0); skb->csum = csum_sub(hw_checksum, csum_pseudo_header); + return 0; } #if IS_ENABLED(CONFIG_IPV6) @@ -594,17 +599,20 @@ static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb, static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, struct ipv6hdr *ipv6h) { + __u8 nexthdr = ipv6h->nexthdr; __wsum csum_pseudo_hdr = 0; - if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || - ipv6h->nexthdr == IPPROTO_HOPOPTS)) + if (unlikely(nexthdr == IPPROTO_FRAGMENT || + nexthdr == IPPROTO_HOPOPTS || + nexthdr == IPPROTO_SCTP)) return -1; - hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); + hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(nexthdr)); csum_pseudo_hdr = csum_partial(&ipv6h->saddr, sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0); csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len); - csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr)); + csum_pseudo_hdr = csum_add(csum_pseudo_hdr, + (__force __wsum)htons(nexthdr)); skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr); skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0)); @@ -627,11 +635,10 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, } if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4)) - get_fixed_ipv4_csum(hw_checksum, skb, hdr); + return get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) - else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) - return -1; + if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) + return get_fixed_ipv6_csum(hw_checksum, skb, hdr); #endif return 0; } -- cgit v1.2.3 From 8e6f1521ec431dbade73f57e21e5dc46eaae50ba Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 7 Aug 2017 16:20:49 +0200 Subject: net: dsa: mediatek: add adjust link support for user ports Manually adjust the port settings of user ports once PHY polling has completed. This patch extends the adjust_link callback to configure the per port PMCR register, applying the proper values polled from the PHY. Without this patch flow control was not always getting setup properly. Signed-off-by: Shashidhar Lakkavalli Signed-off-by: Muciri Gatimu Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 38 ++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/mt7530.h | 1 + 2 files changed, 39 insertions(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 1e46418a3b74..264b281eb86b 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -625,6 +625,44 @@ static void mt7530_adjust_link(struct dsa_switch *ds, int port, * all finished. */ mt7623_pad_clk_setup(ds); + } else { + u16 lcl_adv = 0, rmt_adv = 0; + u8 flowctrl; + u32 mcr = PMCR_USERP_LINK | PMCR_FORCE_MODE; + + switch (phydev->speed) { + case SPEED_1000: + mcr |= PMCR_FORCE_SPEED_1000; + break; + case SPEED_100: + mcr |= PMCR_FORCE_SPEED_100; + break; + }; + + if (phydev->link) + mcr |= PMCR_FORCE_LNK; + + if (phydev->duplex) { + mcr |= PMCR_FORCE_FDX; + + if (phydev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (phydev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + + if (phydev->advertising & ADVERTISED_Pause) + lcl_adv |= ADVERTISE_PAUSE_CAP; + if (phydev->advertising & ADVERTISED_Asym_Pause) + lcl_adv |= ADVERTISE_PAUSE_ASYM; + + flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); + + if (flowctrl & FLOW_CTRL_TX) + mcr |= PMCR_TX_FC_EN; + if (flowctrl & FLOW_CTRL_RX) + mcr |= PMCR_RX_FC_EN; + } + mt7530_write(priv, MT7530_PMCR_P(port), mcr); } } diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index b83d76b99802..74db9822eb40 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -151,6 +151,7 @@ enum mt7530_stp_state { #define PMCR_TX_FC_EN BIT(5) #define PMCR_RX_FC_EN BIT(4) #define PMCR_FORCE_SPEED_1000 BIT(3) +#define PMCR_FORCE_SPEED_100 BIT(2) #define PMCR_FORCE_FDX BIT(1) #define PMCR_FORCE_LNK BIT(0) #define PMCR_COMMON_LINK (PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | \ -- cgit v1.2.3 From ec0acb09313074ba1a4976945791d9c6815f39fb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 8 Aug 2017 15:25:25 +0800 Subject: net: sched: set xt_tgchk_param par.net properly in ipt_init_target Now xt_tgchk_param par in ipt_init_target is a local varibale, par.net is not initialized there. Later when xt_check_target calls target's checkentry in which it may access par.net, it would cause kernel panic. Jaroslav found this panic when running: # ip link add TestIface type dummy # tc qd add dev TestIface ingress handle ffff: # tc filter add dev TestIface parent ffff: u32 match u32 0 0 \ action xt -j CONNMARK --set-mark 4 This patch is to pass net param into ipt_init_target and set par.net with it properly in there. v1->v2: As Wang Cong pointed, I missed ipt_net_id != xt_net_id, so fix it by also passing net_id to __tcf_ipt_init. v2->v3: Missed the fixes tag, so add it. Fixes: ecb2421b5ddf ("netfilter: add and use nf_ct_netns_get/put") Reported-by: Jaroslav Aster Signed-off-by: Xin Long Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 36f0ced9e60c..94ba5cfab860 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -36,8 +36,8 @@ static struct tc_action_ops act_ipt_ops; static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; -static int ipt_init_target(struct xt_entry_target *t, char *table, - unsigned int hook) +static int ipt_init_target(struct net *net, struct xt_entry_target *t, + char *table, unsigned int hook) { struct xt_tgchk_param par; struct xt_target *target; @@ -49,6 +49,7 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, return PTR_ERR(target); t->u.kernel.target = target; + par.net = net; par.table = table; par.entryinfo = NULL; par.target = target; @@ -91,10 +92,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; -static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, +static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int ovr, int bind) { + struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -159,7 +161,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (unlikely(!t)) goto err2; - err = ipt_init_target(t, tname, hook); + err = ipt_init_target(net, t, tname, hook); if (err < 0) goto err3; @@ -193,18 +195,16 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_ipt_ops, ovr, bind); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, + bind); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_xt_ops, ovr, bind); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, + bind); } static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, -- cgit v1.2.3 From 8ba60924710cde564a3905588b6219741d6356d0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 8 Aug 2017 01:41:58 -0700 Subject: tcp: fastopen: tcp_connect() must refresh the route With new TCP_FASTOPEN_CONNECT socket option, there is a possibility to call tcp_connect() while socket sk_dst_cache is either NULL or invalid. +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 +0 setsockopt(4, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 +0 connect(4, ..., ...) = 0 << sk->sk_dst_cache becomes obsolete, or even set to NULL >> +1 sendto(4, ..., 1000, MSG_FASTOPEN, ..., ...) = 1000 We need to refresh the route otherwise bad things can happen, especially when syzkaller is running on the host :/ Fixes: 19f6d3f3c8422 ("net/tcp-fastopen: Add new API support") Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Cc: Wei Wang Cc: Yuchung Cheng Acked-by: Wei Wang Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 276406a83a37..b7661a68d498 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3436,6 +3436,10 @@ int tcp_connect(struct sock *sk) int err; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); + + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + tcp_connect_init(sk); if (unlikely(tp->repair)) { -- cgit v1.2.3 From 05bfd7dbb53a10be4a3e7aebaeec04b558198d49 Mon Sep 17 00:00:00 2001 From: Håkon Bugge Date: Tue, 8 Aug 2017 11:13:32 +0200 Subject: rds: Reintroduce statistics counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 7e3f2952eeb1 ("rds: don't let RDS shutdown a connection while senders are present"), refilling the receive queue was removed from rds_ib_recv(), along with the increment of s_ib_rx_refill_from_thread. Commit 73ce4317bf98 ("RDS: make sure we post recv buffers") re-introduces filling the receive queue from rds_ib_recv(), but does not add the statistics counter. rds_ib_recv() was later renamed to rds_ib_recv_path(). This commit reintroduces the statistics counting of s_ib_rx_refill_from_thread and s_ib_rx_refill_from_cq. Signed-off-by: Håkon Bugge Reviewed-by: Knut Omang Reviewed-by: Wei Lin Guay Reviewed-by: Shamir Rabinovitch Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_recv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index e10624aa6959..9722bf839d9d 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1015,8 +1015,10 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, if (rds_ib_ring_empty(&ic->i_recv_ring)) rds_ib_stats_inc(s_ib_rx_ring_empty); - if (rds_ib_ring_low(&ic->i_recv_ring)) + if (rds_ib_ring_low(&ic->i_recv_ring)) { rds_ib_recv_refill(conn, 0, GFP_NOWAIT); + rds_ib_stats_inc(s_ib_rx_refill_from_cq); + } } int rds_ib_recv_path(struct rds_conn_path *cp) @@ -1029,6 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp) if (rds_conn_up(conn)) { rds_ib_attempt_ack(ic); rds_ib_recv_refill(conn, 0, GFP_KERNEL); + rds_ib_stats_inc(s_ib_rx_refill_from_thread); } return ret; -- cgit v1.2.3 From 0a0e1a85c83775a648041be2b15de6d0a2f2b8eb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 8 Aug 2017 11:43:24 +0200 Subject: ppp: fix xmit recursion detection on ppp channels Commit e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices") dropped the xmit_recursion counter incrementation in ppp_channel_push() and relied on ppp_xmit_process() for this task. But __ppp_channel_push() can also send packets directly (using the .start_xmit() channel callback), in which case the xmit_recursion counter isn't incremented anymore. If such packets get routed back to the parent ppp unit, ppp_xmit_process() won't notice the recursion and will call ppp_channel_push() on the same channel, effectively creating the deadlock situation that the xmit_recursion mechanism was supposed to prevent. This patch re-introduces the xmit_recursion counter incrementation in ppp_channel_push(). Since the xmit_recursion variable is now part of the parent ppp unit, incrementation is skipped if the channel doesn't have any. This is fine because only packets routed through the parent unit may enter the channel recursively. Finally, we have to ensure that pch->ppp is not going to be modified while executing ppp_channel_push(). Instead of taking this lock only while calling ppp_xmit_process(), we now have to hold it for the full ppp_channel_push() execution. This respects the ppp locks ordering which requires locking ->upl before ->downl. Fixes: e5dadc65f9e0 ("ppp: Fix false xmit recursion detect with two ppp devices") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index bd4303944e44..a404552555d4 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1915,21 +1915,23 @@ static void __ppp_channel_push(struct channel *pch) spin_unlock(&pch->downl); /* see if there is anything from the attached unit to be sent */ if (skb_queue_empty(&pch->file.xq)) { - read_lock(&pch->upl); ppp = pch->ppp; if (ppp) - ppp_xmit_process(ppp); - read_unlock(&pch->upl); + __ppp_xmit_process(ppp); } } static void ppp_channel_push(struct channel *pch) { - local_bh_disable(); - - __ppp_channel_push(pch); - - local_bh_enable(); + read_lock_bh(&pch->upl); + if (pch->ppp) { + (*this_cpu_ptr(pch->ppp->xmit_recursion))++; + __ppp_channel_push(pch); + (*this_cpu_ptr(pch->ppp->xmit_recursion))--; + } else { + __ppp_channel_push(pch); + } + read_unlock_bh(&pch->upl); } /* -- cgit v1.2.3 From bbae08e592706dc32e5c7c97827b13c1c178668b Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Tue, 8 Aug 2017 18:02:11 +0200 Subject: qmi_wwan: fix NULL deref on disconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qmi_wwan_disconnect is called twice when disconnecting devices with separate control and data interfaces. The first invocation will set the interface data to NULL for both interfaces to flag that the disconnect has been handled. But the matching NULL check was left out when qmi_wwan_disconnect was added, resulting in this oops: usb 2-1.4: USB disconnect, device number 4 qmi_wwan 2-1.4:1.6 wwp0s29u1u4i6: unregister 'qmi_wwan' usb-0000:00:1d.0-1.4, WWAN/QMI device BUG: unable to handle kernel NULL pointer dereference at 00000000000000e0 IP: qmi_wwan_disconnect+0x25/0xc0 [qmi_wwan] PGD 0 P4D 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 2 PID: 33 Comm: kworker/2:1 Tainted: G E 4.12.3-nr44-normandy-r1500619820+ #1 Hardware name: LENOVO 4291LR7/4291LR7, BIOS CBET4000 4.6-810-g50522254fb 07/21/2017 Workqueue: usb_hub_wq hub_event [usbcore] task: ffff8c882b716040 task.stack: ffffb8e800d84000 RIP: 0010:qmi_wwan_disconnect+0x25/0xc0 [qmi_wwan] RSP: 0018:ffffb8e800d87b38 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff8c8824f3f1d0 RDI: ffff8c8824ef6400 RBP: ffff8c8824ef6400 R08: 0000000000000000 R09: 0000000000000000 R10: ffffb8e800d87780 R11: 0000000000000011 R12: ffffffffc07ea0e8 R13: ffff8c8824e2e000 R14: ffff8c8824e2e098 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8c8835300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000e0 CR3: 0000000229ca5000 CR4: 00000000000406e0 Call Trace: ? usb_unbind_interface+0x71/0x270 [usbcore] ? device_release_driver_internal+0x154/0x210 ? qmi_wwan_unbind+0x6d/0xc0 [qmi_wwan] ? usbnet_disconnect+0x6c/0xf0 [usbnet] ? qmi_wwan_disconnect+0x87/0xc0 [qmi_wwan] ? usb_unbind_interface+0x71/0x270 [usbcore] ? device_release_driver_internal+0x154/0x210 Reported-and-tested-by: Nathaniel Roach Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Cc: Daniele Palmas Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index ff6f39fe6c00..8c3733608271 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1341,10 +1341,14 @@ static int qmi_wwan_probe(struct usb_interface *intf, static void qmi_wwan_disconnect(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); - struct qmi_wwan_state *info = (void *)&dev->data; + struct qmi_wwan_state *info; struct list_head *iter; struct net_device *ldev; + /* called twice if separate control and data intf */ + if (!dev) + return; + info = (void *)&dev->data; if (info->flags & QMI_WWAN_FLAG_MUX) { if (!rtnl_trylock()) { restart_syscall(); -- cgit v1.2.3 From 8d63bee643f1fb53e472f0e135cae4eb99d62d19 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 8 Aug 2017 14:22:55 -0400 Subject: net: avoid skb_warn_bad_offload false positives on UFO skb_warn_bad_offload triggers a warning when an skb enters the GSO stack at __skb_gso_segment that does not have CHECKSUM_PARTIAL checksum offload set. Commit b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") observed that SKB_GSO_DODGY producers can trigger the check and that passing those packets through the GSO handlers will fix it up. But, the software UFO handler will set ip_summed to CHECKSUM_NONE. When __skb_gso_segment is called from the receive path, this triggers the warning again. Make UFO set CHECKSUM_UNNECESSARY instead of CHECKSUM_NONE. On Tx these two are equivalent. On Rx, this better matches the skb state (checksum computed), as CHECKSUM_NONE here means no checksum computed. See also this thread for context: http://patchwork.ozlabs.org/patch/799015/ Fixes: b2504a5dbef3 ("net: reduce skb_warn_bad_offload() noise") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/udp_offload.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8515f8fe0460..ce15a06d5558 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2739,7 +2739,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_NONE; + skb->ip_summed != CHECKSUM_UNNECESSARY; return skb->ip_summed == CHECKSUM_NONE; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 781250151d40..0932c85b42af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -235,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a2267f80febb..e7d378c032cb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in -- cgit v1.2.3 From 7e39a00d593133ca8fcd3eef0409685e7c895ee6 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 26 Jul 2017 15:08:45 +0300 Subject: iwlwifi: mvm: start mac queues when deferred tx frames are purged In AP mode, if a station is removed just as it is adding a new stream, the queue in question will remain stopped and no more TX will happen in this queue, leading to connection failures and other problems. This is because under DQA, when tx is deferred because a queue needs to be allocated, the mac queue for that TID is stopped until the new stream is added. If at this point the station that this stream belongs to is removed, all the deferred tx frames are purged, but the mac queue is not restarted. As a result, all following tx on this queue will not be transmitted. Fix this by starting the relevant mac queues when the deferred tx frames are purged. Fixes: 24afba7690e4 ("iwlwifi: mvm: support bss dynamic alloc/dealloc of queues") Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index c7b1e58e3384..ce901be5fba8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -2597,8 +2597,18 @@ static void iwl_mvm_purge_deferred_tx_frames(struct iwl_mvm *mvm, spin_lock_bh(&mvm_sta->lock); for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { tid_data = &mvm_sta->tid_data[i]; - while ((skb = __skb_dequeue(&tid_data->deferred_tx_frames))) + + while ((skb = __skb_dequeue(&tid_data->deferred_tx_frames))) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + /* + * The first deferred frame should've stopped the MAC + * queues, so we should never get a second deferred + * frame for the RA/TID. + */ + iwl_mvm_start_mac_queues(mvm, info->hw_queue); ieee80211_free_txskb(mvm->hw, skb); + } } spin_unlock_bh(&mvm_sta->lock); } -- cgit v1.2.3 From a600852a9d00be08c539307a42729fd46b0a654e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 27 Jul 2017 15:34:12 +0300 Subject: iwlwifi: mvm: don't WARN when a legit race happens in A-MPDU When we start an Rx A-MPDU session, we first get the AddBA request, then we send an ADD_STA command to the firmware that will reply with a BAID which is a hardware resource that tracks the BA session. This BAID will appear on each and every frame that we get from the firwmare until the A-MPDU session is torn down. In the Rx path, we look at this BAID to manage the reordering buffer. This flow is inherently racy since the hardware will start to put the BAID in the frames it receives even if the firmware hasn't sent the response to the ADD_STA command. This basically means that the driver can get frames with a valid BAID that it doesn't know yet. When that happens, the driver used to WARN. Fix this by simply not WARN in this case. When the driver will know abou the BAID, it will initialise the relevant states and the next frame with a valid BAID will refresh them. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index f3e608196369..71c8b800ffa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -636,9 +636,9 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid_data = rcu_dereference(mvm->baid_map[baid]); if (!baid_data) { - WARN(!(reorder & IWL_RX_MPDU_REORDER_BA_OLD_SN), - "Received baid %d, but no data exists for this BAID\n", - baid); + IWL_DEBUG_RX(mvm, + "Got valid BAID but no baid allocated, bypass the re-ordering buffer. Baid %d reorder 0x%x\n", + baid, reorder); return false; } @@ -759,7 +759,9 @@ static void iwl_mvm_agg_rx_received(struct iwl_mvm *mvm, data = rcu_dereference(mvm->baid_map[baid]); if (!data) { - WARN_ON(!(reorder_data & IWL_RX_MPDU_REORDER_BA_OLD_SN)); + IWL_DEBUG_RX(mvm, + "Got valid BAID but no baid allocated, bypass the re-ordering buffer. Baid %d reorder 0x%x\n", + baid, reorder_data); goto out; } -- cgit v1.2.3 From 04c2cf34362f133be09878bd752f8b014318b59a Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:25 +0300 Subject: mac80211: add api to start ba session timer expired flow Some drivers handle rx buffer reordering internally (and by extension handle also the rx ba session timer internally), but do not ofload the addba/delba negotiation. Add an api for these drivers to properly tear-down the ba session, including sending a delba. Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/agg-rx.c | 22 +++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b2b5419467cc..f8149ca192b4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5499,6 +5499,21 @@ static inline void ieee80211_stop_rx_ba_session_offl(struct ieee80211_vif *vif, ieee80211_manage_rx_ba_offl(vif, addr, tid + IEEE80211_NUM_TIDS); } +/** + * ieee80211_rx_ba_timer_expired - stop a Rx BA session due to timeout + * + * Some device drivers do not offload AddBa/DelBa negotiation, but handle rx + * buffer reording internally, and therefore also handle the session timer. + * + * Trigger the timeout flow, which sends a DelBa. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid); + /* Rate control API */ /** diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 8708cbe8af5b..2b36eff5d97e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -466,3 +466,23 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); + +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get_bss(sdata, addr); + if (!sta) + goto unlock; + + set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired); -- cgit v1.2.3 From 20fc690f38d17b8f961101a477a9aa0841fb6e20 Mon Sep 17 00:00:00 2001 From: Naftali Goldstein Date: Tue, 11 Jul 2017 10:07:32 +0300 Subject: iwlwifi: mvm: send delba upon rx ba session timeout When an RX block-ack session times out, the firmware, which offloads RX reordering but not the BA session negotiation, stops the session but doesn't send a DELBA. This causes the the session to remain active in the remote device, so no more BA sessions will be established, causing a severe throughput degradation due to the lack of aggregation. Use the new ieee80211_rx_ba_timer_expired API when the ba session timer expires, since this will tear down the ba session and also send a delba. The previous API used is intended for drivers that offload the addba/delba negotiation, but not the rx reordering, while our driver does the opposite. This patch depends on "mac80211: add api to start ba session timer expired flow". Signed-off-by: Naftali Goldstein Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dcaef7c043ac..027ee5e72172 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -291,8 +291,8 @@ static void iwl_mvm_rx_agg_session_expired(unsigned long data) goto unlock; mvm_sta = iwl_mvm_sta_from_mac80211(sta); - ieee80211_stop_rx_ba_session_offl(mvm_sta->vif, - sta->addr, ba_data->tid); + ieee80211_rx_ba_timer_expired(mvm_sta->vif, + sta->addr, ba_data->tid); unlock: rcu_read_unlock(); } -- cgit v1.2.3 From 3f8b23a09a87aa65df3e13129cb2d9cffcb394db Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 9 Aug 2017 01:48:59 +0200 Subject: mmc: block: fix lockdep splat when removing mmc_block module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix lockdep splat introduced in v4.13-rc4. [ 266.297226] ------------[ cut here ]------------ [ 266.300078] WARNING: CPU: 2 PID: 176 at /mnt/src/jaja/git/tf300t/include/linux/blkdev.h:657 mmc_blk_remove_req+0xd0/0xe8 [mmc_block] [ 266.302937] Modules linked in: mmc_block(-) sdhci_tegra sdhci_pltfm sdhci pwrseq_simple pwrseq_emmc mmc_core [ 266.305941] CPU: 2 PID: 176 Comm: rmmod Tainted: G W 4.13.0-rc4mq-00208-gb691e67724b8-dirty #694 [ 266.308852] Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) [ 266.311719] [] (unwind_backtrace) from [] (show_stack+0x18/0x1c) [ 266.314664] [] (show_stack) from [] (dump_stack+0x84/0x98) [ 266.317644] [] (dump_stack) from [] (__warn+0xf4/0x10c) [ 266.320542] [] (__warn) from [] (warn_slowpath_null+0x28/0x30) [ 266.323534] [] (warn_slowpath_null) from [] (mmc_blk_remove_req+0xd0/0xe8 [mmc_block]) [ 266.326568] [] (mmc_blk_remove_req [mmc_block]) from [] (mmc_blk_remove_parts.constprop.6+0x50/0x64 [mmc_block]) [ 266.329678] [] (mmc_blk_remove_parts.constprop.6 [mmc_block]) from [] (mmc_blk_remove+0x24/0x140 [mmc_block]) [ 266.332894] [] (mmc_blk_remove [mmc_block]) from [] (mmc_bus_remove+0x20/0x28 [mmc_core]) [ 266.336198] [] (mmc_bus_remove [mmc_core]) from [] (device_release_driver_internal+0x164/0x200) [ 266.339367] [] (device_release_driver_internal) from [] (driver_detach+0x40/0x74) [ 266.342537] [] (driver_detach) from [] (bus_remove_driver+0x68/0xdc) [ 266.345660] [] (bus_remove_driver) from [] (mmc_blk_exit+0xc/0x2cc [mmc_block]) [ 266.348875] [] (mmc_blk_exit [mmc_block]) from [] (SyS_delete_module+0x1c4/0x254) [ 266.352068] [] (SyS_delete_module) from [] (ret_fast_syscall+0x0/0x34) [ 266.355308] ---[ end trace f68728a0d3053b72 ]--- Fixes: 7c84b8b43d3d ("mmc: block: bypass the queue even if usage is present for hotplug") Signed-off-by: Michał Mirosław Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index e5938c791330..f1bbfd389367 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2170,7 +2170,9 @@ static void mmc_blk_remove_req(struct mmc_blk_data *md) * from being accepted. */ card = md->queue.card; + spin_lock_irq(md->queue.queue->queue_lock); queue_flag_set(QUEUE_FLAG_BYPASS, md->queue.queue); + spin_unlock_irq(md->queue.queue->queue_lock); blk_set_queue_dying(md->queue.queue); mmc_cleanup_queue(&md->queue); if (md->disk->flags & GENHD_FL_UP) { -- cgit v1.2.3 From 28389575a8cf933a5f3c378556b9f4d3cce0efd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 2 Aug 2017 16:40:47 +0800 Subject: crypto: ixp4xx - Fix error handling path in 'aead_perform()' In commit 0f987e25cb8a, the source processing has been moved in front of the destination processing, but the error handling path has not been modified accordingly. Free resources in the correct order to avoid some leaks. Cc: Fixes: 0f987e25cb8a ("crypto: ixp4xx - Fix false lastlen uninitialised warning") Reported-by: Christophe JAILLET Signed-off-by: Herbert Xu Reviewed-by: Arnd Bergmann --- drivers/crypto/ixp4xx_crypto.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 427cbe012729..dadc4a808df5 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -1073,7 +1073,7 @@ static int aead_perform(struct aead_request *req, int encrypt, req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags, &crypt->icv_rev_aes); if (unlikely(!req_ctx->hmac_virt)) - goto free_buf_src; + goto free_buf_dst; if (!encrypt) { scatterwalk_map_and_copy(req_ctx->hmac_virt, req->src, cryptlen, authsize, 0); @@ -1088,10 +1088,10 @@ static int aead_perform(struct aead_request *req, int encrypt, BUG_ON(qmgr_stat_overflow(SEND_QID)); return -EINPROGRESS; -free_buf_src: - free_buf_chain(dev, req_ctx->src, crypt->src_buf); free_buf_dst: free_buf_chain(dev, req_ctx->dst, crypt->dst_buf); +free_buf_src: + free_buf_chain(dev, req_ctx->src, crypt->src_buf); crypt->ctl_flags = CTL_FLAG_UNUSED; return -ENOMEM; } -- cgit v1.2.3 From 8861249c740fc4af9ddc5aee321eafefb960d7c6 Mon Sep 17 00:00:00 2001 From: "megha.dey@linux.intel.com" Date: Wed, 2 Aug 2017 13:49:09 -0700 Subject: crypto: x86/sha1 - Fix reads beyond the number of blocks passed It was reported that the sha1 AVX2 function(sha1_transform_avx2) is reading ahead beyond its intended data, and causing a crash if the next block is beyond page boundary: http://marc.info/?l=linux-crypto-vger&m=149373371023377 This patch makes sure that there is no overflow for any buffer length. It passes the tests written by Jan Stancek that revealed this problem: https://github.com/jstancek/sha1-avx2-crash I have re-enabled sha1-avx2 by reverting commit b82ce24426a4071da9529d726057e4e642948667 Cc: Fixes: b82ce24426a4 ("crypto: sha1-ssse3 - Disable avx2") Originally-by: Ilya Albrekht Tested-by: Jan Stancek Signed-off-by: Megha Dey Reported-by: Jan Stancek Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++++++---------------- arch/x86/crypto/sha1_ssse3_glue.c | 2 +- 2 files changed, 37 insertions(+), 32 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1cd792db15ef..1eab79c9ac48 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -117,11 +117,10 @@ .set T1, REG_T1 .endm -#define K_BASE %r8 #define HASH_PTR %r9 +#define BLOCKS_CTR %r8 #define BUFFER_PTR %r10 #define BUFFER_PTR2 %r13 -#define BUFFER_END %r11 #define PRECALC_BUF %r14 #define WK_BUF %r15 @@ -205,14 +204,14 @@ * blended AVX2 and ALU instruction scheduling * 1 vector iteration per 8 rounds */ - vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP + vmovdqu (i * 2)(BUFFER_PTR), W_TMP .elseif ((i & 7) == 1) - vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ + vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\ WY_TMP, WY_TMP .elseif ((i & 7) == 2) vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY .elseif ((i & 7) == 4) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP .elseif ((i & 7) == 7) vmovdqu WY_TMP, PRECALC_WK(i&~7) @@ -255,7 +254,7 @@ vpxor WY, WY_TMP, WY_TMP .elseif ((i & 7) == 7) vpxor WY_TMP2, WY_TMP, WY - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -291,7 +290,7 @@ vpsrld $30, WY, WY vpor WY, WY_TMP, WY .elseif ((i & 7) == 7) - vpaddd K_XMM(K_BASE), WY, WY_TMP + vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP vmovdqu WY_TMP, PRECALC_WK(i&~7) PRECALC_ROTATE_WY @@ -446,6 +445,16 @@ .endm +/* Add constant only if (%2 > %3) condition met (uses RTA as temp) + * %1 + %2 >= %3 ? %4 : 0 + */ +.macro ADD_IF_GE a, b, c, d + mov \a, RTA + add $\d, RTA + cmp $\c, \b + cmovge RTA, \a +.endm + /* * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining */ @@ -463,13 +472,16 @@ lea (2*4*80+32)(%rsp), WK_BUF # Precalc WK for first 2 blocks - PRECALC_OFFSET = 0 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64 .set i, 0 .rept 160 PRECALC i .set i, i + 1 .endr - PRECALC_OFFSET = 128 + + /* Go to next block if needed */ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128 + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 xchg WK_BUF, PRECALC_BUF .align 32 @@ -479,8 +491,8 @@ _loop: * we use K_BASE value as a signal of a last block, * it is set below by: cmovae BUFFER_PTR, K_BASE */ - cmp K_BASE, BUFFER_PTR - jne _begin + test BLOCKS_CTR, BLOCKS_CTR + jnz _begin .align 32 jmp _end .align 32 @@ -512,10 +524,10 @@ _loop0: .set j, j+2 .endr - add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ - cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ - + /* Update Counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128 /* * rounds * 60,62,64,66,68 @@ -532,8 +544,8 @@ _loop0: UPDATE_HASH 12(HASH_PTR), D UPDATE_HASH 16(HASH_PTR), E - cmp K_BASE, BUFFER_PTR /* is current block the last one? */ - je _loop + test BLOCKS_CTR, BLOCKS_CTR + jz _loop mov TB, B @@ -575,10 +587,10 @@ _loop2: .set j, j+2 .endr - add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ - - cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ - cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + /* update counter */ + sub $1, BLOCKS_CTR + /* Move to the next block only if needed*/ + ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128 jmp _loop3 _loop3: @@ -641,19 +653,12 @@ _loop3: avx2_zeroupper - lea K_XMM_AR(%rip), K_BASE - + /* Setup initial values */ mov CTX, HASH_PTR mov BUF, BUFFER_PTR - lea 64(BUF), BUFFER_PTR2 - - shl $6, CNT /* mul by 64 */ - add BUF, CNT - add $64, CNT - mov CNT, BUFFER_END - cmp BUFFER_END, BUFFER_PTR2 - cmovae K_BASE, BUFFER_PTR2 + mov BUF, BUFFER_PTR2 + mov CNT, BLOCKS_CTR xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index f960a043cdeb..fc61739150e7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -201,7 +201,7 @@ asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, static bool avx2_usable(void) { - if (false && avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI1) && boot_cpu_has(X86_FEATURE_BMI2)) return true; -- cgit v1.2.3 From 7310d5c8c55e8987a854507f71022f8de676bbf4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Aug 2017 20:57:55 +1000 Subject: powerpc/configs: Re-enable HARD/SOFT lockup detectors In commit 05a4a9527931 ("kernel/watchdog: split up config options"), CONFIG_LOCKUP_DETECTOR was split into two separate config options, CONFIG_HARDLOCKUP_DETECTOR and CONFIG_SOFTLOCKUP_DETECTOR. Our defconfigs still have CONFIG_LOCKUP_DETECTOR=y, but that is no longer user selectable, and we don't mention the new options, so we end up with none of them enabled. So update the defconfigs to turn on the new SOFT and HARD options, the end result being the same as what we had previously. Fixes: 05a4a9527931 ("kernel/watchdog: split up config options") Signed-off-by: Michael Ellerman --- arch/powerpc/configs/powernv_defconfig | 3 ++- arch/powerpc/configs/ppc64_defconfig | 3 ++- arch/powerpc/configs/pseries_defconfig | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index 0695ce047d56..34fc9bbfca9e 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5175028c56ce..c5246d29f385 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1a61aa20dfba..fd5d98a0b95c 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -- cgit v1.2.3 From 0459ddfdb31e7d812b555a2530ecbacdf96961a6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:21 +1000 Subject: powerpc: NMI IPI improve lock primitive When the NMI IPI lock is contended, spin at low SMT priority, using loads only, and with interrupts enabled (where possible). This improves behaviour under high contention (e.g., a system crash when a number of CPUs are trying to enter the debugger). Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index cf0e1245b8cc..8d3320562c70 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) hard_irq_disable(); while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags) static void nmi_ipi_lock(void) { while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - cpu_relax(); + spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0); } static void nmi_ipi_unlock(void) @@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) nmi_ipi_lock_start(&flags); while (nmi_ipi_busy_count) { nmi_ipi_unlock_end(&flags); - cpu_relax(); + spin_until_cond(nmi_ipi_busy_count == 0); nmi_ipi_lock_start(&flags); } -- cgit v1.2.3 From d8e2a4053574002135fbb032c2e74f1d1dbb2103 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:22 +1000 Subject: powerpc/watchdog: Improve watchdog lock primitive - Hard-disable interrupts before taking the lock, which prevents soft-NMI re-entrancy and therefore can prevent deadlocks. - Use raw_ variants of local_irq_disable to avoid irq debugging. - When the lock is contended, spin at low SMT priority, using loads only, and with interrupts enabled (where possible). Some stalls have been noticed at high loads that go away with improved locking. There should not be so much locking contention in the first place (which is addressed in a subsequent patch), but locking should still be improved. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..fda4d044d326 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags) * This may be called from low level interrupt handlers at some * point in future. */ - local_irq_save(*flags); - while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) - cpu_relax(); + raw_local_irq_save(*flags); + hard_irq_disable(); /* Make it soft-NMI safe */ + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) { + raw_local_irq_restore(*flags); + spin_until_cond(!test_bit(0, &__wd_smp_lock)); + raw_local_irq_save(*flags); + hard_irq_disable(); + } } static inline void wd_smp_unlock(unsigned long *flags) { clear_bit_unlock(0, &__wd_smp_lock); - local_irq_restore(*flags); + raw_local_irq_restore(*flags); } static void wd_lockup_ipi(struct pt_regs *regs) -- cgit v1.2.3 From 26c5c6e129ee725f103938262a034861ada467ae Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:23 +1000 Subject: powerpc/watchdog: Moderate touch_nmi_watchdog overhead Some code can go into a tight loop calling touch_nmi_watchdog (e.g., stop_machine CPU hotplug code). This can cause contention on watchdog locks particularly if all CPUs with watchdog enabled are spinning in the loops. Avoid this storm of activity by running the watchdog timer callback from this path if we have exceeded the timer period since it was last run. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index fda4d044d326..426dd34891d6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -263,9 +263,11 @@ static void wd_timer_fn(unsigned long data) void arch_touch_nmi_watchdog(void) { + unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - watchdog_timer_interrupt(cpu); + if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks) + watchdog_timer_interrupt(cpu); } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -- cgit v1.2.3 From 8e23692175ad465628b8c86c1acc154fecad97be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:24 +1000 Subject: powerpc/watchdog: Fix final-check recovered case When the watchdog decides to panic, it takes the lock and double checks everything (to avoid races with the CPU being unstuck or panic()ed by something else). The exit label was misplaced and would result in all-CPUs backtrace and watchdog panic even in the case that the condition was found to be resolved. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 426dd34891d6..7d16dafa1bb6 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -144,7 +144,6 @@ static void watchdog_smp_panic(int cpu, u64 tb) for_each_cpu(c, &wd_smp_cpus_pending) set_cpu_stuck(c, tb); -out: wd_smp_unlock(&flags); printk_safe_flush(); @@ -157,6 +156,11 @@ out: if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + + return; + +out: + wd_smp_unlock(&flags); } static void wd_smp_clear_cpu_pending(int cpu, u64 tb) -- cgit v1.2.3 From 87607a30be92f1ecee3af6f4a5779e179db98118 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:25 +1000 Subject: powerpc/watchdog: Fix marking of stuck CPUs When the SMP detector finds other CPUs stuck, it iterates over them and marks them as stuck. This pulls them out of the pending mask and allows the detector to continue with remaining good CPUs (if nmi_watchdog=panic is not enabled). The code to dothat was buggy because when setting a CPU stuck, if the pending mask became empty, it resets it to keep the watchdog running. However the iterator will continue to run over the new pending mask and mark remaining good CPUs sas stuck. Fix this by doing it with cpumask bitwise operations. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d16dafa1bb6..12e90ae712fe 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -101,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs) nmi_panic(regs, "Hard LOCKUP"); } -static void set_cpu_stuck(int cpu, u64 tb) +static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { - cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); - cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); + cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -112,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb) &wd_smp_cpus_stuck); } } +static void set_cpu_stuck(int cpu, u64 tb) +{ + set_cpumask_stuck(cpumask_of(cpu), tb); +} static void watchdog_smp_panic(int cpu, u64 tb) { @@ -140,9 +144,8 @@ static void watchdog_smp_panic(int cpu, u64 tb) } smp_flush_nmi_ipi(1000000); - /* Take the stuck CPU out of the watch group */ - for_each_cpu(c, &wd_smp_cpus_pending) - set_cpu_stuck(c, tb); + /* Take the stuck CPUs out of the watch group */ + set_cpumask_stuck(&wd_smp_cpus_pending, tb); wd_smp_unlock(&flags); -- cgit v1.2.3 From 96ea91e7b6ee2c406598d859e7348b4829404eea Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Aug 2017 22:41:26 +1000 Subject: powerpc/watchdog: add locking around init/exit functions When CPUs start and stop the watchdog, they manipulate shared data that is normally protected by the lock. Other CPUs can be running concurrently at this time, so it's a good idea to use locking here to be on the safe side. Remove the barrier which is undocumented and didn't do anything. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/watchdog.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 12e90ae712fe..34721a257a77 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -297,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu) static int start_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); return 0; @@ -311,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu) if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) return 0; + wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); if (cpumask_weight(&wd_cpus_enabled) == 1) { cpumask_set_cpu(cpu, &wd_smp_cpus_pending); wd_smp_last_reset_tb = get_tb(); } - smp_wmb(); + wd_smp_unlock(&flags); + start_watchdog_timer_on(cpu); return 0; @@ -324,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu) static int stop_wd_on_cpu(unsigned int cpu) { + unsigned long flags; + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) return 0; /* Can happen in CPU unplug case */ stop_watchdog_timer_on(cpu); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_unlock(&flags); + wd_smp_clear_cpu_pending(cpu, get_tb()); return 0; -- cgit v1.2.3 From 7ba190be873a86b2b90a25daca93ca1ced6c4423 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 2 Aug 2017 15:47:33 -0600 Subject: selftests: futex: fix run_tests target make -C tools/testing/selftests/futex/ run_tests doesn't run the futex tests. Running the tests when `dirname $(OUTPUT)` == $(PWD) doesn't work when the $(OUTPUT) is $(PWD) which is the case when the test is run using make -C tools/testing/selftests/futex/ run_tests. Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") Signed-off-by: Shuah Khan Reviewed-by: Darren Hart (VMware) Signed-off-by: Shuah Khan --- tools/testing/selftests/futex/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index e2fbb890aef9..7c647f619d63 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -14,7 +14,7 @@ all: done override define RUN_TESTS - @if [ `dirname $(OUTPUT)` = $(PWD) ]; then ./run.sh; fi + $(OUTPUT)/run.sh endef override define INSTALL_RULE -- cgit v1.2.3 From c0ca0e5934b3bd70d456b363a6a989a3b4a692f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Aug 2017 21:39:28 -0400 Subject: NFSv4: Ignore NFS4ERR_OLD_STATEID in nfs41_check_open_stateid() If the call to TEST_STATEID returns NFS4ERR_OLD_STATEID, then it just means we raced with other calls to OPEN. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e46be8c9be1..cbf9cdd48a3b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2553,9 +2553,8 @@ static int nfs41_check_open_stateid(struct nfs4_state *state) clear_bit(NFS_O_RDWR_STATE, &state->flags); clear_bit(NFS_OPEN_STATE, &state->flags); stateid->type = NFS4_INVALID_STATEID_TYPE; - } - if (status != NFS_OK) return status; + } if (nfs_open_stateid_recover_openmode(state)) return -NFS4ERR_OPENMODE; return NFS_OK; -- cgit v1.2.3 From 48fb6f4db940e92cfb16cd878cddd59ea6120d06 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 9 Aug 2017 08:27:11 +0100 Subject: futex: Remove unnecessary warning from get_futex_key Commit 65d8fc777f6d ("futex: Remove requirement for lock_page() in get_futex_key()") removed an unnecessary lock_page() with the side-effect that page->mapping needed to be treated very carefully. Two defensive warnings were added in case any assumption was missed and the first warning assumed a correct application would not alter a mapping backing a futex key. Since merging, it has not triggered for any unexpected case but Mark Rutland reported the following bug triggering due to the first warning. kernel BUG at kernel/futex.c:679! Internal error: Oops - BUG: 0 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 3695 Comm: syz-executor1 Not tainted 4.13.0-rc3-00020-g307fec773ba3 #3 Hardware name: linux,dummy-virt (DT) task: ffff80001e271780 task.stack: ffff000010908000 PC is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679 LR is at get_futex_key+0x6a4/0xcf0 kernel/futex.c:679 pc : [] lr : [] pstate: 80000145 The fact that it's a bug instead of a warning was due to an unrelated arm64 problem, but the warning itself triggered because the underlying mapping changed. This is an application issue but from a kernel perspective it's a recoverable situation and the warning is unnecessary so this patch removes the warning. The warning may potentially be triggered with the following test program from Mark although it may be necessary to adjust NR_FUTEX_THREADS to be a value smaller than the number of CPUs in the system. #include #include #include #include #include #include #include #include #define NR_FUTEX_THREADS 16 pthread_t threads[NR_FUTEX_THREADS]; void *mem; #define MEM_PROT (PROT_READ | PROT_WRITE) #define MEM_SIZE 65536 static int futex_wrapper(int *uaddr, int op, int val, const struct timespec *timeout, int *uaddr2, int val3) { syscall(SYS_futex, uaddr, op, val, timeout, uaddr2, val3); } void *poll_futex(void *unused) { for (;;) { futex_wrapper(mem, FUTEX_CMP_REQUEUE_PI, 1, NULL, mem + 4, 1); } } int main(int argc, char *argv[]) { int i; mem = mmap(NULL, MEM_SIZE, MEM_PROT, MAP_SHARED | MAP_ANONYMOUS, -1, 0); printf("Mapping @ %p\n", mem); printf("Creating futex threads...\n"); for (i = 0; i < NR_FUTEX_THREADS; i++) pthread_create(&threads[i], NULL, poll_futex, NULL); printf("Flipping mapping...\n"); for (;;) { mmap(mem, MEM_SIZE, MEM_PROT, MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0); } return 0; } Reported-and-tested-by: Mark Rutland Signed-off-by: Mel Gorman Acked-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Linus Torvalds --- kernel/futex.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 16dbe4c93895..f50b434756c1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -670,13 +670,14 @@ again: * this reference was taken by ihold under the page lock * pinning the inode in place so i_lock was unnecessary. The * only way for this check to fail is if the inode was - * truncated in parallel so warn for now if this happens. + * truncated in parallel which is almost certainly an + * application bug. In such a case, just retry. * * We are not calling into get_futex_key_refs() in file-backed * cases, therefore a successful atomic_inc return below will * guarantee that get_futex_key() will still imply smp_mb(); (B). */ - if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) { + if (!atomic_inc_not_zero(&inode->i_count)) { rcu_read_unlock(); put_page(page); -- cgit v1.2.3 From 92f190aba2d57c91ef99e227e36461234c35eb7b Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Wed, 9 Aug 2017 01:42:22 +0200 Subject: drm: make DRM_STM default n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default config value for all other drivers is N. Signed-off-by: Michał Mirosław Signed-off-by: Dave Airlie --- drivers/gpu/drm/stm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig index 2c4817fb0890..8fe5b184b4e8 100644 --- a/drivers/gpu/drm/stm/Kconfig +++ b/drivers/gpu/drm/stm/Kconfig @@ -7,7 +7,6 @@ config DRM_STM select DRM_PANEL select VIDEOMODE_HELPERS select FB_PROVIDE_GET_FB_UNMAPPED_AREA - default y help Enable support for the on-chip display controller on -- cgit v1.2.3 From 372aa73e20197d463fc8b34524c20b089a98b1c3 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 10 Aug 2017 11:32:18 +1000 Subject: drm/nouveau/disp/nv04: avoid creation of output paths Fixes hitting WARN_ON() during initialisation of pre-NV50 GPUs, caused by the recent changes to support pad macro routing on GM20x. We currently don't use them here for older GPUs anyway. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index c7c84d34d97e..88582af8bd89 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -267,6 +267,8 @@ nvkm_disp_oneinit(struct nvkm_engine *engine) /* Create output path objects for each VBIOS display path. */ i = -1; while ((data = dcb_outp_parse(bios, ++i, &ver, &hdr, &dcbE))) { + if (ver < 0x40) /* No support for chipsets prior to NV50. */ + break; if (dcbE.type == DCB_OUTPUT_UNUSED) continue; if (dcbE.type == DCB_OUTPUT_EOL) -- cgit v1.2.3 From c775d2098d35bd130a883bbdf6af9401a8c4cb2d Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 9 Aug 2017 17:47:26 +0200 Subject: bio-integrity: Fix regression if profile verify_fn is NULL In dm-integrity target we register integrity profile that have both generate_fn and verify_fn callbacks set to NULL. This is used if dm-integrity is stacked under a dm-crypt device for authenticated encryption (integrity payload contains authentication tag and IV seed). In this case the verification is done through own crypto API processing inside dm-crypt; integrity profile is only holder of these data. (And memory is owned by dm-crypt as well.) After the commit (and previous changes) Commit 7c20f11680a441df09de7235206f70115fbf6290 Author: Christoph Hellwig Date: Mon Jul 3 16:58:43 2017 -0600 bio-integrity: stop abusing bi_end_io we get this crash: : BUG: unable to handle kernel NULL pointer dereference at (null) : IP: (null) : *pde = 00000000 ... : : Workqueue: kintegrityd bio_integrity_verify_fn : task: f48ae180 task.stack: f4b5c000 : EIP: (null) : EFLAGS: 00210286 CPU: 0 : EAX: f4b5debc EBX: 00001000 ECX: 00000001 EDX: 00000000 : ESI: 00001000 EDI: ed25f000 EBP: f4b5dee8 ESP: f4b5dea4 : DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 : CR0: 80050033 CR2: 00000000 CR3: 32823000 CR4: 001406d0 : Call Trace: : ? bio_integrity_process+0xe3/0x1e0 : bio_integrity_verify_fn+0xea/0x150 : process_one_work+0x1c7/0x5c0 : worker_thread+0x39/0x380 : kthread+0xd6/0x110 : ? process_one_work+0x5c0/0x5c0 : ? kthread_worker_fn+0x100/0x100 : ? kthread_worker_fn+0x100/0x100 : ret_from_fork+0x19/0x24 : Code: Bad EIP value. : EIP: (null) SS:ESP: 0068:f4b5dea4 : CR2: 0000000000000000 Patch just skip the whole verify workqueue if verify_fn is set to NULL. Fixes: 7c20f116 ("bio-integrity: stop abusing bi_end_io") Signed-off-by: Milan Broz [hch: trivial whitespace fix] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bio-integrity.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 83e92beb3c9f..0fd9604974da 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -387,7 +387,10 @@ static void bio_integrity_verify_fn(struct work_struct *work) */ bool __bio_integrity_endio(struct bio *bio) { - if (bio_op(bio) == REQ_OP_READ && !bio->bi_status) { + struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + + if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && + bi->profile->verify_fn) { struct bio_integrity_payload *bip = bio_integrity(bio); INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); -- cgit v1.2.3 From f86e28c4dc8d475cb82ca8d018daaa1564534aad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 9 Aug 2017 17:47:27 +0200 Subject: bio-integrity: only verify integrity on the lowest stacked driver This gets us back to the behavior in 4.12 and earlier. Signed-off-by: Christoph Hellwig Fixes: 7c20f116 ("bio-integrity: stop abusing bi_end_io") Signed-off-by: Jens Axboe --- block/bio-integrity.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 0fd9604974da..9b1ea478577b 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -388,11 +388,10 @@ static void bio_integrity_verify_fn(struct work_struct *work) bool __bio_integrity_endio(struct bio *bio) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct bio_integrity_payload *bip = bio_integrity(bio); if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && - bi->profile->verify_fn) { - struct bio_integrity_payload *bip = bio_integrity(bio); - + (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) { INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bip->bip_work); return false; -- cgit v1.2.3 From d4acf3650c7c968f46ad932b9a25d1cc24cf4998 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Aug 2017 11:28:06 -0700 Subject: block: Make blk_mq_delay_kick_requeue_list() rerun the queue at a quiet time The blk_mq_delay_kick_requeue_list() function is used by the device mapper and only by the device mapper to rerun the queue and requeue list after a delay. This function is called once per request that gets requeued. Modify this function such that the queue is run once per path change event instead of once per request that is requeued. Fixes: commit 2849450ad39d ("blk-mq: introduce blk_mq_delay_kick_requeue_list()") Signed-off-by: Bart Van Assche Cc: Mike Snitzer Cc: Laurence Oberman Cc: Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 211ef367345f..535cbdf32aab 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -684,8 +684,8 @@ EXPORT_SYMBOL(blk_mq_kick_requeue_list); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs) { - kblockd_schedule_delayed_work(&q->requeue_work, - msecs_to_jiffies(msecs)); + kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work, + msecs_to_jiffies(msecs)); } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -- cgit v1.2.3 From 6f48655facfd7f7ccfe6d252ac0fe319ab02e4dd Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 6 Aug 2017 16:10:03 -0700 Subject: target: Fix node_acl demo-mode + uncached dynamic shutdown regression This patch fixes a generate_node_acls = 1 + cache_dynamic_acls = 0 regression, that was introduced by commit 01d4d673558985d9a118e1e05026633c3e2ade9b Author: Nicholas Bellinger Date: Wed Dec 7 12:55:54 2016 -0800 which originally had the proper list_del_init() usage, but was dropped during list review as it was thought unnecessary by HCH. However, list_del_init() usage is required during the special generate_node_acls = 1 + cache_dynamic_acls = 0 case when transport_free_session() does a list_del(&se_nacl->acl_list), followed by target_complete_nacl() doing the same thing. This was manifesting as a general protection fault as reported by Justin: kernel: general protection fault: 0000 [#1] SMP kernel: Modules linked in: kernel: CPU: 0 PID: 11047 Comm: iscsi_ttx Not tainted 4.13.0-rc2.x86_64.1+ #20 kernel: Hardware name: Intel Corporation S5500BC/S5500BC, BIOS S5500.86B.01.00.0064.050520141428 05/05/2014 kernel: task: ffff88026939e800 task.stack: ffffc90007884000 kernel: RIP: 0010:target_put_nacl+0x49/0xb0 kernel: RSP: 0018:ffffc90007887d70 EFLAGS: 00010246 kernel: RAX: dead000000000200 RBX: ffff8802556ca000 RCX: 0000000000000000 kernel: RDX: dead000000000100 RSI: 0000000000000246 RDI: ffff8802556ce028 kernel: RBP: ffffc90007887d88 R08: 0000000000000001 R09: 0000000000000000 kernel: R10: ffffc90007887df8 R11: ffffea0009986900 R12: ffff8802556ce020 kernel: R13: ffff8802556ce028 R14: ffff8802556ce028 R15: ffffffff88d85540 kernel: FS: 0000000000000000(0000) GS:ffff88027fc00000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fffe36f5f94 CR3: 0000000009209000 CR4: 00000000003406f0 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 kernel: Call Trace: kernel: transport_free_session+0x67/0x140 kernel: transport_deregister_session+0x7a/0xc0 kernel: iscsit_close_session+0x92/0x210 kernel: iscsit_close_connection+0x5f9/0x840 kernel: iscsit_take_action_for_connection_exit+0xfe/0x110 kernel: iscsi_target_tx_thread+0x140/0x1e0 kernel: ? wait_woken+0x90/0x90 kernel: kthread+0x124/0x160 kernel: ? iscsit_thread_get_cpumask+0x90/0x90 kernel: ? kthread_create_on_node+0x40/0x40 kernel: ret_from_fork+0x22/0x30 kernel: Code: 00 48 89 fb 4c 8b a7 48 01 00 00 74 68 4d 8d 6c 24 08 4c 89 ef e8 e8 28 43 00 48 8b 93 20 04 00 00 48 8b 83 28 04 00 00 4c 89 ef <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 20 kernel: RIP: target_put_nacl+0x49/0xb0 RSP: ffffc90007887d70 kernel: ---[ end trace f12821adbfd46fed ]--- To address this, go ahead and use proper list_del_list() for all cases of se_nacl->acl_list deletion. Reported-by: Justin Maggard Tested-by: Justin Maggard Cc: Justin Maggard Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_tpg.c | 4 ++-- drivers/target/target_core_transport.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 36913734c6bc..02e8a5d86658 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -364,7 +364,7 @@ void core_tpg_del_initiator_node_acl(struct se_node_acl *acl) mutex_lock(&tpg->acl_node_mutex); if (acl->dynamic_node_acl) acl->dynamic_node_acl = 0; - list_del(&acl->acl_list); + list_del_init(&acl->acl_list); mutex_unlock(&tpg->acl_node_mutex); target_shutdown_sessions(acl); @@ -548,7 +548,7 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) * in transport_deregister_session(). */ list_for_each_entry_safe(nacl, nacl_tmp, &node_list, acl_list) { - list_del(&nacl->acl_list); + list_del_init(&nacl->acl_list); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 97fed9a298bd..836d552b0385 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -466,7 +466,7 @@ static void target_complete_nacl(struct kref *kref) } mutex_lock(&se_tpg->acl_node_mutex); - list_del(&nacl->acl_list); + list_del_init(&nacl->acl_list); mutex_unlock(&se_tpg->acl_node_mutex); core_tpg_wait_for_nacl_pr_ref(nacl); @@ -538,7 +538,7 @@ void transport_free_session(struct se_session *se_sess) spin_unlock_irqrestore(&se_nacl->nacl_sess_lock, flags); if (se_nacl->dynamic_stop) - list_del(&se_nacl->acl_list); + list_del_init(&se_nacl->acl_list); } mutex_unlock(&se_tpg->acl_node_mutex); -- cgit v1.2.3 From 3ee70591d6c47ef4c4699b3395ba96ce287db937 Mon Sep 17 00:00:00 2001 From: Jim Quigley Date: Fri, 21 Jul 2017 09:20:15 -0400 Subject: sunvdc: prevent sunvdc panic when mpgroup disk added to guest domain Using mpgroup to define multiple paths for a virtual disk causes multiple virtual-device-port ports to be created for that virtual device. Each virtual-device-port port then gets a vdisk created for it by the Linux sunvdc driver. As mpgroup is not supported by the Linux sunvdc driver it cannot handle multiple ports for a single vdisk, leading to a kernel panic at startup. This fix prevents more than one vdisk per virtual-device-port being created until full virtual disk multipathing (mpgroup) support is implemented. Signed-off-by: Jim Quigley Reviewed-by: Liam Merwick Reviewed-by: Shannon Nelson Reviewed-by: Alexandre Chartre Reviewed-by: Aaron Young Signed-off-by: David S. Miller --- drivers/block/sunvdc.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 6b16ead1da58..ad9749463d4f 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -875,6 +875,56 @@ static void print_version(void) printk(KERN_INFO "%s", version); } +struct vdc_check_port_data { + int dev_no; + char *type; +}; + +static int vdc_device_probed(struct device *dev, void *arg) +{ + struct vio_dev *vdev = to_vio_dev(dev); + struct vdc_check_port_data *port_data; + + port_data = (struct vdc_check_port_data *)arg; + + if ((vdev->dev_no == port_data->dev_no) && + (!(strcmp((char *)&vdev->type, port_data->type))) && + dev_get_drvdata(dev)) { + /* This device has already been configured + * by vdc_port_probe() + */ + return 1; + } else { + return 0; + } +} + +/* Determine whether the VIO device is part of an mpgroup + * by locating all the virtual-device-port nodes associated + * with the parent virtual-device node for the VIO device + * and checking whether any of these nodes are vdc-ports + * which have already been configured. + * + * Returns true if this device is part of an mpgroup and has + * already been probed. + */ +static bool vdc_port_mpgroup_check(struct vio_dev *vdev) +{ + struct vdc_check_port_data port_data; + struct device *dev; + + port_data.dev_no = vdev->dev_no; + port_data.type = (char *)&vdev->type; + + dev = device_find_child(vdev->dev.parent, &port_data, + vdc_device_probed); + + if (dev) + return true; + + return false; +} + static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) { struct mdesc_handle *hp; @@ -893,6 +943,14 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) goto err_out_release_mdesc; } + /* Check if this device is part of an mpgroup */ + if (vdc_port_mpgroup_check(vdev)) { + printk(KERN_WARNING + "VIO: Ignoring extra vdisk port %s", + dev_name(&vdev->dev)); + goto err_out_release_mdesc; + } + port = kzalloc(sizeof(*port), GFP_KERNEL); err = -ENOMEM; if (!port) { @@ -943,6 +1001,9 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) if (err) goto err_out_free_tx_ring; + /* Note that the device driver_data is used to determine + * whether the port has been probed. + */ dev_set_drvdata(&vdev->dev, port); mdesc_release(hp); -- cgit v1.2.3 From ed43594aede9719e56eca72fc6a9a200c60b60e6 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Tue, 8 Aug 2017 22:23:56 +0200 Subject: tipc: remove premature ESTABLISH FSM event at link synchronization When a link between two nodes come up, both endpoints will initially send out a STATE message to the peer, to increase the probability that the peer endpoint also is up when the first traffic message arrives. Thereafter, if the establishing link is the second link between two nodes, this first "traffic" message is a TUNNEL_PROTOCOL/SYNCH message, helping the peer to perform initial synchronization between the two links. However, the initial STATE message may be lost, in which case the SYNCH message will be the first one arriving at the peer. This should also work, as the SYNCH message itself will be used to take up the link endpoint before initializing synchronization. Unfortunately the code for this case is broken. Currently, the link is brought up through a tipc_link_fsm_evt(ESTABLISHED) when a SYNCH arrives, whereupon __tipc_node_link_up() is called to distribute the link slots and take the link into traffic. But, __tipc_node_link_up() is itself starting with a test for whether the link is up, and if true, returns without action. Clearly, the tipc_link_fsm_evt(ESTABLISHED) call is unnecessary, since tipc_node_link_up() is itself issuing such an event, but also harmful, since it inhibits tipc_node_link_up() to perform the test of its tasks, and the link endpoint in question hence is never taken into traffic. This problem has been exposed when we set up dual links between pre- and post-4.4 kernels, because the former ones don't send out the initial STATE message described above. We fix this by removing the unnecessary event call. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index aeef8011ac7d..9b4dcb6a16b5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { syncpt = iseqno + exp_pkts - 1; - if (!tipc_link_is_up(l)) { - tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); - } if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); -- cgit v1.2.3 From 50ddfbafcd1e1f95185dfaa4ad794f742b0beaf8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 8 Aug 2017 14:45:09 -0700 Subject: net: systemport: Fix software statistics for SYSTEMPORT Lite With SYSTEMPORT Lite we have holes in our statistics layout that make us skip over the hardware MIB counters, bcm_sysport_get_stats() was not taking that into account resulting in reporting 0 for all SW-maintained statistics, fix this by skipping accordingly. Fixes: 44a4524c54af ("net: systemport: Add support for SYSTEMPORT Lite") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 5333601f855f..dc3052751bc1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -449,6 +449,10 @@ static void bcm_sysport_get_stats(struct net_device *dev, p = (char *)&dev->stats; else p = (char *)priv; + + if (priv->is_lite && !bcm_sysport_lite_stat_valid(s->type)) + continue; + p += s->stat_offset; data[j] = *(unsigned long *)p; j++; -- cgit v1.2.3 From 04db70d9fe7019d96118158fbaaccb4959ba2bd4 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Tue, 8 Aug 2017 17:26:24 -0700 Subject: geneve: maximum value of VNI cannot be used Geneve's Virtual Network Identifier (VNI) is 24 bit long, so the range of values for it would be from 0 to 16777215 (2^24 -1). However, one cannot create a geneve device with VNI set to 16777215. This patch fixes this issue. Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index de8156c6b292..2bbda71818ad 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1091,7 +1091,7 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], if (data[IFLA_GENEVE_ID]) { __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); - if (vni >= GENEVE_VID_MASK) + if (vni >= GENEVE_N_VID) return -ERANGE; } -- cgit v1.2.3 From 1714020e42b17135032c8606f7185b3fb2ba5d78 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 9 Aug 2017 14:38:04 +0300 Subject: igmp: Fix regression caused by igmp sysctl namespace code. Commit dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") moved the igmp sysctls initialization from tcp_sk_init to igmp_net_init. This function is only called as part of per-namespace initialization, only if CONFIG_IP_MULTICAST is defined, otherwise igmp_mc_init() call in ip_init is compiled out, casuing the igmp pernet ops to not be registerd and those sysctl being left initialized with 0. However, there are certain functions, such as ip_mc_join_group which are always compiled and make use of some of those sysctls. Let's do a partial revert of the aforementioned commit and move the sysctl initialization into inet_init_net, that way they will always have sane values. Fixes: dcd87999d415 ("igmp: net: Move igmp namespace init to correct file") Link: https://bugzilla.kernel.org/show_bug.cgi?id=196595 Reported-by: Gerardo Exequiel Pozzi Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 +++++++ net/ipv4/igmp.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76c2077c3f5b..2e548eca3489 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif + /* Some igmp sysctl, whose values are always used */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; + return 0; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 28f14afd0dd3..498706b072fb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } - /* Sysctl initialization */ - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: -- cgit v1.2.3 From 96d9703050a0036a3360ec98bb41e107c90664fe Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 9 Aug 2017 18:15:19 +0800 Subject: net: sched: set xt_tgchk_param par.nft_compat as 0 in ipt_init_target Commit 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") introduced a member nft_compat to xt_tgchk_param structure. But it didn't set it's value for ipt_init_target. With unexpected value in par.nft_compat, it may return unexpected result in some target's checkentry. This patch is to set all it's fields as 0 and only initialize the non-zero fields in ipt_init_target. v1->v2: As Wang Cong's suggestion, fix it by setting all it's fields as 0 and only initializing the non-zero fields. Fixes: 55917a21d0cc ("netfilter: x_tables: add context to know if extension runs from nft_compat") Suggested-by: Cong Wang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sched/act_ipt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 94ba5cfab860..d516ba8178b8 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -49,9 +49,9 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); par.net = net; par.table = table; - par.entryinfo = NULL; par.target = target; par.targinfo = t->data; par.hook_mask = hook; -- cgit v1.2.3 From 7dc88d2afb6e5c6bd6bbedc394eb43c1e3114bdd Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:50 +0800 Subject: arm64: allwinner: a64: bananapi-m64: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: e7295499903d ("arm64: allwinner: bananapi-m64: Enable dwmac-sun8i") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 0d1f026d831a..ba2fde2909f9 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -51,6 +51,7 @@ compatible = "sinovoip,bananapi-m64", "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; }; -- cgit v1.2.3 From dff751c68904cf587d918cfb6b2f5b0112f73bc9 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:51 +0800 Subject: arm64: allwinner: a64: pine64: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: 970239437493 ("arm64: allwinner: pine64: Enable dwmac-sun8i") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts index 08cda24ea194..827168bc22ed 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts @@ -51,6 +51,7 @@ compatible = "pine64,pine64", "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; -- cgit v1.2.3 From 56a9155074b4d23aa07a98c35c6f107dd50a9367 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 22 Jul 2017 10:28:52 +0800 Subject: arm64: allwinner: a64: sopine: add missing ethernet0 alias The EMAC Ethernet controller was enabled, but an accompanying alias was not added. This results in unstable numbering if other Ethernet devices, such as a USB dongle, are present. Also, the bootloader uses the alias to assign a generated stable MAC address to the device node. Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard Fixes: 96219b004865 ("arm64: allwinner: a64: add device tree for SoPine with baseboard") [wens@csie.org: Rewrite commit log as fixing a previous patch with Fixes] Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 17eb1cc5bf6b..216e3a5dafae 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -53,6 +53,7 @@ "allwinner,sun50i-a64"; aliases { + ethernet0 = &emac; serial0 = &uart0; }; -- cgit v1.2.3 From 758f3735580c21b8a36d644128af6608120a1dde Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Thu, 20 Jul 2017 18:34:02 +0200 Subject: nvme: strip trailing 0-bytes in wwid_show Some broken controllers (such as earlier Linux targets) pad model or serial fields with 0-bytes rather than spaces. The NVMe spec disallows 0 bytes in "ASCII" fields. Thus strip trailing 0-bytes, too. Also make sure that we get no underflow for pathological input. Signed-off-by: Martin Wilck Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c49f1f8b2e57..1e9290983694 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2004,9 +2004,11 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, if (memchr_inv(ns->eui, 0, sizeof(ns->eui))) return sprintf(buf, "eui.%8phN\n", ns->eui); - while (ctrl->serial[serial_len - 1] == ' ') + while (serial_len > 0 && (ctrl->serial[serial_len - 1] == ' ' || + ctrl->serial[serial_len - 1] == '\0')) serial_len--; - while (ctrl->model[model_len - 1] == ' ') + while (model_len > 0 && (ctrl->model[model_len - 1] == ' ' || + ctrl->model[model_len - 1] == '\0')) model_len--; return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", ctrl->vid, -- cgit v1.2.3 From 0fb228d30b8d72bfee51f57e638d412324d44a11 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Aug 2017 15:12:39 -0700 Subject: nvmet_fc: add defer_req callback for deferment of cmd buffer return At queue creation, the transport allocates a local job struct (struct nvmet_fc_fcp_iod) for each possible element of the queue. When a new CMD is received from the wire, a jobs struct is allocated from the queue and then used for the duration of the command. The job struct contains buffer space for the wire command iu. Thus, upon allocation of the job struct, the cmd iu buffer is copied to the job struct and the LLDD may immediately free/reuse the CMD IU buffer passed in the call. However, in some circumstances, due to the packetized nature of FC and the api of the FC LLDD which may issue a hw command to send the wire response, but the LLDD may not get the hw completion for the command and upcall the nvmet_fc layer before a new command may be asynchronously received on the wire. In other words, its possible for the initiator to get the response from the wire, thus believe a command slot free, and send a new command iu. The new command iu may be received by the LLDD and passed to the transport before the LLDD had serviced the hw completion and made the teardown calls for the original job struct. As such, there is no available job struct available for the new io. E.g. it appears like the host sent more queue elements than the queue size. It didn't based on it's understanding. Rather than treat this as a hard connection failure queue the new request until the job struct does free up. As the buffer isn't copied as there's no job struct, a special return value must be returned to the LLDD to signify to hold off on recycling the cmd iu buffer. And later, when a job struct is allocated and the buffer copied, a new LLDD callback is introduced to notify the LLDD and allow it to recycle it's command iu buffer. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 212 +++++++++++++++++++++++++++++++++++------ include/linux/nvme-fc-driver.h | 7 ++ 2 files changed, 191 insertions(+), 28 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 31ca55dfcb1d..1b7f2520a20d 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -114,6 +114,11 @@ struct nvmet_fc_tgtport { struct kref ref; }; +struct nvmet_fc_defer_fcp_req { + struct list_head req_list; + struct nvmefc_tgt_fcp_req *fcp_req; +}; + struct nvmet_fc_tgt_queue { bool ninetypercent; u16 qid; @@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_tgt_assoc *assoc; struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ struct list_head fod_list; + struct list_head pending_cmd_list; + struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; } __aligned(sizeof(unsigned long long)); @@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue); static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport); static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod * nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) { static struct nvmet_fc_fcp_iod *fod; - unsigned long flags; - spin_lock_irqsave(&queue->qlock, flags); + lockdep_assert_held(&queue->qlock); + fod = list_first_entry_or_null(&queue->fod_list, struct nvmet_fc_fcp_iod, fcp_list); if (fod) { @@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) * will "inherit" that reference. */ } - spin_unlock_irqrestore(&queue->qlock, flags); return fod; } +static void +nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_tgt_queue *queue, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + + /* + * put all admin cmds on hw queue id 0. All io commands go to + * the respective hw queue based on a modulo basis + */ + fcpreq->hwqid = queue->qid ? + ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; + + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); +} + static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, @@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, fcpreq->nvmet_fc_private = NULL; - spin_lock_irqsave(&queue->qlock, flags); - list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; fod->abort = false; fod->aborted = false; fod->writedataactive = false; fod->fcpreq = NULL; + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); + + spin_lock_irqsave(&queue->qlock, flags); + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) { + list_add_tail(&fod->fcp_list, &fod->queue->fod_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + /* Release reference taken at queue lookup and fod allocation */ + nvmet_fc_tgt_q_put(queue); + return; + } + + /* Re-use the fod for the next pending cmd that was deferred */ + list_del(&deferfcp->req_list); + + fcpreq = deferfcp->fcp_req; + + /* deferfcp can be reused for another IO at a later date */ + list_add_tail(&deferfcp->req_list, &queue->avail_defer_list); + spin_unlock_irqrestore(&queue->qlock, flags); + /* Save NVME CMD IO in fod */ + memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen); + + /* Setup new fcpreq to be processed */ + fcpreq->rspaddr = NULL; + fcpreq->rsplen = 0; + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + fod->active = true; + + /* inform LLDD IO is now being processed */ + tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq); + + /* Submit deferred IO for processing */ + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + /* - * release the reference taken at queue lookup and fod allocation + * Leave the queue lookup get reference taken when + * fod was originally allocated. */ - nvmet_fc_tgt_q_put(queue); - - tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, queue->port = assoc->tgtport->port; queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid); INIT_LIST_HEAD(&queue->fod_list); + INIT_LIST_HEAD(&queue->avail_defer_list); + INIT_LIST_HEAD(&queue->pending_cmd_list); atomic_set(&queue->connected, 0); atomic_set(&queue->sqtail, 0); atomic_set(&queue->rsn, 1); @@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; + struct nvmet_fc_defer_fcp_req *deferfcp; unsigned long flags; int i, writedataactive; bool disconnect; @@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) } } } + + /* Cleanup defer'ed IOs in queue */ + list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) { + list_del(&deferfcp->req_list); + kfree(deferfcp); + } + + for (;;) { + deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (!deferfcp) + break; + + list_del(&deferfcp->req_list); + spin_unlock_irqrestore(&queue->qlock, flags); + + tgtport->ops->defer_rcv(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_abort(&tgtport->fc_target_port, + deferfcp->fcp_req); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, + deferfcp->fcp_req); + + kfree(deferfcp); + + spin_lock_irqsave(&queue->qlock, flags); + } spin_unlock_irqrestore(&queue->qlock, flags); flush_workqueue(queue->work_q); @@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc * layer for processing. * - * The nvmet-fc layer will copy cmd payload to an internal structure for - * processing. As such, upon completion of the routine, the LLDD may - * immediately free/reuse the CMD IU buffer passed in the call. + * The nvmet_fc layer allocates a local job structure (struct + * nvmet_fc_fcp_iod) from the queue for the io and copies the + * CMD IU buffer to the job structure. As such, on a successful + * completion (returns 0), the LLDD may immediately free/reuse + * the CMD IU buffer passed in the call. + * + * However, in some circumstances, due to the packetized nature of FC + * and the api of the FC LLDD which may issue a hw command to send the + * response, but the LLDD may not get the hw completion for that command + * and upcall the nvmet_fc layer before a new command may be + * asynchronously received - its possible for a command to be received + * before the LLDD and nvmet_fc have recycled the job structure. It gives + * the appearance of more commands received than fits in the sq. + * To alleviate this scenario, a temporary queue is maintained in the + * transport for pending LLDD requests waiting for a queue job structure. + * In these "overrun" cases, a temporary queue element is allocated + * the LLDD request and CMD iu buffer information remembered, and the + * routine returns a -EOVERFLOW status. Subsequently, when a queue job + * structure is freed, it is immediately reallocated for anything on the + * pending request list. The LLDDs defer_rcv() callback is called, + * informing the LLDD that it may reuse the CMD IU buffer, and the io + * is then started normally with the transport. * - * If this routine returns error, the lldd should abort the exchange. + * The LLDD, when receiving an -EOVERFLOW completion status, is to treat + * the completion as successful but must not reuse the CMD IU buffer + * until the LLDD's defer_rcv() callback has been called for the + * corresponding struct nvmefc_tgt_fcp_req pointer. + * + * If there is any other condition in which an error occurs, the + * transport will return a non-zero status indicating the error. + * In all cases other than -EOVERFLOW, the transport has not accepted the + * request and the LLDD should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU * was received on. @@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, struct nvme_fc_cmd_iu *cmdiu = cmdiubuf; struct nvmet_fc_tgt_queue *queue; struct nvmet_fc_fcp_iod *fod; + struct nvmet_fc_defer_fcp_req *deferfcp; + unsigned long flags; /* validate iu, so the connection id can be used to find the queue */ if ((cmdiubuf_len != sizeof(*cmdiu)) || @@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, * when the fod is freed. */ + spin_lock_irqsave(&queue->qlock, flags); + fod = nvmet_fc_alloc_fcp_iod(queue); - if (!fod) { + if (fod) { + spin_unlock_irqrestore(&queue->qlock, flags); + + fcpreq->nvmet_fc_private = fod; + fod->fcpreq = fcpreq; + + memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + + nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq); + + return 0; + } + + if (!tgtport->ops->defer_rcv) { + spin_unlock_irqrestore(&queue->qlock, flags); /* release the queue lookup reference */ nvmet_fc_tgt_q_put(queue); return -ENOENT; } - fcpreq->nvmet_fc_private = fod; - fod->fcpreq = fcpreq; - /* - * put all admin cmds on hw queue id 0. All io commands go to - * the respective hw queue based on a modulo basis - */ - fcpreq->hwqid = queue->qid ? - ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0; - memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); + deferfcp = list_first_entry_or_null(&queue->avail_defer_list, + struct nvmet_fc_defer_fcp_req, req_list); + if (deferfcp) { + /* Just re-use one that was previously allocated */ + list_del(&deferfcp->req_list); + } else { + spin_unlock_irqrestore(&queue->qlock, flags); - if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) - queue_work_on(queue->cpu, queue->work_q, &fod->work); - else - nvmet_fc_handle_fcp_rqst(tgtport, fod); + /* Now we need to dynamically allocate one */ + deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL); + if (!deferfcp) { + /* release the queue lookup reference */ + nvmet_fc_tgt_q_put(queue); + return -ENOMEM; + } + spin_lock_irqsave(&queue->qlock, flags); + } - return 0; + /* For now, use rspaddr / rsplen to save payload information */ + fcpreq->rspaddr = cmdiubuf; + fcpreq->rsplen = cmdiubuf_len; + deferfcp->fcp_req = fcpreq; + + /* defer processing till a fod becomes available */ + list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list); + + /* NOTE: the queue lookup reference is still valid */ + + spin_unlock_irqrestore(&queue->qlock, flags); + + return -EOVERFLOW; } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 6c8c5d8041b7..2591878c1d48 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -346,6 +346,11 @@ struct nvme_fc_remote_port { * indicating an FC transport Aborted status. * Entrypoint is Mandatory. * + * @defer_rcv: Called by the transport to signal the LLLD that it has + * begun processing of a previously received NVME CMD IU. The LLDD + * is now free to re-use the rcv buffer associated with the + * nvmefc_tgt_fcp_req. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -846,6 +851,8 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 507384209371fc25cab203b95e7bdf50e58b47d5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 1 Aug 2017 15:12:40 -0700 Subject: lpfc: support nvmet_fc defer_rcv callback Currently, calls to nvmet_fc_rcv_fcp_req() always copied the FC-NVME cmd iu to a temporary buffer before returning, allowing the driver to immediately repost the buffer to the hardware. To address timing conditions on queue element structures vs async command reception, the nvmet_fc transport occasionally may need to hold on to the command iu buffer for a short period. In these cases, the nvmet_fc_rcv_fcp_req() will return a special return code (-EOVERFLOW). In these cases, the LLDD must delay until the new defer_rcv lldd callback is called before recycling the buffer back to the hw. This patch adds support for the new nvmet_fc transport defer_rcv callback and recognition of the new error code when passing commands to the transport. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_attr.c | 4 +++- drivers/scsi/lpfc/lpfc_debugfs.c | 5 ++++- drivers/scsi/lpfc/lpfc_nvmet.c | 30 ++++++++++++++++++++++++++++++ drivers/scsi/lpfc/lpfc_nvmet.h | 1 + 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4ed48ed38e79..7ee1a94c0b33 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -205,8 +205,10 @@ lpfc_nvme_info_show(struct device *dev, struct device_attribute *attr, atomic_read(&tgtp->xmt_ls_rsp_error)); len += snprintf(buf+len, PAGE_SIZE-len, - "FCP: Rcv %08x Release %08x Drop %08x\n", + "FCP: Rcv %08x Defer %08x Release %08x " + "Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_defer), atomic_read(&tgtp->xmt_fcp_release), atomic_read(&tgtp->rcv_fcp_cmd_drop)); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 5cc8b0f7d885..744f3f395b64 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -782,8 +782,11 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) atomic_read(&tgtp->xmt_ls_rsp_error)); len += snprintf(buf + len, size - len, - "FCP: Rcv %08x Drop %08x\n", + "FCP: Rcv %08x Defer %08x Release %08x " + "Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), + atomic_read(&tgtp->rcv_fcp_cmd_defer), + atomic_read(&tgtp->xmt_fcp_release), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index fbeec344c6cc..bbbd0f84160d 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -841,12 +841,31 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } +static void +lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; + struct lpfc_hba *phba = ctxp->phba; + + lpfc_nvmeio_data(phba, "NVMET DEFERRCV: xri x%x sz %d CPU %02x\n", + ctxp->oxid, ctxp->size, smp_processor_id()); + + tgtp = phba->targetport->private; + atomic_inc(&tgtp->rcv_fcp_cmd_defer); + lpfc_rq_buf_free(phba, &nvmebuf->hbuf); /* repost */ +} + static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, .fcp_op = lpfc_nvmet_xmt_fcp_op, .fcp_abort = lpfc_nvmet_xmt_fcp_abort, .fcp_req_release = lpfc_nvmet_xmt_fcp_release, + .defer_rcv = lpfc_nvmet_defer_rcv, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -1504,6 +1523,17 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, return; } + /* Processing of FCP command is deferred */ + if (rc == -EOVERFLOW) { + lpfc_nvmeio_data(phba, + "NVMET RCV BUSY: xri x%x sz %d from %06x\n", + oxid, size, sid); + /* defer reposting rcv buffer till .defer_rcv callback */ + ctxp->rqb_buffer = nvmebuf; + atomic_inc(&tgtp->rcv_fcp_cmd_out); + return; + } + atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6159 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index e675ef17be08..48a76788b003 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -49,6 +49,7 @@ struct lpfc_nvmet_tgtport { atomic_t rcv_fcp_cmd_in; atomic_t rcv_fcp_cmd_out; atomic_t rcv_fcp_cmd_drop; + atomic_t rcv_fcp_cmd_defer; atomic_t xmt_fcp_release; /* Stats counters - lpfc_nvmet_xmt_fcp_op */ -- cgit v1.2.3 From 1c78f7735b2bdd0afbe5d14c5c8b6d8d381b6f13 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 30 Jul 2017 01:45:08 +0300 Subject: nvme-pci: fix CMB sysfs file removal in reset path Currently we create the sysfs entry even if we fail mapping it. In that case, the unmapping will not remove the sysfs created file. There is no good reason to create a sysfs entry for a non working CMB and show his characteristics. Fixes: f63572dff ("nvme: unmap CMB and remove sysfs file in reset path") Signed-off-by: Max Gurtovoy Reviewed-by: Stephen Bates Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cd888a47d0fc..74a124a06264 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1558,11 +1558,9 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) if (dev->cmb) { iounmap(dev->cmb); dev->cmb = NULL; - if (dev->cmbsz) { - sysfs_remove_file_from_group(&dev->ctrl.device->kobj, - &dev_attr_cmb.attr, NULL); - dev->cmbsz = 0; - } + sysfs_remove_file_from_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL); + dev->cmbsz = 0; } } @@ -1953,16 +1951,14 @@ static int nvme_pci_enable(struct nvme_dev *dev) /* * CMBs can currently only exist on >=1.2 PCIe devices. We only - * populate sysfs if a CMB is implemented. Note that we add the - * CMB attribute to the nvme_ctrl kobj which removes the need to remove - * it on exit. Since nvme_dev_attrs_group has no name we can pass - * NULL as final argument to sysfs_add_file_to_group. + * populate sysfs if a CMB is implemented. Since nvme_dev_attrs_group + * has no name we can pass NULL as final argument to + * sysfs_add_file_to_group. */ if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) { dev->cmb = nvme_map_cmb(dev); - - if (dev->cmbsz) { + if (dev->cmb) { if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, &dev_attr_cmb.attr, NULL)) dev_warn(dev->ctrl.device, -- cgit v1.2.3 From bfe334924ccd9f4a53f30240c03cf2f43f5b2df1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 2 Aug 2017 19:39:30 +0200 Subject: perf/x86: Fix RDPMC vs. mm_struct tracking Vince reported the following rdpmc() testcase failure: > Failing test case: > > fd=perf_event_open(); > addr=mmap(fd); > exec() // without closing or unmapping the event > fd=perf_event_open(); > addr=mmap(fd); > rdpmc() // GPFs due to rdpmc being disabled The problem is of course that exec() plays tricks with what is current->mm, only destroying the old mappings after having installed the new mm. Fix this confusion by passing along vma->vm_mm instead of relying on current->mm. Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 1e0fb9ec679c ("perf: Add pmu callbacks to track event mapping and unmapping") Link: http://lkml.kernel.org/r/20170802173930.cstykcqefmqt7jau@hirez.programming.kicks-ass.net [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 16 +++++++--------- include/linux/perf_event.h | 4 ++-- kernel/events/core.c | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e3db8f642a7..af12e294caed 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2114,7 +2114,7 @@ static void refresh_pce(void *ignored) load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm)); } -static void x86_pmu_event_mapped(struct perf_event *event) +static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; @@ -2129,22 +2129,20 @@ static void x86_pmu_event_mapped(struct perf_event *event) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + lockdep_assert_held_exclusive(&mm->mmap_sem); - if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } -static void x86_pmu_event_unmapped(struct perf_event *event) +static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!current->mm) - return; if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; - if (atomic_dec_and_test(¤t->mm->context.perf_rdpmc_allowed)) - on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); + if (atomic_dec_and_test(&mm->context.perf_rdpmc_allowed)) + on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); } static int x86_pmu_event_idx(struct perf_event *event) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a3b873fc59e4..b14095bcf4bb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -310,8 +310,8 @@ struct pmu { * Notification that the event was mapped or unmapped. Called * in the context of the mapping task. */ - void (*event_mapped) (struct perf_event *event); /*optional*/ - void (*event_unmapped) (struct perf_event *event); /*optional*/ + void (*event_mapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ + void (*event_unmapped) (struct perf_event *event, struct mm_struct *mm); /* optional */ /* * Flags for ->add()/->del()/ ->start()/->stop(). There are diff --git a/kernel/events/core.c b/kernel/events/core.c index 426c2ffba16d..a654b8a3586f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_mmap_open(struct vm_area_struct *vma) atomic_inc(&event->rb->aux_mmap_count); if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); } static void perf_pmu_output_stop(struct perf_event *event); @@ -5113,7 +5113,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) unsigned long size = perf_data_size(rb); if (event->pmu->event_unmapped) - event->pmu->event_unmapped(event); + event->pmu->event_unmapped(event, vma->vm_mm); /* * rb->aux_mmap_count will always drop before rb->mmap_count and @@ -5411,7 +5411,7 @@ aux_unlock: vma->vm_ops = &perf_mmap_vmops; if (event->pmu->event_mapped) - event->pmu->event_mapped(event); + event->pmu->event_mapped(event, vma->vm_mm); return ret; } -- cgit v1.2.3 From 9b231d9f47c6114d317ce28cff92a74ad80547f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 3 Aug 2017 15:42:09 +0200 Subject: perf/core: Fix time on IOC_ENABLE Vince reported that when we do IOC_ENABLE/IOC_DISABLE while the task is SIGSTOP'ed state the timestamps go wobbly. It turns out we indeed fail to correctly account time while in 'OFF' state and doing IOC_ENABLE without getting scheduled in exposes the problem. Further thinking about this problem, it occurred to me that we can suffer a similar fate when we migrate an uncore event between CPUs. The perf_event_install() on the 'new' CPU will do add_event_to_ctx() which will reset all the time stamp, resulting in a subsequent update_event_times() to overwrite the total_time_* fields with smaller values. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/events/core.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a654b8a3586f..ee20d4c546b5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2217,6 +2217,33 @@ static int group_can_go_on(struct perf_event *event, return can_add_hw; } +/* + * Complement to update_event_times(). This computes the tstamp_* values to + * continue 'enabled' state from @now, and effectively discards the time + * between the prior tstamp_stopped and now (as we were in the OFF state, or + * just switched (context) time base). + * + * This further assumes '@event->state == INACTIVE' (we just came from OFF) and + * cannot have been scheduled in yet. And going into INACTIVE state means + * '@event->tstamp_stopped = @now'. + * + * Thus given the rules of update_event_times(): + * + * total_time_enabled = tstamp_stopped - tstamp_enabled + * total_time_running = tstamp_stopped - tstamp_running + * + * We can insert 'tstamp_stopped == now' and reverse them to compute new + * tstamp_* values. + */ +static void __perf_event_enable_time(struct perf_event *event, u64 now) +{ + WARN_ON_ONCE(event->state != PERF_EVENT_STATE_INACTIVE); + + event->tstamp_stopped = now; + event->tstamp_enabled = now - event->total_time_enabled; + event->tstamp_running = now - event->total_time_running; +} + static void add_event_to_ctx(struct perf_event *event, struct perf_event_context *ctx) { @@ -2224,9 +2251,12 @@ static void add_event_to_ctx(struct perf_event *event, list_add_event(event, ctx); perf_group_attach(event); - event->tstamp_enabled = tstamp; - event->tstamp_running = tstamp; - event->tstamp_stopped = tstamp; + /* + * We can be called with event->state == STATE_OFF when we create with + * .disabled = 1. In that case the IOC_ENABLE will call this function. + */ + if (event->state == PERF_EVENT_STATE_INACTIVE) + __perf_event_enable_time(event, tstamp); } static void ctx_sched_out(struct perf_event_context *ctx, @@ -2471,10 +2501,11 @@ static void __perf_event_mark_enabled(struct perf_event *event) u64 tstamp = perf_event_time(event); event->state = PERF_EVENT_STATE_INACTIVE; - event->tstamp_enabled = tstamp - event->total_time_enabled; + __perf_event_enable_time(event, tstamp); list_for_each_entry(sub, &event->sibling_list, group_entry) { + /* XXX should not be > INACTIVE if event isn't */ if (sub->state >= PERF_EVENT_STATE_INACTIVE) - sub->tstamp_enabled = tstamp - sub->total_time_enabled; + __perf_event_enable_time(sub, tstamp); } } -- cgit v1.2.3 From e93c17301ac55321fc18e0f8316e924e58a83c8c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 7 Aug 2017 19:43:13 -0700 Subject: x86/asm/64: Clear AC on NMI entries This closes a hole in our SMAP implementation. This patch comes from grsecurity. Good catch! Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/314cc9f294e8f14ed85485727556ad4f15bb1659.1502159503.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d271fb79248f..6d078b89a5e8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1211,6 +1211,8 @@ ENTRY(nmi) * other IST entries. */ + ASM_CLAC + /* Use %rdx as our temp variable throughout */ pushq %rdx -- cgit v1.2.3 From d197f7988721221fac64f899efd7657c15281810 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 31 Jul 2017 11:37:28 -0700 Subject: clocksource/drivers/arm_arch_timer: Fix mem frame loop initialization The loop to find the best memory frame in arch_timer_mem_acpi_init() initializes the loop counter with itself ('i = i'), which is suspicious in the first place and pointed out by clang. The loop condition is 'i < timer_count' and a prior for loop exits when 'i' reaches 'timer_count', therefore the second loop is never executed. Initialize the loop counter with 0 to iterate over all timers, which supposedly was the intention before the typo monster attacked. Fixes: c2743a36765d3 ("clocksource: arm_arch_timer: add GTDT support for memory-mapped timer") Signed-off-by: Matthias Kaehlcke Reported-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Daniel Lezcano --- drivers/clocksource/arm_arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aae87c4c546e..72bbfccef113 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -1440,7 +1440,7 @@ static int __init arch_timer_mem_acpi_init(int platform_timer_count) * While unlikely, it's theoretically possible that none of the frames * in a timer expose the combination of feature we want. */ - for (i = i; i < timer_count; i++) { + for (i = 0; i < timer_count; i++) { timer = &timers[i]; frame = arch_timer_mem_find_best_frame(timer); -- cgit v1.2.3 From 5c23a558a65406cac472df07fd26a2688a42cad2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 30 Jun 2017 01:14:45 -0500 Subject: clocksource/drivers/em_sti: Fix error return codes in em_sti_probe() Propagate the return values of platform_get_irq and devm_request_irq on failure. Cc: Frans Klaver Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Lezcano --- drivers/clocksource/em_sti.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c index bc48cbf6a795..269db74a0658 100644 --- a/drivers/clocksource/em_sti.c +++ b/drivers/clocksource/em_sti.c @@ -305,7 +305,7 @@ static int em_sti_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "failed to get irq\n"); - return -EINVAL; + return irq; } /* map memory, let base point to the STI instance */ @@ -314,11 +314,12 @@ static int em_sti_probe(struct platform_device *pdev) if (IS_ERR(p->base)) return PTR_ERR(p->base); - if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt, - IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, - dev_name(&pdev->dev), p)) { + ret = devm_request_irq(&pdev->dev, irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p); + if (ret) { dev_err(&pdev->dev, "failed to request low IRQ\n"); - return -ENOENT; + return ret; } /* get hold of clock */ -- cgit v1.2.3 From 10e66760fa8ee11f254a69433fc132d04758a5fc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 3 Aug 2017 12:58:18 +0200 Subject: x86/smpboot: Unbreak CPU0 hotplug A hang on CPU0 onlining after a preceding offlining is observed. Trace shows that CPU0 is stuck in check_tsc_sync_target() waiting for source CPU to run check_tsc_sync_source() but this never happens. Source CPU, in its turn, is stuck on synchronize_sched() which is called from native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler(). So it's a classic ABBA deadlock, due to the use of synchronize_sched() in unregister_nmi_handler(). Fix the bug by moving unregister_nmi_handler() from do_boot_cpu() to native_cpu_up() after cpu onlining is done. Signed-off-by: Vitaly Kuznetsov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170803105818.9934-1-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b474c8de7fba..54b9e89d4d6b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) * Returns zero if CPU booted OK, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) +static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, + int *cpu0_nmi_registered) { volatile u32 *trampoline_status = (volatile u32 *) __va(real_mode_header->trampoline_status); @@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long start_ip = real_mode_header->trampoline_start; unsigned long boot_error = 0; - int cpu0_nmi_registered = 0; unsigned long timeout; idle->thread.sp = (unsigned long)task_pt_regs(idle); @@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); else boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, - &cpu0_nmi_registered); + cpu0_nmi_registered); if (!boot_error) { /* @@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) */ smpboot_restore_warm_reset_vector(); } - /* - * Clean up the nmi handler. Do this after the callin and callout sync - * to avoid impact of possible long unregister time. - */ - if (cpu0_nmi_registered) - unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); return boot_error; } @@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) int native_cpu_up(unsigned int cpu, struct task_struct *tidle) { int apicid = apic->cpu_present_to_apicid(cpu); + int cpu0_nmi_registered = 0; unsigned long flags; - int err; + int err, ret = 0; WARN_ON(irqs_disabled()); @@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) common_cpu_up(cpu, tidle); - err = do_boot_cpu(apicid, cpu, tidle); + err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); if (err) { pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); - return -EIO; + ret = -EIO; + goto unreg_nmi; } /* @@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) touch_nmi_watchdog(); } - return 0; +unreg_nmi: + /* + * Clean up the nmi handler. Do this after the callin and callout sync + * to avoid impact of possible long unregister time. + */ + if (cpu0_nmi_registered) + unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); + + return ret; } /** -- cgit v1.2.3 From fdf6e7a8c96ebe115b6460768c82dd136ecbd8db Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Wed, 26 Jul 2017 18:15:49 +0800 Subject: irqchip/gic-v3-its: Allow GIC ITS number more than MAX_NUMNODES When enabling ITS NUMA support on D05, I got the boot log: [ 0.000000] SRAT: PXM 0 -> ITS 0 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 1 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 2 -> Node 0 [ 0.000000] SRAT: PXM 1 -> ITS 3 -> Node 1 [ 0.000000] SRAT: ITS affinity exceeding max count[4] This is wrong on D05 as we have 8 ITSs with 4 NUMA nodes. So dynamically alloc the memory needed instead of using its_srat_maps[MAX_NUMNODES], which count the number of ITS entry(ies) in SRAT and alloc its_srat_maps as needed, then build the mapping of numa node to ITS ID. Of course, its_srat_maps will be freed after ITS probing because we don't need that after boot. After doing this, I got what I wanted: [ 0.000000] SRAT: PXM 0 -> ITS 0 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 1 -> Node 0 [ 0.000000] SRAT: PXM 0 -> ITS 2 -> Node 0 [ 0.000000] SRAT: PXM 1 -> ITS 3 -> Node 1 [ 0.000000] SRAT: PXM 2 -> ITS 4 -> Node 2 [ 0.000000] SRAT: PXM 2 -> ITS 5 -> Node 2 [ 0.000000] SRAT: PXM 2 -> ITS 6 -> Node 2 [ 0.000000] SRAT: PXM 3 -> ITS 7 -> Node 3 Fixes: dbd2b8267233 ("irqchip/gic-v3-its: Add ACPI NUMA node mapping") Signed-off-by: Hanjun Guo Reviewed-by: Lorenzo Pieralisi Cc: Ganapatrao Kulkarni Cc: John Garry Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 3bfbf8d96a0e..5fd5f62c925d 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1843,7 +1843,7 @@ struct its_srat_map { u32 its_id; }; -static struct its_srat_map its_srat_maps[MAX_NUMNODES] __initdata; +static struct its_srat_map *its_srat_maps __initdata; static int its_in_srat __initdata; static int __init acpi_get_its_numa_node(u32 its_id) @@ -1857,6 +1857,12 @@ static int __init acpi_get_its_numa_node(u32 its_id) return NUMA_NO_NODE; } +static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header, + const unsigned long end) +{ + return 0; +} + static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, const unsigned long end) { @@ -1873,12 +1879,6 @@ static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, return -EINVAL; } - if (its_in_srat >= MAX_NUMNODES) { - pr_err("SRAT: ITS affinity exceeding max count[%d]\n", - MAX_NUMNODES); - return -EINVAL; - } - node = acpi_map_pxm_to_node(its_affinity->proximity_domain); if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { @@ -1897,14 +1897,37 @@ static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header, static void __init acpi_table_parse_srat_its(void) { + int count; + + count = acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, + gic_acpi_match_srat_its, 0); + if (count <= 0) + return; + + its_srat_maps = kmalloc(count * sizeof(struct its_srat_map), + GFP_KERNEL); + if (!its_srat_maps) { + pr_warn("SRAT: Failed to allocate memory for its_srat_maps!\n"); + return; + } + acpi_table_parse_entries(ACPI_SIG_SRAT, sizeof(struct acpi_table_srat), ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, gic_acpi_parse_srat_its, 0); } + +/* free the its_srat_maps after ITS probing */ +static void __init acpi_its_srat_maps_free(void) +{ + kfree(its_srat_maps); +} #else static void __init acpi_table_parse_srat_its(void) { } static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; } +static void __init acpi_its_srat_maps_free(void) { } #endif static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header, @@ -1951,6 +1974,7 @@ static void __init its_acpi_probe(void) acpi_table_parse_srat_its(); acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, gic_acpi_parse_madt_its, 0); + acpi_its_srat_maps_free(); } #else static void __init its_acpi_probe(void) { } -- cgit v1.2.3 From a008873740a3d44946c7e72e456f15146cfd7287 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 10 Aug 2017 15:41:17 +0100 Subject: irqchip/gic-v3-its-platform-msi: Fix msi-parent parsing loop While parsing the msi-parent property to chase up the IRQ domain a given device belongs to, the index into the msi-parent tuple should be incremented to ensure all properties entries are taken into account. Current code missed the index update so the parsing loop does not work in case multiple msi-parent phandles are present and may turn into an infinite loop in of_pmsi_get_dev_id() if phandle at index 0 does not correspond to the domain we are actually looking-up. Fix the code by updating the phandle index at each iteration in of_pmsi_get_dev_id(). Fixes: deac7fc1c87f ("irqchip/gic-v3-its: Parse new version of msi-parent property") Signed-off-by: Lorenzo Pieralisi Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its-platform-msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c index 249240d9a425..833a90fe33ae 100644 --- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c @@ -43,6 +43,7 @@ static int of_pmsi_get_dev_id(struct irq_domain *domain, struct device *dev, *dev_id = args.args[0]; break; } + index++; } while (!ret); return ret; -- cgit v1.2.3 From 85f1bd9a7b5a79d5baa8bf44af19658f7bf77bfa Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:29:19 -0400 Subject: udp: consistently apply ufo or fragmentation When iteratively building a UDP datagram with MSG_MORE and that datagram exceeds MTU, consistently choose UFO or fragmentation. Once skb_is_gso, always apply ufo. Conversely, once a datagram is split across multiple skbs, do not consider ufo. Sendpage already maintains the first invariant, only add the second. IPv6 does not have a sendpage implementation to modify. A gso skb must have a partial checksum, do not follow sk_no_check_tx in udp_send_skb. Found by syzkaller. Fixes: e89e9cf539a2 ("[IPv4/IPv6]: UFO Scatter-gather approach") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 8 +++++--- net/ipv4/udp.c | 2 +- net/ipv6/ip6_output.c | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50c74cd890bc..e153c40c2436 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : fragheaderlen)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1288,6 +1289,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e6276fa3750b..a7c804f73990 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 162efba0d0cd..2dfe50d8d609 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1381,11 +1381,12 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? skb->len : headersize)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : headersize)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); -- cgit v1.2.3 From c27927e372f0785f3303e8fad94b85945e2c97b7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Aug 2017 12:41:58 -0400 Subject: packet: fix tp_reserve race in packet_set_ring Updates to tp_reserve can race with reads of the field in packet_set_ring. Avoid this by holding the socket lock during updates in setsockopt PACKET_RESERVE. This bug was discovered by syzkaller. Fixes: 8913336a7e8d ("packet: add PACKET_RESERVE sockopt") Reported-by: Andrey Konovalov Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0615c2a950fa..008a45ca3112 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3700,14 +3700,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { -- cgit v1.2.3 From 634b8325905031eeafa61951623681ebc1329385 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 10 Aug 2017 11:23:31 +0200 Subject: nvme: fix nvme reset command timeout handling We need to return an error if a timeout occurs on any NVMe command during initialization. Without this, the nvme reset work will be stuck. A timeout will have a negative error code, meaning we need to stop initializing the controller. All postitive returns mean the controller is still usable. bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196325 Signed-off-by: Keith Busch Cc: Martin Peres [jth consolidated cleanup path ] Signed-off-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1e9290983694..6212cf4e9829 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1509,7 +1509,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } -static void nvme_configure_apst(struct nvme_ctrl *ctrl) +static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* * APST (Autonomous Power State Transition) lets us program a @@ -1538,16 +1538,16 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * then don't do anything. */ if (!ctrl->apsta) - return; + return 0; if (ctrl->npss > 31) { dev_warn(ctrl->device, "NPSS is invalid; not using APST\n"); - return; + return 0; } table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) - return; + return 0; if (!ctrl->apst_enabled || ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ @@ -1629,6 +1629,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret); kfree(table); + return ret; } static void nvme_set_latency_tolerance(struct device *dev, s32 val) @@ -1835,13 +1836,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) * In fabrics we need to verify the cntlid matches the * admin connect */ - if (ctrl->cntlid != le16_to_cpu(id->cntlid)) + if (ctrl->cntlid != le16_to_cpu(id->cntlid)) { ret = -EINVAL; + goto out_free; + } if (!ctrl->opts->discovery_nqn && !ctrl->kas) { dev_err(ctrl->device, "keep-alive support is mandatory for fabrics\n"); ret = -EINVAL; + goto out_free; } } else { ctrl->cntlid = le16_to_cpu(id->cntlid); @@ -1856,11 +1860,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); - nvme_configure_apst(ctrl); - nvme_configure_directives(ctrl); + ret = nvme_configure_apst(ctrl); + if (ret < 0) + return ret; + + ret = nvme_configure_directives(ctrl); + if (ret < 0) + return ret; ctrl->identified = true; + return 0; + +out_free: + kfree(id); return ret; } EXPORT_SYMBOL_GPL(nvme_init_identify); -- cgit v1.2.3 From a082b426286d1ead97fb87646ea361d528be023d Mon Sep 17 00:00:00 2001 From: "Kwan (Hingkwan) Huen-SSI" Date: Wed, 9 Aug 2017 11:26:29 -0700 Subject: nvme: fix directive command numd calculation The numd field of directive receive command takes number of dwords to transfer. This fix has the correct calculation for numd. Signed-off-by: Kwan (Hingkwan) Huen-SSI Reviewed-by: Jens Axboe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6212cf4e9829..37046ac2c441 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -336,7 +336,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = cpu_to_le32(sizeof(*s)); + c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; -- cgit v1.2.3 From e788787ef4f9c24aafefc480a8da5f92b914e5e6 Mon Sep 17 00:00:00 2001 From: Sandeep Singh Date: Fri, 4 Aug 2017 16:35:56 +0530 Subject: usb:xhci:Add quirk for Certain failing HP keyboard on reset after resume Certain HP keyboards would keep inputting a character automatically which is the wake-up key after S3 resume On some AMD platforms USB host fails to respond (by holding resume-K) to USB device (an HP keyboard) resume request within 1ms (TURSM) and ensures that resume is signaled for at least 20 ms (TDRSMDN), which is defined in USB 2.0 spec. The result is that the keyboard is out of function. In SNPS USB design, the host responds to the resume request only after system gets back to S0 and the host gets to functional after the internal HW restore operation that is more than 1 second after the initial resume request from the USB device. As a workaround for specific keyboard ID(HP Keyboards), applying port reset after resume when the keyboard is plugged in. Signed-off-by: Sandeep Singh Signed-off-by: Shyam Sundar S K cc: Nehal Shah Reviewed-by: Felipe Balbi Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 1 + drivers/usb/host/pci-quirks.c | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 3116edfcdc18..2e9cde0b25d7 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -249,6 +249,7 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { { USB_DEVICE(0x093a, 0x2500), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2510), .driver_info = USB_QUIRK_RESET_RESUME }, { USB_DEVICE(0x093a, 0x2521), .driver_info = USB_QUIRK_RESET_RESUME }, + { USB_DEVICE(0x03f0, 0x2b4a), .driver_info = USB_QUIRK_RESET_RESUME }, /* Logitech Optical Mouse M90/M100 */ { USB_DEVICE(0x046d, 0xc05a), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index c8989c62a262..5f4ca7890435 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -98,6 +98,7 @@ enum amd_chipset_gen { AMD_CHIPSET_HUDSON2, AMD_CHIPSET_BOLTON, AMD_CHIPSET_YANGTZE, + AMD_CHIPSET_TAISHAN, AMD_CHIPSET_UNKNOWN, }; @@ -141,6 +142,11 @@ static int amd_chipset_sb_type_init(struct amd_chipset_info *pinfo) pinfo->sb_type.gen = AMD_CHIPSET_SB700; else if (rev >= 0x40 && rev <= 0x4f) pinfo->sb_type.gen = AMD_CHIPSET_SB800; + } + pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, + 0x145c, NULL); + if (pinfo->smbus_dev) { + pinfo->sb_type.gen = AMD_CHIPSET_TAISHAN; } else { pinfo->smbus_dev = pci_get_device(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_HUDSON2_SMBUS, NULL); @@ -260,11 +266,12 @@ int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev) { /* Make sure amd chipset type has already been initialized */ usb_amd_find_chipset_info(); - if (amd_chipset.sb_type.gen != AMD_CHIPSET_YANGTZE) - return 0; - - dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); - return 1; + if (amd_chipset.sb_type.gen == AMD_CHIPSET_YANGTZE || + amd_chipset.sb_type.gen == AMD_CHIPSET_TAISHAN) { + dev_dbg(&pdev->dev, "QUIRK: Enable AMD remote wakeup fix\n"); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(usb_hcd_amd_remote_wakeup_quirk); -- cgit v1.2.3 From 94c43b9897abf4ea366ed4dba027494e080c7050 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 1 Aug 2017 10:41:56 -0400 Subject: USB: Check for dropped connection before switching to full speed Some buggy USB disk adapters disconnect and reconnect multiple times during the enumeration procedure. This may lead to a device connecting at full speed instead of high speed, because when the USB stack sees that a device isn't able to enumerate at high speed, it tries to hand the connection over to a full-speed companion controller. The logic for doing this is careful to check that the device is still connected. But this check is inadequate if the device disconnects and reconnects before the check is done. The symptom is that a device works, but much more slowly than it is capable of operating. The situation was made worse recently by commit 22547c4cc4fe ("usb: hub: Wait for connection to be reestablished after port reset"), which increases the delay following a reset before a disconnect is recognized, thus giving the device more time to reconnect. This patch makes the check more robust. If the device was disconnected at any time during enumeration, we will now skip the full-speed handover. Signed-off-by: Alan Stern Reported-and-tested-by: Zdenek Kabelac Reviewed-by: Guenter Roeck Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 6e6797d145dd..822f8c50e423 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -4725,7 +4725,8 @@ hub_power_remaining(struct usb_hub *hub) static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { - int status, i; + int status = -ENODEV; + int i; unsigned unit_load; struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); @@ -4929,9 +4930,10 @@ loop: done: hub_port_disable(hub, port1, 1); - if (hcd->driver->relinquish_port && !hub->hdev->parent) - hcd->driver->relinquish_port(hcd, port1); - + if (hcd->driver->relinquish_port && !hub->hdev->parent) { + if (status != -ENOTCONN && status != -ENODEV) + hcd->driver->relinquish_port(hcd, port1); + } } /* Handle physical or logical connection change events. -- cgit v1.2.3 From 7496cfe5431f21da5d27a8388c326397e3f0a5db Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 8 Aug 2017 17:51:27 +0800 Subject: usb: quirks: Add no-lpm quirk for Moshi USB to Ethernet Adapter Moshi USB to Ethernet Adapter internally uses a Genesys Logic hub to connect to Realtek r8153. The Realtek r8153 ethernet does not work on the internal hub, no-lpm quirk can make it work. Since another r8153 dongle at my hand does not have the issue, so add the quirk to the Genesys Logic hub instead. Signed-off-by: Kai-Heng Feng Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 2e9cde0b25d7..574da2b4529c 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -150,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* appletouch */ { USB_DEVICE(0x05ac, 0x021a), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */ + { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM }, + /* Avision AV600U */ { USB_DEVICE(0x0638, 0x0a13), .driver_info = USB_QUIRK_STRING_FETCH_255 }, -- cgit v1.2.3 From 3b6bcd3d093c698d32e93d4da57679b8fbc5e01e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Aug 2017 11:54:12 -0700 Subject: USB: serial: pl2303: add new ATEN device id This adds a new ATEN device id for a new pl2303-based device. Reported-by: Peter Kuo Cc: stable Cc: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/pl2303.c | 2 ++ drivers/usb/serial/pl2303.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index c9ebefd8f35f..a585b477415d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -52,6 +52,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(IODATA_VENDOR_ID, IODATA_PRODUCT_ID_RSAQ5) }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID), .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, + { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_UC485), + .driver_info = PL2303_QUIRK_ENDPOINT_HACK }, { USB_DEVICE(ATEN_VENDOR_ID, ATEN_PRODUCT_ID2) }, { USB_DEVICE(ATEN_VENDOR_ID2, ATEN_PRODUCT_ID) }, { USB_DEVICE(ELCOM_VENDOR_ID, ELCOM_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 09d9be88209e..3b5a15d1dc0d 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -27,6 +27,7 @@ #define ATEN_VENDOR_ID 0x0557 #define ATEN_VENDOR_ID2 0x0547 #define ATEN_PRODUCT_ID 0x2008 +#define ATEN_PRODUCT_UC485 0x2021 #define ATEN_PRODUCT_ID2 0x2118 #define IODATA_VENDOR_ID 0x04bb -- cgit v1.2.3 From e44565f62a72064e686f7a852137595ec94d78f2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 20 Jul 2017 13:13:09 -0700 Subject: firmware: fix batched requests - wake all waiters The firmware cache mechanism serves two purposes, the secondary purpose is not well documented nor understood. This fixes a regression with the secondary purpose of the firmware cache mechanism: batched requests on successful lookups. Without this fix *any* time a batched request is triggered, secondary requests for which the batched request mechanism was designed for will seem to last forver and seem to never return. This issue is present for all kernel builds possible, and a hard reset is required. The firmware cache is used for: 1) Addressing races with file lookups during the suspend/resume cycle by keeping firmware in memory during the suspend/resume cycle 2) Batched requests for the same file rely only on work from the first file lookup, which keeps the firmware in memory until the last release_firmware() is called Batched requests *only* take effect if secondary requests come in prior to the first user calling release_firmware(). The devres name used for the internal firmware cache is used as a hint other pending requests are ongoing, the firmware buffer data is kept in memory until the last user of the buffer calls release_firmware(), therefore serializing requests and delaying the release until all requests are done. Batched requests wait for a wakup or signal so we can rely on the first file fetch to write to the pending secondary requests. Commit 5b029624948d ("firmware: do not use fw_lock for fw_state protection") ported the firmware API to use swait, and in doing so failed to convert complete_all() to swake_up_all() -- it used swake_up(), loosing the ability for *some* batched requests to take effect. We *could* fix this by just using swake_up_all() *but* swait is now known to be very special use case, so its best to just move away from it. So we just go back to using completions as before commit 5b029624948d ("firmware: do not use fw_lock for fw_state protection") given this was using complete_all(). Without this fix it has been reported plugging in two Intel 6260 Wifi cards on a system will end up enumerating the two devices only 50% of the time [0]. The ported swake_up() should have actually handled the case with two devices, however, *if more than two cards are used* the swake_up() would not have sufficed. This change is only part of the required fixes for batched requests. Another fix is provided in the next patch. This particular change should fix the cases where more than three requests with the same firmware name is used, otherwise batched requests will wait for MAX_SCHEDULE_TIMEOUT and just timeout eventually. Below is a summary of tests triggering batched requests on different kernel builds. Before this patch: ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=y Most common Linux distribution setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL FAIL request_firmware_direct() FAIL FAIL request_firmware_nowait(uevent=true) FAIL FAIL request_firmware_nowait(uevent=false) FAIL FAIL ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=n Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL FAIL request_firmware_direct() FAIL FAIL request_firmware_nowait(uevent=true) FAIL FAIL request_firmware_nowait(uevent=false) FAIL FAIL ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_FW_LOADER_USER_HELPER=y Google Android setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL FAIL request_firmware_direct() FAIL FAIL request_firmware_nowait(uevent=true) FAIL FAIL request_firmware_nowait(uevent=false) FAIL FAIL ============================================================================ After this patch: ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=y Most common Linux distribution setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) FAIL OK request_firmware_nowait(uevent=false) FAIL OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=n Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) FAIL OK request_firmware_nowait(uevent=false) FAIL OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_FW_LOADER_USER_HELPER=y Google Android setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() OK OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) OK OK request_firmware_nowait(uevent=false) OK OK ============================================================================ [0] https://bugzilla.kernel.org/show_bug.cgi?id=195477 CC: [4.10+] Cc: Ming Lei Fixes: 5b029624948d ("firmware: do not use fw_lock for fw_state protection") Reported-by: Jakub Kicinski Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index b9f907eedbf7..f50ec6e367bd 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -30,7 +30,6 @@ #include #include #include -#include #include @@ -112,13 +111,13 @@ static inline long firmware_loading_timeout(void) * state of the firmware loading. */ struct fw_state { - struct swait_queue_head wq; + struct completion completion; enum fw_status status; }; static void fw_state_init(struct fw_state *fw_st) { - init_swait_queue_head(&fw_st->wq); + init_completion(&fw_st->completion); fw_st->status = FW_STATUS_UNKNOWN; } @@ -131,9 +130,8 @@ static int __fw_state_wait_common(struct fw_state *fw_st, long timeout) { long ret; - ret = swait_event_interruptible_timeout(fw_st->wq, - __fw_state_is_done(READ_ONCE(fw_st->status)), - timeout); + ret = wait_for_completion_interruptible_timeout(&fw_st->completion, + timeout); if (ret != 0 && fw_st->status == FW_STATUS_ABORTED) return -ENOENT; if (!ret) @@ -148,7 +146,7 @@ static void __fw_state_set(struct fw_state *fw_st, WRITE_ONCE(fw_st->status, status); if (status == FW_STATUS_DONE || status == FW_STATUS_ABORTED) - swake_up(&fw_st->wq); + complete_all(&fw_st->completion); } #define fw_state_start(fw_st) \ -- cgit v1.2.3 From 90d41e74a9c36a84e2efbd2a5b8d79299feee6fa Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 20 Jul 2017 13:13:10 -0700 Subject: firmware: fix batched requests - send wake up on failure on direct lookups Fix batched requests from waiting forever on failure. The firmware API batched requests feature has been broken since the API call request_firmware_direct() was introduced on commit bba3a87e982ad ("firmware: Introduce request_firmware_direct()"), added on v3.14 *iff* the firmware being requested was not present in *certain kernel builds* [0]. When no firmware is found the worker which goes on to finish never informs waiters queued up of this, so any batched request will stall in what seems to be forever (MAX_SCHEDULE_TIMEOUT). Sadly, a reboot will also stall, as the reboot notifier was only designed to kill custom fallback workers. The issue seems to the user as a type of soft lockup, what *actually* happens underneath the hood is a wait call which never completes as we failed to issue a completion on error. For device drivers with optional firmware schemes (ie, Intel iwlwifi, or Netronome -- even though it uses request_firmware() and not request_firmware_direct()), this could mean that when you boot a system with multiple cards the firmware will seem to never load on the system, or that the card is just not responsive even the driver initialization. Due to differences in scheduling possible this should not always trigger -- one would need to to ensure that multiple requests are in place at the right time for this to work, also release_firmware() must not be called prior to any other incoming request. The complexity may not be worth supporting batched requests in the future given the wait mechanism is only used also for the fallback mechanism. We'll keep it for now and just fix it. Its reported that at least with the Intel WiFi cards on one system this issue was creeping up 50% of the boots [0]. Before this commit batched requests testing revealed: ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=y Most common Linux distribution setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) FAIL OK request_firmware_nowait(uevent=false) FAIL OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=n Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() FAIL OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) FAIL OK request_firmware_nowait(uevent=false) FAIL OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_FW_LOADER_USER_HELPER=y Google Android setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() OK OK request_firmware_direct() FAIL OK request_firmware_nowait(uevent=true) OK OK request_firmware_nowait(uevent=false) OK OK ============================================================================ Ater this commit batched testing results: ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=y Most common Linux distribution setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() OK OK request_firmware_direct() OK OK request_firmware_nowait(uevent=true) OK OK request_firmware_nowait(uevent=false) OK OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n CONFIG_FW_LOADER_USER_HELPER=n Only possible if CONFIG_DELL_RBU=n and CONFIG_LEDS_LP55XX_COMMON=n, rare. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() OK OK request_firmware_direct() OK OK request_firmware_nowait(uevent=true) OK OK request_firmware_nowait(uevent=false) OK OK ============================================================================ CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_FW_LOADER_USER_HELPER=y Google Android setup. API-type no-firmware-found firmware-found ---------------------------------------------------------------------- request_firmware() OK OK request_firmware_direct() OK OK request_firmware_nowait(uevent=true) OK OK request_firmware_nowait(uevent=false) OK OK ============================================================================ [0] https://bugzilla.kernel.org/show_bug.cgi?id=195477 Cc: stable # v3.14 Fixes: bba3a87e982ad ("firmware: Introduce request_firmware_direct()" Reported-by: Nicolas Reported-by: John Ewalt Reported-by: Jakub Kicinski Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index f50ec6e367bd..76f1b702bdd6 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -153,28 +153,27 @@ static void __fw_state_set(struct fw_state *fw_st, __fw_state_set(fw_st, FW_STATUS_LOADING) #define fw_state_done(fw_st) \ __fw_state_set(fw_st, FW_STATUS_DONE) +#define fw_state_aborted(fw_st) \ + __fw_state_set(fw_st, FW_STATUS_ABORTED) #define fw_state_wait(fw_st) \ __fw_state_wait_common(fw_st, MAX_SCHEDULE_TIMEOUT) -#ifndef CONFIG_FW_LOADER_USER_HELPER - -#define fw_state_is_aborted(fw_st) false - -#else /* CONFIG_FW_LOADER_USER_HELPER */ - static int __fw_state_check(struct fw_state *fw_st, enum fw_status status) { return fw_st->status == status; } +#define fw_state_is_aborted(fw_st) \ + __fw_state_check(fw_st, FW_STATUS_ABORTED) + +#ifdef CONFIG_FW_LOADER_USER_HELPER + #define fw_state_aborted(fw_st) \ __fw_state_set(fw_st, FW_STATUS_ABORTED) #define fw_state_is_done(fw_st) \ __fw_state_check(fw_st, FW_STATUS_DONE) #define fw_state_is_loading(fw_st) \ __fw_state_check(fw_st, FW_STATUS_LOADING) -#define fw_state_is_aborted(fw_st) \ - __fw_state_check(fw_st, FW_STATUS_ABORTED) #define fw_state_wait_timeout(fw_st, timeout) \ __fw_state_wait_common(fw_st, timeout) @@ -1198,6 +1197,28 @@ _request_firmware_prepare(struct firmware **firmware_p, const char *name, return 1; /* need to load */ } +/* + * Batched requests need only one wake, we need to do this step last due to the + * fallback mechanism. The buf is protected with kref_get(), and it won't be + * released until the last user calls release_firmware(). + * + * Failed batched requests are possible as well, in such cases we just share + * the struct firmware_buf and won't release it until all requests are woken + * and have gone through this same path. + */ +static void fw_abort_batch_reqs(struct firmware *fw) +{ + struct firmware_buf *buf; + + /* Loaded directly? */ + if (!fw || !fw->priv) + return; + + buf = fw->priv; + if (!fw_state_is_aborted(&buf->fw_st)) + fw_state_aborted(&buf->fw_st); +} + /* called from request_firmware() and request_firmware_work_func() */ static int _request_firmware(const struct firmware **firmware_p, const char *name, @@ -1241,6 +1262,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, out: if (ret < 0) { + fw_abort_batch_reqs(fw); release_firmware(fw); fw = NULL; } -- cgit v1.2.3 From 260d9f2fc5655a2552701cdd0b909c4466732f19 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 20 Jul 2017 13:13:11 -0700 Subject: firmware: avoid invalid fallback aborts by using killable wait Commit 0cb64249ca500 ("firmware_loader: abort request if wait_for_completion is interrupted") added via 4.0 added support to abort the fallback mechanism when a signal was detected and wait_for_completion_interruptible() returned -ERESTARTSYS -- for instance when a user hits CTRL-C. The abort was overly *too* effective. When a child process terminates (successful or not) the signal SIGCHLD can be sent to the parent process which ran the child in the background and later triggered a sync request for firmware through a sysfs interface which relies on the fallback mechanism. This signal in turn can be recieved by the interruptible wait we constructed on firmware_class and detects it as an abort *before* userspace could get a chance to write the firmware. Upon failure -EAGAIN is returned, so userspace is also kept in the dark about exactly what happened. We can reproduce the issue with the fw_fallback.sh selftest: Before this patch: $ sudo tools/testing/selftests/firmware/fw_fallback.sh ... tools/testing/selftests/firmware/fw_fallback.sh: error - sync firmware request cancelled due to SIGCHLD After this patch: $ sudo tools/testing/selftests/firmware/fw_fallback.sh ... tools/testing/selftests/firmware/fw_fallback.sh: SIGCHLD on sync ignored as expected Fix this by making the wait killable -- only killable by SIGKILL (kill -9). We loose the ability to allow userspace to cancel a write with CTRL-C (SIGINT), however its been decided the compromise to require SIGKILL is worth the gains. Chances of this issue occuring are low due to the number of drivers upstream exclusively relying on the fallback mechanism for firmware (2 drivers), however this is observed in the field with custom drivers with sysfs triggers to load firmware. Only distributions relying on the fallback mechanism are impacted as well. An example reported issue was on Android, as follows: 1) Android init (pid=1) fork()s (say pid=42) [this child process is totally unrelated to firmware loading, it could be sleep 2; for all we care ] 2) Android init (pid=1) does a write() on a (driver custom) sysfs file which ends up calling request_firmware() kernel side 3) The firmware loading fallback mechanism is used, the request is sent to userspace and pid 1 waits in the kernel on wait_* 4) before firmware loading completes pid 42 dies (for any reason, even normal termination) 5) Kernel delivers SIGCHLD to pid=1 to tell it a child has died, which causes -ERESTARTSYS to be returned from wait_* 6) The kernel's wait aborts and return -EAGAIN for the request_firmware() caller. Cc: stable # 4.0 Fixes: 0cb64249ca500 ("firmware_loader: abort request if wait_for_completion is interrupted") Suggested-by: "Eric W. Biederman" Suggested-by: Dmitry Torokhov Tested-by: Martin Fuzzey Reported-by: Martin Fuzzey Signed-off-by: Luis R. Rodriguez Signed-off-by: Greg Kroah-Hartman --- drivers/base/firmware_class.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 76f1b702bdd6..bfbe1e154128 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -130,8 +130,7 @@ static int __fw_state_wait_common(struct fw_state *fw_st, long timeout) { long ret; - ret = wait_for_completion_interruptible_timeout(&fw_st->completion, - timeout); + ret = wait_for_completion_killable_timeout(&fw_st->completion, timeout); if (ret != 0 && fw_st->status == FW_STATUS_ABORTED) return -ENOENT; if (!ret) -- cgit v1.2.3 From 557909e195aea23d9e6b29cef0be3d795f4cf675 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 3 Aug 2017 17:30:19 +0300 Subject: mei: exclude device from suspend direct complete optimization MEI device performs link reset during system suspend sequence. The link reset cannot be performed while device is in runtime suspend state. The resume sequence is bypassed with suspend direct complete optimization,so the optimization should be disabled for mei devices. Fixes: [ 192.940537] Restarting tasks ... [ 192.940610] PGI is not set [ 192.940619] ------------[ cut here ]------------ [ 192.940623] WARNING: CPU: 0 me.c:653 mei_me_pg_exit_sync+0x351/0x360 [ 192.940624] Modules linked in: [ 192.940627] CPU: 0 PID: 1661 Comm: kworker/0:3 Not tainted 4.13.0-rc2+ #2 [ 192.940628] Hardware name: Dell Inc. XPS 13 9343/0TM99H, BIOS A11 12/08/2016 [ 192.940630] Workqueue: pm pm_runtime_work [ 192.940642] Call Trace: [ 192.940646] ? pci_pme_active+0x1de/0x1f0 [ 192.940649] ? pci_restore_standard_config+0x50/0x50 [ 192.940651] ? kfree+0x172/0x190 [ 192.940653] ? kfree+0x172/0x190 [ 192.940655] ? pci_restore_standard_config+0x50/0x50 [ 192.940663] mei_me_pm_runtime_resume+0x3f/0xc0 [ 192.940665] pci_pm_runtime_resume+0x7a/0xa0 [ 192.940667] __rpm_callback+0xb9/0x1e0 [ 192.940668] ? preempt_count_add+0x6d/0xc0 [ 192.940670] rpm_callback+0x24/0x90 [ 192.940672] ? pci_restore_standard_config+0x50/0x50 [ 192.940674] rpm_resume+0x4e8/0x800 [ 192.940676] pm_runtime_work+0x55/0xb0 [ 192.940678] process_one_work+0x184/0x3e0 [ 192.940680] worker_thread+0x4d/0x3a0 [ 192.940681] ? preempt_count_sub+0x9b/0x100 [ 192.940683] kthread+0x122/0x140 [ 192.940684] ? process_one_work+0x3e0/0x3e0 [ 192.940685] ? __kthread_create_on_node+0x1a0/0x1a0 [ 192.940688] ret_from_fork+0x27/0x40 [ 192.940690] Code: 96 3a 9e ff 48 8b 7d 98 e8 cd 21 58 00 83 bb bc 01 00 00 04 0f 85 40 fe ff ff e9 41 fe ff ff 48 c7 c7 5f 04 99 96 e8 93 6b 9f ff <0f> ff e9 5d fd ff ff e8 33 fe 99 ff 0f 1f 00 0f 1f 44 00 00 55 [ 192.940719] ---[ end trace a86955597774ead8 ]--- [ 192.942540] done. Suggested-by: Rafael J. Wysocki Reported-by: Dominik Brodowski Cc: Rafael J. Wysocki Cc: Dominik Brodowski Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-me.c | 6 ++++++ drivers/misc/mei/pci-txe.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 8621a198a2ce..bac33311f55a 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -215,6 +215,12 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; + /* * For not wake-able HW runtime pm framework * can't be used on pci device level. diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index f811cd524468..e38a5f144373 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -137,6 +137,12 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + /* + * MEI requires to resume from runtime suspend mode + * in order to perform link reset flow upon system suspend. + */ + pdev->dev_flags |= PCI_DEV_FLAGS_NEEDS_RESUME; + /* * For not wake-able HW runtime pm framework * can't be used on pci device level. -- cgit v1.2.3 From 1cd65d17612e8b64989f7af20213d4bb7a7f4d91 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 25 Jul 2017 17:41:58 +0300 Subject: thunderbolt: Do not enumerate more ports from DROM than the controller has Some Alpine Ridge LP DROMs (there might be others) erroneusly list more ports than the controller actually has. Most probably because DROM of the full Dual/Single port Thunderbolt controller was reused for LP version. The current DROM parser does not check the upper bound thus it leads to crash when sw->ports[] is accessed over bounds: BUG: unable to handle kernel NULL pointer dereference at 00000000000002ec IP: tb_drom_read+0x383/0x890 [thunderbolt] PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 3 PID: 12248 Comm: systemd-udevd Not tainted 4.13.0-rc1-next-20170719 #1 Hardware name: LENOVO 20HF000YGE/20HF000YGE, BIOS N1WET32W (1.11 ) 05/23/2017 task: ffff8a293e4bcd80 task.stack: ffffa698027a8000 RIP: 0010:tb_drom_read+0x383/0x890 [thunderbolt] RSP: 0018:ffffa698027ab990 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8a2940af7800 RCX: 0000000000000000 RDX: ffff8a2940ebb400 RSI: 0000000000000000 RDI: ffffa698027ab9a0 RBP: ffffa698027ab9d0 R08: 0000000000000001 R09: 0000000000000002 R10: ffff8a2940ebb5b0 R11: 0000000000000000 R12: ffff8a293bfa968c R13: 000000000000002c R14: 0000000000000056 R15: 0000000000000056 FS: 00007f0a945a38c0(0000) GS:ffff8a2961580000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000002ec CR3: 000000043e785000 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tb_switch_add+0x9d/0x730 [thunderbolt] ? tb_switch_alloc+0x3cd/0x4d0 [thunderbolt] icm_start+0x5a/0xa0 [thunderbolt] tb_domain_add+0xc3/0xf0 [thunderbolt] nhi_probe+0x19e/0x310 [thunderbolt] local_pci_probe+0x42/0xa0 pci_device_probe+0x18d/0x1a0 driver_probe_device+0x2ff/0x450 __driver_attach+0xa4/0xe0 ? driver_probe_device+0x450/0x450 bus_for_each_dev+0x6e/0xb0 driver_attach+0x1e/0x20 bus_add_driver+0x1d0/0x270 ? 0xffffffffc0bbb000 driver_register+0x60/0xe0 ? 0xffffffffc0bbb000 __pci_register_driver+0x4c/0x50 nhi_init+0x28/0x1000 [thunderbolt] do_one_initcall+0x50/0x190 ? __vunmap+0x81/0xb0 ? _cond_resched+0x1a/0x50 ? kmem_cache_alloc_trace+0x15f/0x1c0 ? do_init_module+0x27/0x1e9 do_init_module+0x5f/0x1e9 load_module+0x24e7/0x2a60 ? vfs_read+0x115/0x130 SYSC_finit_module+0xfc/0x120 ? SYSC_finit_module+0xfc/0x120 SyS_finit_module+0xe/0x10 do_syscall_64+0x67/0x170 entry_SYSCALL64_slow_path+0x25/0x25 Fix this by making sure we only enumerate DROM port entries the hardware actually has. Reported-by: Christian Kellner Signed-off-by: Mika Westerberg Reviewed-by: Lukas Wunner Tested-by: Christian Kellner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/eeprom.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c index 308b6e17c88a..fe2f00ceafc5 100644 --- a/drivers/thunderbolt/eeprom.c +++ b/drivers/thunderbolt/eeprom.c @@ -333,6 +333,15 @@ static int tb_drom_parse_entry_port(struct tb_switch *sw, int res; enum tb_port_type type; + /* + * Some DROMs list more ports than the controller actually has + * so we skip those but allow the parser to continue. + */ + if (header->index > sw->config.max_port_number) { + dev_info_once(&sw->dev, "ignoring unnecessary extra entries in DROM\n"); + return 0; + } + port = &sw->ports[header->index]; port->disabled = header->port_disabled; if (port->disabled) -- cgit v1.2.3 From d507e2ebd2c7be9138e5cf5c0cb1931c90c42ab1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 10 Aug 2017 15:23:31 -0700 Subject: mm: fix global NR_SLAB_.*CLAIMABLE counter reads As Tetsuo points out: "Commit 385386cff4c6 ("mm: vmstat: move slab statistics from zone to node counters") broke "Slab:" field of /proc/meminfo . It shows nearly 0kB" In addition to /proc/meminfo, this problem also affects the slab counters OOM/allocation failure info dumps, can cause early -ENOMEM from overcommit protection, and miscalculate image size requirements during suspend-to-disk. This is because the patch in question switched the slab counters from the zone level to the node level, but forgot to update the global accessor functions to read the aggregate node data instead of the aggregate zone data. Use global_node_page_state() to access the global slab counters. Fixes: 385386cff4c6 ("mm: vmstat: move slab statistics from zone to node counters") Link: http://lkml.kernel.org/r/20170801134256.5400-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Josef Bacik Cc: Vladimir Davydov Cc: Stefan Agner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 8 ++++---- kernel/power/snapshot.c | 2 +- mm/page_alloc.c | 9 +++++---- mm/util.c | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 8a428498d6b2..509a61668d90 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -106,13 +106,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_FILE_MAPPED)); show_val_kb(m, "Shmem: ", i.sharedram); show_val_kb(m, "Slab: ", - global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)); + global_node_page_state(NR_SLAB_RECLAIMABLE) + + global_node_page_state(NR_SLAB_UNRECLAIMABLE)); show_val_kb(m, "SReclaimable: ", - global_page_state(NR_SLAB_RECLAIMABLE)); + global_node_page_state(NR_SLAB_RECLAIMABLE)); show_val_kb(m, "SUnreclaim: ", - global_page_state(NR_SLAB_UNRECLAIMABLE)); + global_node_page_state(NR_SLAB_UNRECLAIMABLE)); seq_printf(m, "KernelStack: %8lu kB\n", global_page_state(NR_KERNEL_STACK_KB)); show_val_kb(m, "PageTables: ", diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 222317721c5a..0972a8e09d08 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1650,7 +1650,7 @@ static unsigned long minimum_image_size(unsigned long saveable) { unsigned long size; - size = global_page_state(NR_SLAB_RECLAIMABLE) + size = global_node_page_state(NR_SLAB_RECLAIMABLE) + global_node_page_state(NR_ACTIVE_ANON) + global_node_page_state(NR_INACTIVE_ANON) + global_node_page_state(NR_ACTIVE_FILE) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fc32aa81f359..626a430e32d1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4458,8 +4458,9 @@ long si_mem_available(void) * Part of the reclaimable slab consists of items that are in use, * and cannot be freed. Cap this estimate at the low watermark. */ - available += global_page_state(NR_SLAB_RECLAIMABLE) - - min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low); + available += global_node_page_state(NR_SLAB_RECLAIMABLE) - + min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, + wmark_low); if (available < 0) available = 0; @@ -4602,8 +4603,8 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state(NR_UNSTABLE_NFS), - global_page_state(NR_SLAB_RECLAIMABLE), - global_page_state(NR_SLAB_UNRECLAIMABLE), + global_node_page_state(NR_SLAB_RECLAIMABLE), + global_node_page_state(NR_SLAB_UNRECLAIMABLE), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_page_state(NR_PAGETABLE), diff --git a/mm/util.c b/mm/util.c index 7b07ec852e01..9ecddf568fe3 100644 --- a/mm/util.c +++ b/mm/util.c @@ -633,7 +633,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * which are reclaimable, under pressure. The dentry * cache and most inode caches should fall into this */ - free += global_page_state(NR_SLAB_RECLAIMABLE); + free += global_node_page_state(NR_SLAB_RECLAIMABLE); /* * Leave reserved pages. The pages are not for anonymous pages. -- cgit v1.2.3 From 75dddef32514f7aa58930bde6a1263253bc3d4ba Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Thu, 10 Aug 2017 15:23:35 -0700 Subject: mm: ratelimit PFNs busy info message The RDMA subsystem can generate several thousand of these messages per second eventually leading to a kernel crash. Ratelimit these messages to prevent this crash. Doug said: "I've been carrying a version of this for several kernel versions. I don't remember when they started, but we have one (and only one) class of machines: Dell PE R730xd, that generate these errors. When it happens, without a rate limit, we get rcu timeouts and kernel oopses. With the rate limit, we just get a lot of annoying kernel messages but the machine continues on, recovers, and eventually the memory operations all succeed" And: "> Well... why are all these EBUSY's occurring? It sounds inefficient > (at least) but if it is expected, normal and unavoidable then > perhaps we should just remove that message altogether? I don't have an answer to that question. To be honest, I haven't looked real hard. We never had this at all, then it started out of the blue, but only on our Dell 730xd machines (and it hits all of them), but no other classes or brands of machines. And we have our 730xd machines loaded up with different brands and models of cards (for instance one dedicated to mlx4 hardware, one for qib, one for mlx5, an ocrdma/cxgb4 combo, etc), so the fact that it hit all of the machines meant it wasn't tied to any particular brand/model of RDMA hardware. To me, it always smelled of a hardware oddity specific to maybe the CPUs or mainboard chipsets in these machines, so given that I'm not an mm expert anyway, I never chased it down. A few other relevant details: it showed up somewhere around 4.8/4.9 or thereabouts. It never happened before, but the prinkt has been there since the 3.18 days, so possibly the test to trigger this message was changed, or something else in the allocator changed such that the situation started happening on these machines? And, like I said, it is specific to our 730xd machines (but they are all identical, so that could mean it's something like their specific ram configuration is causing the allocator to hit this on these machine but not on other machines in the cluster, I don't want to say it's necessarily the model of chipset or CPU, there are other bits of identicalness between these machines)" Link: http://lkml.kernel.org/r/499c0f6cc10d6eb829a67f2a4d75b4228a9b356e.1501695897.git.jtoppins@redhat.com Signed-off-by: Jonathan Toppins Reviewed-by: Doug Ledford Tested-by: Doug Ledford Cc: Michal Hocko Cc: Vlastimil Babka Cc: Mel Gorman Cc: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 626a430e32d1..6d00f746c2fd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7669,7 +7669,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, false)) { - pr_info("%s: [%lx, %lx) PFNs busy\n", + pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n", __func__, outer_start, end); ret = -EBUSY; goto done; -- cgit v1.2.3 From 5af10dfd0afc559bb4b0f7e3e8227a1578333995 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 10 Aug 2017 15:23:38 -0700 Subject: userfaultfd: hugetlbfs: remove superfluous page unlock in VM_SHARED case huge_add_to_page_cache->add_to_page_cache implicitly unlocks the page before returning in case of errors. The error returned was -EEXIST by running UFFDIO_COPY on a non-hole offset of a VM_SHARED hugetlbfs mapping. It was an userland bug that triggered it and the kernel must cope with it returning -EEXIST from ioctl(UFFDIO_COPY) as expected. page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) kernel BUG at mm/filemap.c:964! invalid opcode: 0000 [#1] SMP CPU: 1 PID: 22582 Comm: qemu-system-x86 Not tainted 4.11.11-300.fc26.x86_64 #1 RIP: unlock_page+0x4a/0x50 Call Trace: hugetlb_mcopy_atomic_pte+0xc0/0x320 mcopy_atomic+0x96f/0xbe0 userfaultfd_ioctl+0x218/0xe90 do_vfs_ioctl+0xa5/0x600 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1a/0xa9 Link: http://lkml.kernel.org/r/20170802165145.22628-2-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Tested-by: Maxime Coquelin Reviewed-by: Mike Kravetz Cc: "Dr. David Alan Gilbert" Cc: Mike Rapoport Cc: Alexey Perevalov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1a0ac0ad6f6..31e207cb399b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4062,9 +4062,9 @@ out: return ret; out_release_unlock: spin_unlock(ptl); -out_release_nounlock: if (vm_shared) unlock_page(page); +out_release_nounlock: put_page(page); goto out; } -- cgit v1.2.3 From a4afe8cdec1646c3d258b02d1cfdfb1313b76eb1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Aug 2017 15:23:40 -0700 Subject: test_kmod: fix spelling mistake: "EMTPY" -> "EMPTY" Trivial fix to spelling mistake in snprintf text [mcgrof@kernel.org: massaged commit message] Link: http://lkml.kernel.org/r/20170802211450.27928-3-mcgrof@kernel.org Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Colin Ian King Signed-off-by: Luis R. Rodriguez Cc: Dmitry Torokhov Cc: Kees Cook Cc: Jessica Yu Cc: Rusty Russell Cc: Michal Marek Cc: Petr Mladek Cc: Miroslav Benes Cc: Josh Poimboeuf Cc: Eric W. Biederman Cc: Shuah Khan Cc: Dan Carpenter Cc: David Binderman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 6c1d678bcf8b..90c91541fc16 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -485,7 +485,7 @@ static ssize_t config_show(struct device *dev, config->test_driver); else len += snprintf(buf+len, PAGE_SIZE - len, - "driver:\tEMTPY\n"); + "driver:\tEMPTY\n"); if (config->test_fs) len += snprintf(buf+len, PAGE_SIZE - len, @@ -493,7 +493,7 @@ static ssize_t config_show(struct device *dev, config->test_fs); else len += snprintf(buf+len, PAGE_SIZE - len, - "fs:\tEMTPY\n"); + "fs:\tEMPTY\n"); mutex_unlock(&test_dev->config_mutex); -- cgit v1.2.3 From 434b06ae23bab4f4111a674f9b64409c87fa8df9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 10 Aug 2017 15:23:44 -0700 Subject: test_kmod: fix bug which allows negative values on two config options Parsing with kstrtol() enables values to be negative, and we failed to check for negative values when parsing with test_dev_config_update_uint_sync() or test_dev_config_update_uint_range(). test_dev_config_update_uint_range() has a minimum check though so an issue is not present there. test_dev_config_update_uint_sync() is only used for the number of threads to use (config_num_threads_store()), and indeed this would fail with an attempt for a large allocation. Although the issue is only present in practice with the first fix both by using kstrtoul() instead of kstrtol(). Link: http://lkml.kernel.org/r/20170802211450.27928-4-mcgrof@kernel.org Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Luis R. Rodriguez Reported-by: Dan Carpenter Cc: Colin Ian King Cc: David Binderman Cc: Dmitry Torokhov Cc: Eric W. Biederman Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Michal Marek Cc: Miroslav Benes Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 90c91541fc16..8fc0a7a19c83 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -880,10 +880,10 @@ static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev, int (*test_sync)(struct kmod_test_device *test_dev)) { int ret; - long new; + unsigned long new; unsigned int old_val; - ret = kstrtol(buf, 10, &new); + ret = kstrtoul(buf, 10, &new); if (ret) return ret; @@ -918,9 +918,9 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev, unsigned int max) { int ret; - long new; + unsigned long new; - ret = kstrtol(buf, 10, &new); + ret = kstrtoul(buf, 10, &new); if (ret) return ret; -- cgit v1.2.3 From 9c56771316ef50992ada284b4c01b03842b2660d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Aug 2017 15:23:47 -0700 Subject: test_kmod: fix the lock in register_test_dev_kmod() We accidentally just drop the lock twice instead of taking it and then releasing it. This isn't a big issue unless you are adding more than one device to test on, and the kmod.sh doesn't do that yet, however this obviously is the correct thing to do. [mcgrof@kernel.org: massaged subject, explain what happens] Link: http://lkml.kernel.org/r/20170802211450.27928-5-mcgrof@kernel.org Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter Signed-off-by: Luis R. Rodriguez Cc: Colin Ian King Cc: David Binderman Cc: Dmitry Torokhov Cc: Eric W. Biederman Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Michal Marek Cc: Miroslav Benes Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 8fc0a7a19c83..1bc06bbfc97a 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -1146,7 +1146,7 @@ static struct kmod_test_device *register_test_dev_kmod(void) struct kmod_test_device *test_dev = NULL; int ret; - mutex_unlock(®_dev_mutex); + mutex_lock(®_dev_mutex); /* int should suffice for number of devices, test for wrap */ if (unlikely(num_test_devs + 1) < 0) { -- cgit v1.2.3 From 4e98ebe5f435fbe5bca91c24bc5d5a2b33025e08 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 10 Aug 2017 15:23:50 -0700 Subject: test_kmod: fix small memory leak on filesystem tests The break was in the wrong place so file system tests don't work as intended, leaking memory at each test switch. [mcgrof@kernel.org: massaged commit subject, noted memory leak issue without the fix] Link: http://lkml.kernel.org/r/20170802211450.27928-6-mcgrof@kernel.org Fixes: 39258f448d71 ("kmod: add test driver to stress test the module loader") Signed-off-by: Dan Carpenter Signed-off-by: Luis R. Rodriguez Reported-by: David Binderman Cc: Colin Ian King Cc: Dmitry Torokhov Cc: Eric W. Biederman Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Michal Marek Cc: Miroslav Benes Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 1bc06bbfc97a..ff9148969b92 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -746,11 +746,11 @@ static int trigger_config_run_type(struct kmod_test_device *test_dev, strlen(test_str)); break; case TEST_KMOD_FS_TYPE: - break; kfree_const(config->test_fs); config->test_driver = NULL; copied = config_copy_test_fs(config, test_str, strlen(test_str)); + break; default: mutex_unlock(&test_dev->config_mutex); return -EINVAL; -- cgit v1.2.3 From 9eeb52ae712e72141c4c1d173048a606ba8f42f6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Thu, 10 Aug 2017 15:23:53 -0700 Subject: fault-inject: fix wrong should_fail() decision in task context Commit 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth") unintentionally broke a conditional statement in should_fail(). Any faults are not injected in the task context by the change when the systematic fault injection is not used. This change restores to the previous correct behaviour. Link: http://lkml.kernel.org/r/1501633700-3488-1-git-send-email-akinobu.mita@gmail.com Fixes: 1203c8e6fb0a ("fault-inject: simplify access check for fail-nth") Signed-off-by: Akinobu Mita Reported-by: Lu Fengqi Tested-by: Lu Fengqi Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/fault-inject.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 7d315fdb9f13..cf7b129b0b2b 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -110,10 +110,12 @@ bool should_fail(struct fault_attr *attr, ssize_t size) if (in_task()) { unsigned int fail_nth = READ_ONCE(current->fail_nth); - if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1)) - goto fail; + if (fail_nth) { + if (!WRITE_ONCE(current->fail_nth, fail_nth - 1)) + goto fail; - return false; + return false; + } } /* No need to check any other properties if the probability is 0 */ -- cgit v1.2.3 From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:56 -0700 Subject: mm: migrate: prevent racy access to tlb_flush_pending Patch series "fixes of TLB batching races", v6. It turns out that Linux TLB batching mechanism suffers from various races. Races that are caused due to batching during reclamation were recently handled by Mel and this patch-set deals with others. The more fundamental issue is that concurrent updates of the page-tables allow for TLB flushes to be batched on one core, while another core changes the page-tables. This other core may assume a PTE change does not require a flush based on the updated PTE value, while it is unaware that TLB flushes are still pending. This behavior affects KSM (which may result in memory corruption) and MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior). A proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED. Memory corruption in KSM is harder to produce in practice, but was observed by hacking the kernel and adding a delay before flushing and replacing the KSM page. Finally, there is also one memory barrier missing, which may affect architectures with weak memory model. This patch (of 7): Setting and clearing mm->tlb_flush_pending can be performed by multiple threads, since mmap_sem may only be acquired for read in task_numa_work(). If this happens, tlb_flush_pending might be cleared while one of the threads still changes PTEs and batches TLB flushes. This can lead to the same race between migration and change_protection_range() that led to the introduction of tlb_flush_pending. The result of this race was data corruption, which means that this patch also addresses a theoretically possible data corruption. An actual data corruption was not observed, yet the race was was confirmed by adding assertion to check tlb_flush_pending is not set by two threads, adding artificial latency in change_protection_range() and using sysctl to reduce kernel.numa_balancing_scan_delay_ms. Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com Fixes: 20841405940e ("mm: fix TLB flush race between migration, and change_protection_range") Signed-off-by: Nadav Amit Acked-by: Mel Gorman Acked-by: Rik van Riel Acked-by: Minchan Kim Cc: Andy Lutomirski Cc: Hugh Dickins Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 31 ++++++++++++++++++++++--------- kernel/fork.c | 2 +- mm/debug.c | 2 +- mm/mprotect.c | 4 ++-- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7f384bb62d8e..f58f76ee1dfa 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -493,7 +493,7 @@ struct mm_struct { * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ - bool tlb_flush_pending; + atomic_t tlb_flush_pending; #endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ @@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { barrier(); - return mm->tlb_flush_pending; + return atomic_read(&mm->tlb_flush_pending) > 0; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { - mm->tlb_flush_pending = true; + atomic_set(&mm->tlb_flush_pending, 0); +} + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ + atomic_inc(&mm->tlb_flush_pending); /* - * Guarantee that the tlb_flush_pending store does not leak into the + * Guarantee that the tlb_flush_pending increase does not leak into the * critical section updating the page tables */ smp_mb__before_spinlock(); } + /* Clearing is done after a TLB flush, which also provides a barrier. */ -static inline void clear_tlb_flush_pending(struct mm_struct *mm) +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { barrier(); - mm->tlb_flush_pending = false; + atomic_dec(&mm->tlb_flush_pending); } #else static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { return false; } -static inline void set_tlb_flush_pending(struct mm_struct *mm) + +static inline void init_tlb_flush_pending(struct mm_struct *mm) { } -static inline void clear_tlb_flush_pending(struct mm_struct *mm) + +static inline void inc_tlb_flush_pending(struct mm_struct *mm) +{ +} + +static inline void dec_tlb_flush_pending(struct mm_struct *mm) { } #endif diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..e075b7780421 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_aio(mm); mm_init_owner(mm, p); mmu_notifier_mm_init(mm); - clear_tlb_flush_pending(mm); + init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif diff --git a/mm/debug.c b/mm/debug.c index db1cd26d8752..d70103bb4731 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm) mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) - mm->tlb_flush_pending, + atomic_read(&mm->tlb_flush_pending), #endif mm->def_flags, &mm->def_flags ); diff --git a/mm/mprotect.c b/mm/mprotect.c index 4180ad8cc9c5..bd0f409922cb 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, BUG_ON(addr >= end); pgd = pgd_offset(mm, addr); flush_cache_range(vma, addr, end); - set_tlb_flush_pending(mm); + inc_tlb_flush_pending(mm); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) @@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma, /* Only flush the TLB if we actually modified any entries: */ if (pages) flush_tlb_range(vma, start, end); - clear_tlb_flush_pending(mm); + dec_tlb_flush_pending(mm); return pages; } -- cgit v1.2.3 From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:23:59 -0700 Subject: mm: migrate: fix barriers around tlb_flush_pending Reading tlb_flush_pending while the page-table lock is taken does not require a barrier, since the lock/unlock already acts as a barrier. Removing the barrier in mm_tlb_flush_pending() to address this issue. However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending() while the page-table lock is already released, which may present a problem on architectures with weak memory model (PPC). To deal with this case, a new parameter is added to mm_tlb_flush_pending() to indicate if it is read without the page-table lock taken, and calling smp_mb__after_unlock_lock() in this case. Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com Signed-off-by: Nadav Amit Acked-by: Rik van Riel Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Andy Lutomirski Cc: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Russell King Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f58f76ee1dfa..0e478ebd2706 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen. - * The barriers below prevent the compiler from re-ordering the instructions - * around the memory barriers that are already present in the code. + * The barriers are used to ensure the order between tlb_flush_pending updates, + * which happen while the lock is not taken, and the PTE updates, which happen + * while the lock is taken, are serialized. */ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) { - barrier(); return atomic_read(&mm->tlb_flush_pending) > 0; } @@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm) /* Clearing is done after a TLB flush, which also provides a barrier. */ static inline void dec_tlb_flush_pending(struct mm_struct *mm) { - barrier(); + /* + * Guarantee that the tlb_flush_pending does not not leak into the + * critical section, since we must order the PTE change and changes to + * the pending TLB flush indication. We could have relied on TLB flush + * as a memory barrier, but this behavior is not clearly documented. + */ + smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } #else -- cgit v1.2.3 From a9b802500ebbff1544519a2969323b719dac21f0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 10 Aug 2017 15:24:02 -0700 Subject: Revert "mm: numa: defer TLB flush for THP migration as long as possible" While deferring TLB flushes is a good practice, the reverted patch caused pending TLB flushes to be checked while the page-table lock is not taken. As a result, in architectures with weak memory model (PPC), Linux may miss a memory-barrier, miss the fact TLB flushes are pending, and cause (in theory) a memory corruption. Since the alternative of using smp_mb__after_unlock_lock() was considered a bit open-coded, and the performance impact is expected to be small, the previous patch is reverted. This reverts b0943d61b8fa ("mm: numa: defer TLB flush for THP migration as long as possible"). Link: http://lkml.kernel.org/r/20170802000818.4760-4-namit@vmware.com Signed-off-by: Nadav Amit Suggested-by: Mel Gorman Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Andy Lutomirski Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Russell King Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 7 +++++++ mm/migrate.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 86975dec0ba1..216114f6ef0b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1495,6 +1495,13 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd) goto clear_pmdnuma; } + /* + * The page_table_lock above provides a memory barrier + * with change_protection_range. + */ + if (mm_tlb_flush_pending(vma->vm_mm)) + flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE); + /* * Migrate the THP to the requested node, returns with page unlocked * and access rights restored. diff --git a/mm/migrate.c b/mm/migrate.c index 627671551873..d68a41da6abb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, put_page(new_page); goto out_fail; } - /* - * We are not sure a pending tlb flush here is for a huge page - * mapping or not. Hence use the tlb range variant - */ - if (mm_tlb_flush_pending(mm)) - flush_tlb_range(vma, mmun_start, mmun_end); /* Prepare a page as a migration target */ __SetPageLocked(new_page); -- cgit v1.2.3 From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:05 -0700 Subject: mm: refactor TLB gathering API This patch is a preparatory patch for solving race problems caused by TLB batch. For that, we will increase/decrease TLB flush pending count of mm_struct whenever tlb_[gather|finish]_mmu is called. Before making it simple, this patch separates architecture specific part and rename it to arch_tlb_[gather|finish]_mmu and generic part just calls it. It shouldn't change any behavior. Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 6 ++++-- arch/ia64/include/asm/tlb.h | 6 ++++-- arch/s390/include/asm/tlb.h | 12 ++++++------ arch/sh/include/asm/tlb.h | 6 ++++-- arch/um/include/asm/tlb.h | 8 +++++--- include/asm-generic/tlb.h | 7 ++++--- include/linux/mm_types.h | 6 ++++++ mm/memory.c | 28 +++++++++++++++++++++------- 8 files changed, 54 insertions(+), 25 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 3f2eb76243e3..7f5b2a2d3861 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->fullmm = !(start | (end+1)); @@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index fced197b9626..93cadc04ac62 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb) static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->max = ARRAY_SIZE(tlb->local); @@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start * collected. */ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 7317b3108a88..d574d0820dc8 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -47,10 +47,9 @@ struct mmu_table_batch { extern void tlb_table_flush(struct mmu_gather *tlb); extern void tlb_remove_table(struct mmu_gather *tlb, void *table); -static inline void tlb_gather_mmu(struct mmu_gather *tlb, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -static inline void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) +static inline void +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 46e0d635e36f..89786560dbd4 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start } static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { if (tlb->fullmm) flush_tlb_mm(tlb->mm); diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 600a2e9bfee2..2a901eca7145 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb) } static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; tlb->start = start; @@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/* tlb_finish_mmu +/* arch_tlb_finish_mmu * Called at the end of the shootdown operation to free up any resources * that were required. */ static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { tlb_flush_mmu(tlb); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8afa4335e5b2..8f71521e7a44 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -112,10 +112,11 @@ struct mmu_gather { #define HAVE_GENERIC_MMU_GATHER -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); +void arch_tlb_gather_mmu(struct mmu_gather *tlb, + struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, - unsigned long end); +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0e478ebd2706..c605f2a3a68e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return mm->cpu_vm_mask_var; } +struct mmu_gather; +extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end); + #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by diff --git a/mm/memory.c b/mm/memory.c index f65beaad319b..34cba5113e06 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb) return true; } -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). - */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) { tlb->mm = mm; @@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * Called at the end of the shootdown operation to free up any resources * that were required. */ -void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) { struct mmu_gather_batch *batch, *next; @@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +/* tlb_gather_mmu + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @fullmm argument is used when @mm is without + * users and we're going to destroy the full address space (exit/execve). + */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + arch_tlb_finish_mmu(tlb, start, end); +} + /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. -- cgit v1.2.3 From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:09 -0700 Subject: mm: make tlb_flush_pending global Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING| COMPACTION] but upcoming patches to solve subtle TLB flush batching problem will use it regardless of compaction/NUMA so this patch doesn't remove the dependency. [akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement] Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Acked-by: Mel Gorman Cc: "David S. Miller" Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Heiko Carstens Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 21 --------------------- mm/debug.c | 4 ---- 2 files changed, 25 deletions(-) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c605f2a3a68e..892a7b0196fd 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -487,14 +487,12 @@ struct mm_struct { /* numa_scan_seq prevents two threads setting pte_numa */ int numa_scan_seq; #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * An operation with batched TLB flushing is going on. Anything that * can move process memory needs to flush the TLB when moving a * PROT_NONE or PROT_NUMA mapped page. */ atomic_t tlb_flush_pending; -#endif #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH /* See flush_tlb_batched_pending() */ bool tlb_flush_batched; @@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, extern void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end); -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) /* * Memory barriers to keep this state in sync are graciously provided by * the page table locks, outside of which no page table modifications happen. @@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm) smp_mb__before_atomic(); atomic_dec(&mm->tlb_flush_pending); } -#else -static inline bool mm_tlb_flush_pending(struct mm_struct *mm) -{ - return false; -} - -static inline void init_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void inc_tlb_flush_pending(struct mm_struct *mm) -{ -} - -static inline void dec_tlb_flush_pending(struct mm_struct *mm) -{ -} -#endif struct vm_fault; diff --git a/mm/debug.c b/mm/debug.c index d70103bb4731..5715448ab0b5 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n" #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) "tlb_flush_pending %d\n" -#endif "def_flags: %#lx(%pGv)\n", mm, mm->mmap, mm->vmacache_seqnum, mm->task_size, @@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm) #ifdef CONFIG_NUMA_BALANCING mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq, #endif -#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION) atomic_read(&mm->tlb_flush_pending), -#endif mm->def_flags, &mm->def_flags ); } -- cgit v1.2.3 From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:12 -0700 Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem Nadav reported parallel MADV_DONTNEED on same range has a stale TLB problem and Mel fixed it[1] and found same problem on MADV_FREE[2]. Quote from Mel Gorman: "The race in question is CPU 0 running madv_free and updating some PTEs while CPU 1 is also running madv_free and looking at the same PTEs. CPU 1 may have writable TLB entries for a page but fail the pte_dirty check (because CPU 0 has updated it already) and potentially fail to flush. Hence, when madv_free on CPU 1 returns, there are still potentially writable TLB entries and the underlying PTE is still present so that a subsequent write does not necessarily propagate the dirty bit to the underlying PTE any more. Reclaim at some unknown time at the future may then see that the PTE is still clean and discard the page even though a write has happened in the meantime. I think this is possible but I could have missed some protection in madv_free that prevents it happening." This patch aims for solving both problems all at once and is ready for other problem with KSM, MADV_FREE and soft-dirty story[3]. TLB batch API(tlb_[gather|finish]_mmu] uses [inc|dec]_tlb_flush_pending and mmu_tlb_flush_pending so that when tlb_finish_mmu is called, we can catch there are parallel threads going on. In that case, forcefully, flush TLB to prevent for user to access memory via stale TLB entry although it fail to gather page table entry. I confirmed this patch works with [4] test program Nadav gave so this patch supersedes "mm: Always flush VMA ranges affected by zap_page_range v2" in current mmotm. NOTE: This patch modifies arch-specific TLB gathering interface(x86, ia64, s390, sh, um). It seems most of architecture are straightforward but s390 need to be careful because tlb_flush_mmu works only if mm->context.flush_mm is set to non-zero which happens only a pte entry really is cleared by ptep_get_and_clear and friends. However, this problem never changes the pte entries but need to flush to prevent memory access from stale tlb. [1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net [2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de [3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com [4] https://patchwork.kernel.org/patch/9861621/ [minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu] Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Reported-by: Nadav Amit Reported-by: Mel Gorman Acked-by: Mel Gorman Cc: Ingo Molnar Cc: Russell King Cc: Tony Luck Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Heiko Carstens Cc: Yoshinori Sato Cc: Jeff Dike Cc: Andrea Arcangeli Cc: Andy Lutomirski Cc: Hugh Dickins Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/tlb.h | 7 ++++++- arch/ia64/include/asm/tlb.h | 4 +++- arch/s390/include/asm/tlb.h | 7 ++++++- arch/sh/include/asm/tlb.h | 4 ++-- arch/um/include/asm/tlb.h | 7 ++++++- include/asm-generic/tlb.h | 2 +- include/linux/mm_types.h | 8 ++++++++ mm/memory.c | 18 ++++++++++++++++-- 8 files changed, 48 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 7f5b2a2d3861..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->range_start = start; + tlb->range_end = end; + } + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 93cadc04ac62..cbe5ac3699bf 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) + tlb->need_flush = 1; /* * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and * tlb->end_addr. diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index d574d0820dc8..2eb8ff0d6fca 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb) static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + } + tlb_flush_mmu(tlb); } diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 89786560dbd4..51a8bc967e75 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { - if (tlb->fullmm) + if (tlb->fullmm || force) flush_tlb_mm(tlb->mm); /* keep the page table cache within bounds */ diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 2a901eca7145..344d95619d03 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb) */ static inline void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { + if (force) { + tlb->start = start; + tlb->end = end; + tlb->need_flush = 1; + } tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 8f71521e7a44..faddde44de8c 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, bool force); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 892a7b0196fd..3cadee0a3508 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm) return atomic_read(&mm->tlb_flush_pending) > 0; } +/* + * Returns true if there are two above TLB batching threads in parallel. + */ +static inline bool mm_tlb_flush_nested(struct mm_struct *mm) +{ + return atomic_read(&mm->tlb_flush_pending) > 1; +} + static inline void init_tlb_flush_pending(struct mm_struct *mm) { atomic_set(&mm->tlb_flush_pending, 0); diff --git a/mm/memory.c b/mm/memory.c index 34cba5113e06..e158f7ac6730 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb) * that were required. */ void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, bool force) { struct mmu_gather_batch *batch, *next; + if (force) + __tlb_adjust_range(tlb, start, end - start); + tlb_flush_mmu(tlb); /* keep the page table cache within bounds */ @@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) { arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); } void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - arch_tlb_finish_mmu(tlb, start, end); + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); } /* -- cgit v1.2.3 From b3a81d0841a954a3d3e782059960f53e63b37066 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 10 Aug 2017 15:24:15 -0700 Subject: mm: fix KSM data corruption Nadav reported KSM can corrupt the user data by the TLB batching race[1]. That means data user written can be lost. Quote from Nadav Amit: "For this race we need 4 CPUs: CPU0: Caches a writable and dirty PTE entry, and uses the stale value for write later. CPU1: Runs madvise_free on the range that includes the PTE. It would clear the dirty-bit. It batches TLB flushes. CPU2: Writes 4 to /proc/PID/clear_refs , clearing the PTEs soft-dirty. We care about the fact that it clears the PTE write-bit, and of course, batches TLB flushes. CPU3: Runs KSM. Our purpose is to pass the following test in write_protect_page(): if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) || (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) Since it will avoid TLB flush. And we want to do it while the PTE is stale. Later, and before replacing the page, we would be able to change the page. Note that all the operations the CPU1-3 perform canhappen in parallel since they only acquire mmap_sem for read. We start with two identical pages. Everything below regards the same page/PTE. CPU0 CPU1 CPU2 CPU3 ---- ---- ---- ---- Write the same value on page [cache PTE as dirty in TLB] MADV_FREE pte_mkclean() 4 > clear_refs pte_wrprotect() write_protect_page() [ success, no flush ] pages_indentical() [ ok ] Write to page different value [Ok, using stale PTE] replace_page() Later, CPU1, CPU2 and CPU3 would flush the TLB, but that is too late. CPU0 already wrote on the page, but KSM ignored this write, and it got lost" In above scenario, MADV_FREE is fixed by changing TLB batching API including [set|clear]_tlb_flush_pending. Remained thing is soft-dirty part. This patch changes soft-dirty uses TLB batching API instead of flush_tlb_mm and KSM checks pending TLB flush by using mm_tlb_flush_pending so that it will flush TLB to avoid data lost if there are other parallel threads pending TLB flush. [1] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com Link: http://lkml.kernel.org/r/20170802000818.4760-8-namit@vmware.com Signed-off-by: Minchan Kim Signed-off-by: Nadav Amit Reported-by: Nadav Amit Tested-by: Nadav Amit Reviewed-by: Andrea Arcangeli Cc: Mel Gorman Cc: Hugh Dickins Cc: "David S. Miller" Cc: Andy Lutomirski Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jeff Dike Cc: Martin Schwidefsky Cc: Mel Gorman Cc: Nadav Amit Cc: Rik van Riel Cc: Russell King Cc: Sergey Senozhatsky Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 7 +++++-- mm/ksm.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b836fd61ed87..fe8f3265e877 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -16,9 +16,10 @@ #include #include #include +#include #include -#include +#include #include #include "internal.h" @@ -1008,6 +1009,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; + struct mmu_gather tlb; int itype; int rv; @@ -1054,6 +1056,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } down_read(&mm->mmap_sem); + tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) @@ -1075,7 +1078,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, walk_page_range(0, mm->highest_vm_end, &clear_refs_walk); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(mm, 0, -1); - flush_tlb_mm(mm); + tlb_finish_mmu(&tlb, 0, -1); up_read(&mm->mmap_sem); out_mm: mmput(mm); diff --git a/mm/ksm.c b/mm/ksm.c index 4dc92f138786..db20f8436bc3 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1038,7 +1038,8 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, goto out_unlock; if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) || - (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte))) { + (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) || + mm_tlb_flush_pending(mm)) { pte_t entry; swapped = PageSwapCache(page); -- cgit v1.2.3 From c0a6a5ae6b5d976592a83bdafc1c2f49b0718c65 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 10 Aug 2017 15:24:18 -0700 Subject: MAINTAINERS: copy virtio on balloon_compaction.c Changes to mm/balloon_compaction.c can easily break virtio, and virtio is the only user of that interface. Add a line to MAINTAINERS so whoever changes that file remembers to copy us. Link: http://lkml.kernel.org/r/1501764010-24456-1-git-send-email-mst@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Rafael Aquini Acked-by: Wei Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 84d6a8277cbd..6f7721d1634c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14004,6 +14004,7 @@ F: drivers/block/virtio_blk.c F: include/linux/virtio*.h F: include/uapi/linux/virtio_*.h F: drivers/crypto/virtio/ +F: mm/balloon_compaction.c VIRTIO CRYPTO DRIVER M: Gonglei -- cgit v1.2.3 From af54aed94bf3a1cf0b847bbbf00f9a58e278b338 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 10 Aug 2017 15:24:21 -0700 Subject: mm/balloon_compaction.c: don't zero ballooned pages Revert commit bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page to balloon device")' Zeroing ballon pages is rather time consuming, especially when a lot of pages are in flight. E.g. 7GB worth of ballooned memory takes 2.8s with __GFP_ZERO while it takes ~491ms without it. The original commit argued that zeroing will help ksmd to merge these pages on the host but this argument is assuming that the host actually marks balloon pages for ksm which is not universally true. So we pay performance penalty for something that even might not be used in the end which is wrong. The host can zero out pages on its own when there is a need. [mhocko@kernel.org: new changelog text] Link: http://lkml.kernel.org/r/1501761557-9758-1-git-send-email-wei.w.wang@intel.com Fixes: bb01b64cfab7 ("mm/balloon_compaction.c: enqueue zero page to balloon device") Signed-off-by: Wei Wang Acked-by: Michael S. Tsirkin Acked-by: Michal Hocko Cc: zhenwei.pi Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/balloon_compaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 9075aa54e955..b06d9fe23a28 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -24,7 +24,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info) { unsigned long flags; struct page *page = alloc_page(balloon_mapping_gfp_mask() | - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_ZERO); + __GFP_NOMEMALLOC | __GFP_NORETRY); if (!page) return NULL; -- cgit v1.2.3 From d041353dc98a6339182cd6f628b4c8f111278cb3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 10 Aug 2017 15:24:24 -0700 Subject: mm: fix list corruptions on shmem shrinklist We saw many list corruption warnings on shmem shrinklist: WARNING: CPU: 18 PID: 177 at lib/list_debug.c:59 __list_del_entry+0x9e/0xc0 list_del corruption. prev->next should be ffff9ae5694b82d8, but was ffff9ae5699ba960 Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt CPU: 18 PID: 177 Comm: kswapd1 Not tainted 4.9.34-t3.el7.twitter.x86_64 #1 Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013 Call Trace: dump_stack+0x4d/0x66 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 __list_del_entry+0x9e/0xc0 shmem_unused_huge_shrink+0xfa/0x2e0 shmem_unused_huge_scan+0x20/0x30 super_cache_scan+0x193/0x1a0 shrink_slab.part.41+0x1e3/0x3f0 shrink_slab+0x29/0x30 shrink_node+0xf9/0x2f0 kswapd+0x2d8/0x6c0 kthread+0xd7/0xf0 ret_from_fork+0x22/0x30 WARNING: CPU: 23 PID: 639 at lib/list_debug.c:33 __list_add+0x89/0xb0 list_add corruption. prev->next should be next (ffff9ae5699ba960), but was ffff9ae5694b82d8. (prev=ffff9ae5694b82d8). Modules linked in: intel_rapl sb_edac edac_core x86_pkg_temp_thermal coretemp iTCO_wdt iTCO_vendor_support crct10dif_pclmul crc32_pclmul ghash_clmulni_intel raid0 dcdbas shpchp wmi hed i2c_i801 ioatdma lpc_ich i2c_smbus acpi_cpufreq tcp_diag inet_diag sch_fq_codel ipmi_si ipmi_devintf ipmi_msghandler igb ptp crc32c_intel pps_core i2c_algo_bit i2c_core dca ipv6 crc_ccitt CPU: 23 PID: 639 Comm: systemd-udevd Tainted: G W 4.9.34-t3.el7.twitter.x86_64 #1 Hardware name: Dell Inc. PowerEdge C6220/0W6W6G, BIOS 2.2.3 11/07/2013 Call Trace: dump_stack+0x4d/0x66 __warn+0xcb/0xf0 warn_slowpath_fmt+0x4f/0x60 __list_add+0x89/0xb0 shmem_setattr+0x204/0x230 notify_change+0x2ef/0x440 do_truncate+0x5d/0x90 path_openat+0x331/0x1190 do_filp_open+0x7e/0xe0 do_sys_open+0x123/0x200 SyS_open+0x1e/0x20 do_syscall_64+0x61/0x170 entry_SYSCALL64_slow_path+0x25/0x25 The problem is that shmem_unused_huge_shrink() moves entries from the global sbinfo->shrinklist to its local lists and then releases the spinlock. However, a parallel shmem_setattr() could access one of these entries directly and add it back to the global shrinklist if it is removed, with the spinlock held. The logic itself looks solid since an entry could be either in a local list or the global list, otherwise it is removed from one of them by list_del_init(). So probably the race condition is that, one CPU is in the middle of INIT_LIST_HEAD() but the other CPU calls list_empty() which returns true too early then the following list_add_tail() sees a corrupted entry. list_empty_careful() is designed to fix this situation. [akpm@linux-foundation.org: add comments] Link: http://lkml.kernel.org/r/20170803054630.18775-1-xiyou.wangcong@gmail.com Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure") Signed-off-by: Cong Wang Acked-by: Linus Torvalds Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b0aa6075d164..6540e5982444 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1022,7 +1022,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr) */ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) { spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; @@ -1817,7 +1821,11 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo, * to shrink under memory pressure. */ spin_lock(&sbinfo->shrinklist_lock); - if (list_empty(&info->shrinklist)) { + /* + * _careful to defend against unlocked access to + * ->shrink_list in shmem_unused_huge_shrink() + */ + if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; -- cgit v1.2.3 From aac2fea94f7a3df8ad1eeb477eb2643f81fd5393 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 10 Aug 2017 15:24:27 -0700 Subject: rmap: do not call mmu_notifier_invalidate_page() under ptl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MMU notifiers can sleep, but in page_mkclean_one() we call mmu_notifier_invalidate_page() under page table lock. Let's instead use mmu_notifier_invalidate_range() outside page_vma_mapped_walk() loop. [jglisse@redhat.com: try_to_unmap_one() do not call mmu_notifier under ptl] Link: http://lkml.kernel.org/r/20170809204333.27485-1-jglisse@redhat.com Link: http://lkml.kernel.org/r/20170804134928.l4klfcnqatni7vsc@black.fi.intel.com Fixes: c7ab0d2fdc84 ("mm: convert try_to_unmap_one() to use page_vma_mapped_walk()") Signed-off-by: Kirill A. Shutemov Signed-off-by: Jérôme Glisse Reported-by: axie Cc: Alex Deucher Cc: "Writer, Tim" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 52 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/mm/rmap.c b/mm/rmap.c index c8993c63eb25..c1286d47aa1f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -888,10 +888,10 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, .flags = PVMW_SYNC, }; int *cleaned = arg; + bool invalidation_needed = false; while (page_vma_mapped_walk(&pvmw)) { int ret = 0; - address = pvmw.address; if (pvmw.pte) { pte_t entry; pte_t *pte = pvmw.pte; @@ -899,11 +899,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pte_dirty(*pte) && !pte_write(*pte)) continue; - flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + flush_cache_page(vma, pvmw.address, pte_pfn(*pte)); + entry = ptep_clear_flush(vma, pvmw.address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); - set_pte_at(vma->vm_mm, address, pte, entry); + set_pte_at(vma->vm_mm, pvmw.address, pte, entry); ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE @@ -913,11 +913,11 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; - flush_cache_page(vma, address, page_to_pfn(page)); - entry = pmdp_huge_clear_flush(vma, address, pmd); + flush_cache_page(vma, pvmw.address, page_to_pfn(page)); + entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); - set_pmd_at(vma->vm_mm, address, pmd, entry); + set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry); ret = 1; #else /* unexpected pmd-mapped page? */ @@ -926,11 +926,16 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, } if (ret) { - mmu_notifier_invalidate_page(vma->vm_mm, address); (*cleaned)++; + invalidation_needed = true; } } + if (invalidation_needed) { + mmu_notifier_invalidate_range(vma->vm_mm, address, + address + (1UL << compound_order(page))); + } + return true; } @@ -1323,7 +1328,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, }; pte_t pteval; struct page *subpage; - bool ret = true; + bool ret = true, invalidation_needed = false; enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ @@ -1363,11 +1368,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(!pvmw.pte, page); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); - address = pvmw.address; - if (!(flags & TTU_IGNORE_ACCESS)) { - if (ptep_clear_flush_young_notify(vma, address, + if (ptep_clear_flush_young_notify(vma, pvmw.address, pvmw.pte)) { ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1376,7 +1379,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); + flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially @@ -1386,11 +1389,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ - pteval = ptep_get_and_clear(mm, address, pvmw.pte); + pteval = ptep_get_and_clear(mm, pvmw.address, + pvmw.pte); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); } else { - pteval = ptep_clear_flush(vma, address, pvmw.pte); + pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte); } /* Move the dirty bit to the page. Now the pte is gone. */ @@ -1405,12 +1409,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (PageHuge(page)) { int nr = 1 << compound_order(page); hugetlb_count_sub(nr, mm); - set_huge_swap_pte_at(mm, address, + set_huge_swap_pte_at(mm, pvmw.address, pvmw.pte, pteval, vma_mmu_pagesize(vma)); } else { dec_mm_counter(mm, mm_counter(page)); - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); } } else if (pte_unused(pteval)) { @@ -1434,7 +1438,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(subpage) }; pte_t swp_pte; @@ -1460,7 +1464,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If the page was redirtied, it cannot be * discarded. Remap the page to page table. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); SetPageSwapBacked(page); ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1468,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } if (swap_duplicate(entry) < 0) { - set_pte_at(mm, address, pvmw.pte, pteval); + set_pte_at(mm, pvmw.address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; @@ -1484,14 +1488,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, address, pvmw.pte, swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); } else dec_mm_counter(mm, mm_counter_file(page)); discard: page_remove_rmap(subpage, PageHuge(page)); put_page(page); - mmu_notifier_invalidate_page(mm, address); + invalidation_needed = true; } + + if (invalidation_needed) + mmu_notifier_invalidate_range(mm, address, + address + (1UL << compound_order(page))); return ret; } -- cgit v1.2.3 From f357e345eef7863da037e0243f2d3df4ba6df986 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Thu, 10 Aug 2017 15:24:29 -0700 Subject: zram: rework copy of compressor name in comp_algorithm_store() comp_algorithm_store() passes the size of the source buffer to strlcpy() instead of the destination buffer size. Make it explicit that the two buffers have the same size and use strcpy() instead of strlcpy(). The latter can be done safely since the function ensures that the string in the source buffer is terminated. Link: http://lkml.kernel.org/r/20170803163350.45245-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Reviewed-by: Douglas Anderson Reviewed-by: Sergey Senozhatsky Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 856d5dc02451..3b1b6340ba13 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -308,7 +308,7 @@ static ssize_t comp_algorithm_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct zram *zram = dev_to_zram(dev); - char compressor[CRYPTO_MAX_ALG_NAME]; + char compressor[ARRAY_SIZE(zram->compressor)]; size_t sz; strlcpy(compressor, buf, sizeof(compressor)); @@ -327,7 +327,7 @@ static ssize_t comp_algorithm_store(struct device *dev, return -EBUSY; } - strlcpy(zram->compressor, compressor, sizeof(compressor)); + strcpy(zram->compressor, compressor); up_write(&zram->init_lock); return len; } -- cgit v1.2.3 From e86b298bebf7e799e4b7232e9135799f1947552e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 10 Aug 2017 15:24:32 -0700 Subject: userfaultfd: replace ENOSPC with ESRCH in case mm has gone during copy/zeropage When the process exit races with outstanding mcopy_atomic, it would be better to return ESRCH error. When such race occurs the process and it's mm are going away and returning "no such process" to the uffd monitor seems better fit than ENOSPC. Link: http://lkml.kernel.org/r/1502111545-32305-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Pavel Emelyanov Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 06ea26b8c996..b0d5897bc4e6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1600,7 +1600,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, uffdio_copy.len); mmput(ctx->mm); } else { - return -ENOSPC; + return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_copy->copy))) return -EFAULT; @@ -1647,7 +1647,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, uffdio_zeropage.range.len); mmput(ctx->mm); } else { - return -ENOSPC; + return -ESRCH; } if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage))) return -EFAULT; -- cgit v1.2.3 From c587c79f90632df59c61383c6abebb2e07a81911 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Tue, 8 Aug 2017 14:05:12 -0700 Subject: cpufreq: intel_pstate: report correct CPU frequencies during trace The intel_pstate CPU frequency scaling driver has always calculated CPU frequency incorrectly. Recent changes have eliminted most of the issues, however the frequency reported in the trace buffer, if used, is incorrect. It remains desireable that cpu->pstate.scaling still be a nice round number for things such as when setting max and min frequencies. So the proposal is to just fix the reported frequency in the trace data. Fixes what remains of [1]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=96521 # [1] Signed-off-by: Doug Smythies Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 0566455f233e..65ee4fcace1f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1613,8 +1613,7 @@ static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time) static inline int32_t get_avg_frequency(struct cpudata *cpu) { - return mul_ext_fp(cpu->sample.core_avg_perf, - cpu->pstate.max_pstate_physical * cpu->pstate.scaling); + return mul_ext_fp(cpu->sample.core_avg_perf, cpu_khz); } static inline int32_t get_avg_pstate(struct cpudata *cpu) -- cgit v1.2.3 From 8e2f3bce05e056575c2c84a344a8291fdabb5f21 Mon Sep 17 00:00:00 2001 From: Doug Smythies Date: Tue, 8 Aug 2017 14:12:49 -0700 Subject: cpufreq: x86: Disable interrupts during MSRs reading According to Intel 64 and IA-32 Architectures SDM, Volume 3, Chapter 14.2, "Software needs to exercise care to avoid delays between the two RDMSRs (for example interrupts)". So, disable interrupts during reading MSRs IA32_APERF and IA32_MPERF. See also: commit 4ab60c3f32c7 (cpufreq: intel_pstate: Disable interrupts during MSRs reading). Signed-off-by: Doug Smythies Reviewed-by: Len Brown Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/cpu/aperfmperf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 7cf7c70b6ef2..0ee83321a313 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -40,13 +40,16 @@ static void aperfmperf_snapshot_khz(void *dummy) struct aperfmperf_sample *s = this_cpu_ptr(&samples); ktime_t now = ktime_get(); s64 time_delta = ktime_ms_delta(now, s->time); + unsigned long flags; /* Don't bother re-computing within the cache threshold time. */ if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) return; + local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); + local_irq_restore(flags); aperf_delta = aperf - s->aperf; mperf_delta = mperf - s->mperf; -- cgit v1.2.3 From 4d3a869333b74352c372077f316756d38cae09b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 11 Aug 2017 09:51:41 +0200 Subject: ALSA: seq: Fix CONFIG_SND_SEQ_MIDI dependency The commit 0181307abc1d ("ALSA: seq: Reorganize kconfig and build") rewrote the dependency of each sequencer module in a standard way, but there was one change applied mistakenly: CONFIG_SND_SEQ_MIDI isn't enabled properly by CONFIG_SND_RAWMIDI. I seem to have changed the wrong one instead, CONFIG_SND_SEQ_MIDI_EMUL, which is eventually reverse-selected by CONFIG_SND_SEQ_MIDI itself. This ended up the lack of snd-seq-midi module as reported below. The fix is to put def_tristate properly to CONFIG_SND_SEQ_MIDI instead of *_MIDI_EMUL entry. Fixes: 0181307abc1d ("ALSA: seq: Reorganize kconfig and build") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196633 Signed-off-by: Takashi Iwai --- sound/core/seq/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/Kconfig b/sound/core/seq/Kconfig index a536760a94c2..45c1336c6597 100644 --- a/sound/core/seq/Kconfig +++ b/sound/core/seq/Kconfig @@ -47,10 +47,10 @@ config SND_SEQ_HRTIMER_DEFAULT timer. config SND_SEQ_MIDI_EVENT - def_tristate SND_RAWMIDI + tristate config SND_SEQ_MIDI - tristate + def_tristate SND_RAWMIDI select SND_SEQ_MIDI_EVENT config SND_SEQ_MIDI_EMUL -- cgit v1.2.3 From 9183976ef1c858c289b09066fd57aae51b86653c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 25 May 2017 06:57:50 -0400 Subject: fuse: set mapping error in writepage_locked when it fails This ensures that we see errors on fsync when writeback fails. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 810ed4f99e38..ab60051be6e5 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1669,6 +1669,7 @@ err_nofile: err_free: fuse_request_free(req); err: + mapping_set_error(page->mapping, error); end_page_writeback(page); return error; } -- cgit v1.2.3 From 9e80dbd87286d3252ac2f78c6465c16e2ec8d476 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 10 Jul 2017 10:22:25 +0300 Subject: clocksource/drivers/timer-of: Checking for IS_ERR() instead of NULL The current code checks the return value of the of_io_request_and_map() function as it was returning a NULL pointer in case of error. However, it returns an error code encoded in the pointer return value, not a NULL value. Fix this by checking the returned pointer against IS_ERR() and return the error with PTR_ERR(). Signed-off-by: Dan Carpenter Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-of.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index d509b500a7b5..4d7aef9d9c15 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -128,9 +128,9 @@ static __init int timer_base_init(struct device_node *np, const char *name = of_base->name ? of_base->name : np->full_name; of_base->base = of_io_request_and_map(np, of_base->index, name); - if (!of_base->base) { + if (IS_ERR(of_base->base)) { pr_err("Failed to iomap (%s)\n", name); - return -ENXIO; + return PTR_ERR(of_base->base); } return 0; -- cgit v1.2.3 From 599dc457c79bde8bd4fe8bbb2ba1f30ef3d7a5c8 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 18 Jul 2017 09:25:39 +0100 Subject: clocksource/drivers/Kconfig: Fix CLKSRC_PISTACHIO dependencies In v4.13, CLKSRC_PISTACHIO can select TIMER_OF on architectures without GENERIC_CLOCKEVENTS, resulting in a struct clock_event_device missing some required features and build breakage compiling timer_of.c. One of the symbols selecting TIMER_OF is CLKSRC_PISTACHIO, so add the dependency on GENERIC_CLOCKEVENTS. Thanks to kbuild test robot for finding this error (https://lkml.org/lkml/2017/7/16/249) Signed-off-by: Matt Redfearn Suggested-by: Ian Abbott Signed-off-by: Daniel Lezcano --- drivers/clocksource/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index fcae5ca6ac92..54a67f8a28eb 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -262,7 +262,7 @@ config CLKSRC_LPC32XX config CLKSRC_PISTACHIO bool "Clocksource for Pistachio SoC" if COMPILE_TEST - depends on HAS_IOMEM + depends on GENERIC_CLOCKEVENTS && HAS_IOMEM select TIMER_OF help Enables the clocksource for the Pistachio SoC. -- cgit v1.2.3 From 5442c26995527245c94c4a49e535eae8a60a5299 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Aug 2017 20:55:52 +0200 Subject: x86/cpufeature, kvm/svm: Rename (shorten) the new "virtualized VMSAVE/VMLOAD" CPUID flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "virtual_vmload_vmsave" is what is going to land in /proc/cpuinfo now as per v4.13-rc4, for a single feature bit which is clearly too long. So rename it to what it is called in the processor manual. "v_vmsave_vmload" is a bit shorter, after all. We could go more aggressively here but having it the same as in the processor manual is advantageous. Signed-off-by: Borislav Petkov Acked-by: Radim Krčmář Cc: Janakarajan Natarajan Cc: Jörg Rödel Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kvm-ML Link: http://lkml.kernel.org/r/20170801185552.GA3743@nazgul.tnic Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kvm/svm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ca3c48c0872f..5a28e8e55e36 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -286,7 +286,7 @@ #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1107626938cc..56ba05312759 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1100,7 +1100,7 @@ static __init int svm_hardware_setup(void) if (vls) { if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) || + !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || !IS_ENABLED(CONFIG_X86_64)) { vls = false; } else { -- cgit v1.2.3 From b45e4c45b13275a6b4a3f83ae8301a1963fbe5d0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 Aug 2017 16:57:09 +0100 Subject: x86: Mark various structures and functions as 'static' Mark a couple of structures and functions as 'static', pointed out by Sparse: warning: symbol 'bts_pmu' was not declared. Should it be static? warning: symbol 'p4_event_aliases' was not declared. Should it be static? warning: symbol 'rapl_attr_groups' was not declared. Should it be static? symbol 'process_uv2_message' was not declared. Should it be static? Signed-off-by: Colin Ian King Acked-by: Andrew Banman # for the UV change Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Andrzej Siewior Cc: Thomas Gleixner Cc: Will Deacon Cc: kernel-janitors@vger.kernel.org Link: http://lkml.kernel.org/r/20170810155709.7094-1-colin.king@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 2 +- arch/x86/events/intel/p4.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/platform/uv/tlb_uv.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 8ae8c5ce3a1f..ddd8d3516bfc 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -69,7 +69,7 @@ struct bts_buffer { struct bts_phys buf[0]; }; -struct pmu bts_pmu; +static struct pmu bts_pmu; static size_t buf_size(struct page *page) { diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index eb0533558c2b..d32c0eed38ca 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -587,7 +587,7 @@ static __initconst const u64 p4_hw_cache_event_ids * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are * either up to date automatically or not applicable at all. */ -struct p4_event_alias { +static struct p4_event_alias { u64 original; u64 alternative; } p4_event_aliases[] = { diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index a45e2114a846..8e2457cb6b4a 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -559,7 +559,7 @@ static struct attribute_group rapl_pmu_format_group = { .attrs = rapl_formats_attr, }; -const struct attribute_group *rapl_attr_groups[] = { +static const struct attribute_group *rapl_attr_groups[] = { &rapl_pmu_attr_group, &rapl_pmu_format_group, &rapl_pmu_events_group, diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3e4bdb442fbc..f44c0bc95aa2 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -26,7 +26,7 @@ static struct bau_operations ops __ro_after_init; /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ -static int timeout_base_ns[] = { +static const int timeout_base_ns[] = { 20, 160, 1280, @@ -1216,7 +1216,7 @@ static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg, * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register. * Such a message must be ignored. */ -void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) +static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp) { unsigned long mmr_image; unsigned char swack_vec; -- cgit v1.2.3 From c138d81163d82db044dcaf1141395713f03bf0bf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:12 +0200 Subject: x86: provide an init_mem_mapping hypervisor hook Provide a hook in hypervisor_x86 called after setting up initial memory mapping. This is needed e.g. by Xen HVM guests to map the hypervisor shared info page. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/include/asm/hypervisor.h | 10 ++++++++++ arch/x86/mm/init.c | 3 +++ 2 files changed, 13 insertions(+) diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 21126155a739..0ead9dbb9130 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -43,6 +43,9 @@ struct hypervisor_x86 { /* pin current vcpu to specified physical cpu (run rarely) */ void (*pin_vcpu)(int); + + /* called during init_mem_mapping() to setup early mappings. */ + void (*init_mem_mapping)(void); }; extern const struct hypervisor_x86 *x86_hyper; @@ -57,8 +60,15 @@ extern const struct hypervisor_x86 x86_hyper_kvm; extern void init_hypervisor_platform(void); extern bool hypervisor_x2apic_available(void); extern void hypervisor_pin_vcpu(int cpu); + +static inline void hypervisor_init_mem_mapping(void) +{ + if (x86_hyper && x86_hyper->init_mem_mapping) + x86_hyper->init_mem_mapping(); +} #else static inline void init_hypervisor_platform(void) { } static inline bool hypervisor_x2apic_available(void) { return false; } +static inline void hypervisor_init_mem_mapping(void) { } #endif /* CONFIG_HYPERVISOR_GUEST */ #endif /* _ASM_X86_HYPERVISOR_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 673541eb3b3f..bf3f1065d6ad 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -18,6 +18,7 @@ #include /* for MAX_DMA_PFN */ #include #include +#include /* * We need to define the tracepoints somewhere, and tlb.c @@ -636,6 +637,8 @@ void __init init_mem_mapping(void) load_cr3(swapper_pg_dir); __flush_tlb_all(); + hypervisor_init_mem_mapping(); + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } -- cgit v1.2.3 From 10231f69eb039550864ff3eb395da0c63c03ed5f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:13 +0200 Subject: xen: split up xen_hvm_init_shared_info() Instead of calling xen_hvm_init_shared_info() on boot and resume split it up into a boot time function searching for the pfn to use and a mapping function doing the hypervisor mapping call. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 45 +++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 87d791356ea9..d23531f5f17e 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -21,29 +21,9 @@ #include "mmu.h" #include "smp.h" -void __ref xen_hvm_init_shared_info(void) +void xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; - u64 pa; - - if (HYPERVISOR_shared_info == &xen_dummy_shared_info) { - /* - * Search for a free page starting at 4kB physical address. - * Low memory is preferred to avoid an EPT large page split up - * by the mapping. - * Starting below X86_RESERVE_LOW (usually 64kB) is fine as - * the BIOS used for HVM guests is well behaved and won't - * clobber memory other than the first 4kB. - */ - for (pa = PAGE_SIZE; - !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || - memblock_is_reserved(pa); - pa += PAGE_SIZE) - ; - - memblock_reserve(pa, PAGE_SIZE); - HYPERVISOR_shared_info = __va(pa); - } xatp.domid = DOMID_SELF; xatp.idx = 0; @@ -53,6 +33,28 @@ void __ref xen_hvm_init_shared_info(void) BUG(); } +static void __init reserve_shared_info(void) +{ + u64 pa; + + /* + * Search for a free page starting at 4kB physical address. + * Low memory is preferred to avoid an EPT large page split up + * by the mapping. + * Starting below X86_RESERVE_LOW (usually 64kB) is fine as + * the BIOS used for HVM guests is well behaved and won't + * clobber memory other than the first 4kB. + */ + for (pa = PAGE_SIZE; + !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) || + memblock_is_reserved(pa); + pa += PAGE_SIZE) + ; + + memblock_reserve(pa, PAGE_SIZE); + HYPERVISOR_shared_info = __va(pa); +} + static void __init init_hvm_pv_info(void) { int major, minor; @@ -153,6 +155,7 @@ static void __init xen_hvm_guest_init(void) init_hvm_pv_info(); + reserve_shared_info(); xen_hvm_init_shared_info(); /* -- cgit v1.2.3 From 4ca83dcf4e3bc0c98836dbb97553792ca7ea5429 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 12:23:14 +0200 Subject: xen: fix hvm guest with kaslr enabled A Xen HVM guest running with KASLR enabled will die rather soon today because the shared info page mapping is using va() too early. This was introduced by commit a5d5f328b0e2baa5ee7c119fd66324eb79eeeb66 ("xen: allocate page for shared info page from low memory"). In order to fix this use early_memremap() to get a temporary virtual address for shared info until va() can be used safely. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Acked-by: Ingo Molnar Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_hvm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index d23531f5f17e..de503c225ae1 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,8 @@ #include "mmu.h" #include "smp.h" +static unsigned long shared_info_pfn; + void xen_hvm_init_shared_info(void) { struct xen_add_to_physmap xatp; @@ -28,7 +31,7 @@ void xen_hvm_init_shared_info(void) xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info); + xatp.gpfn = shared_info_pfn; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); } @@ -51,8 +54,16 @@ static void __init reserve_shared_info(void) pa += PAGE_SIZE) ; + shared_info_pfn = PHYS_PFN(pa); + memblock_reserve(pa, PAGE_SIZE); - HYPERVISOR_shared_info = __va(pa); + HYPERVISOR_shared_info = early_memremap(pa, PAGE_SIZE); +} + +static void __init xen_hvm_init_mem_mapping(void) +{ + early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); + HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); } static void __init init_hvm_pv_info(void) @@ -221,5 +232,6 @@ const struct hypervisor_x86 x86_hyper_xen_hvm = { .init_platform = xen_hvm_guest_init, .pin_vcpu = xen_pin_vcpu, .x2apic_available = xen_x2apic_para_available, + .init_mem_mapping = xen_hvm_init_mem_mapping, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); -- cgit v1.2.3 From adb4f11e0a8f4e29900adb2b7af28b6bbd5c1fa4 Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Thu, 10 Aug 2017 10:52:45 +0800 Subject: clocksource/drivers/arm_arch_timer: Avoid infinite recursion when ftrace is enabled On platforms with an arch timer erratum workaround, it's possible for arch_timer_reg_read_stable() to recurse into itself when certain tracing options are enabled, leading to stack overflows and related problems. For example, when PREEMPT_TRACER and FUNCTION_GRAPH_TRACER are selected, it's possible to trigger this with: $ mount -t debugfs nodev /sys/kernel/debug/ $ echo function_graph > /sys/kernel/debug/tracing/current_tracer The problem is that in such cases, preempt_disable() instrumentation attempts to acquire a timestamp via trace_clock(), resulting in a call back to arch_timer_reg_read_stable(), and hence recursion. This patch changes arch_timer_reg_read_stable() to use preempt_{disable,enable}_notrace(), which avoids this. This problem is similar to the fixed by upstream commit 96b3d28bf4 ("sched/clock: Prevent tracing recursion in sched_clock_cpu()"). Fixes: 6acc71ccac71 ("arm64: arch_timer: Allows a CPU-specific erratum to only affect a subset of CPUs") Signed-off-by: Ding Tianhong Acked-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Daniel Lezcano --- arch/arm64/include/asm/arch_timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 74d08e44a651..a652ce0a5cb2 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -65,13 +65,13 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, u64 _val; \ if (needs_unstable_timer_counter_workaround()) { \ const struct arch_timer_erratum_workaround *wa; \ - preempt_disable(); \ + preempt_disable_notrace(); \ wa = __this_cpu_read(timer_unstable_counter_workaround); \ if (wa && wa->read_##reg) \ _val = wa->read_##reg(); \ else \ _val = read_sysreg(reg); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } else { \ _val = read_sysreg(reg); \ } \ -- cgit v1.2.3 From 529871bb3c0675d0b425e2070d5a739db097be98 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 28 Jul 2017 16:53:55 +0200 Subject: xen: avoid deadlock in xenbus When starting the xenwatch thread a theoretical deadlock situation is possible: xs_init() contains: task = kthread_run(xenwatch_thread, NULL, "xenwatch"); if (IS_ERR(task)) return PTR_ERR(task); xenwatch_pid = task->pid; And xenwatch_thread() does: mutex_lock(&xenwatch_mutex); ... event->handle->callback(); ... mutex_unlock(&xenwatch_mutex); The callback could call unregister_xenbus_watch() which does: ... if (current->pid != xenwatch_pid) mutex_lock(&xenwatch_mutex); ... In case a watch is firing before xenwatch_pid could be set and the callback of that watch unregisters a watch, then a self-deadlock would occur. Avoid this by setting xenwatch_pid in xenwatch_thread(). Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_xs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e46080214955..3e59590c7254 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -857,6 +857,8 @@ static int xenwatch_thread(void *unused) struct list_head *ent; struct xs_watch_event *event; + xenwatch_pid = current->pid; + for (;;) { wait_event_interruptible(watch_events_waitq, !list_empty(&watch_events)); @@ -925,7 +927,6 @@ int xs_init(void) task = kthread_run(xenwatch_thread, NULL, "xenwatch"); if (IS_ERR(task)) return PTR_ERR(task); - xenwatch_pid = task->pid; /* shutdown watches for kexec boot */ xs_reset_watches(); -- cgit v1.2.3 From 020db9d3c1dc0aab9ab1252f4a36b6d8456b8794 Mon Sep 17 00:00:00 2001 From: Liu Shuo Date: Sun, 30 Jul 2017 00:59:57 +0800 Subject: xen/events: Fix interrupt lost during irq_disable and irq_enable Here is a device has xen-pirq-MSI interrupt. Dom0 might lost interrupt during driver irq_disable/irq_enable. Here is the scenario, 1. irq_disable -> disable_dynirq -> mask_evtchn(irq channel) 2. dev interrupt raised by HW and Xen mark its evtchn as pending 3. irq_enable -> startup_pirq -> eoi_pirq -> clear_evtchn(channel of irq) -> clear pending status 4. consume_one_event process the irq event without pending bit assert which result in interrupt lost once 5. No HW interrupt raising anymore. Now use enable_dynirq for enable_pirq of xen_pirq_chip to remove eoi_pirq when irq_enable. Signed-off-by: Liu Shuo Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index bae1f5d36c26..2d43118077e4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -574,7 +574,7 @@ static void shutdown_pirq(struct irq_data *data) static void enable_pirq(struct irq_data *data) { - startup_pirq(data); + enable_dynirq(data); } static void disable_pirq(struct irq_data *data) -- cgit v1.2.3 From a7990c647b35415e3dd07a077480a908678947ba Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 8 Aug 2017 12:26:02 +0200 Subject: iommu/arm-smmu: fix null-pointer dereference in arm_smmu_add_device Commit c54451a "iommu/arm-smmu: Fix the error path in arm_smmu_add_device" removed fwspec assignment in legacy_binding path as redundant which is wrong. It needs to be updated after fwspec initialisation in arm_smmu_register_legacy_master() as it is dereferenced later. Without this there is a NULL-pointer dereference panic during boot on some hosts. Signed-off-by: Artem Savkov Reviewed-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b97188acc4f1..2d80fa8a0634 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1519,6 +1519,13 @@ static int arm_smmu_add_device(struct device *dev) if (using_legacy_binding) { ret = arm_smmu_register_legacy_master(dev, &smmu); + + /* + * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master() + * will allocate/initialise a new one. Thus we need to update fwspec for + * later use. + */ + fwspec = dev->iommu_fwspec; if (ret) goto out_free; } else if (fwspec && fwspec->ops == &arm_smmu_ops) { -- cgit v1.2.3 From 622b2fbe625bc255faa4ee69a0fbcab80d3e40e6 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 9 Aug 2017 15:59:10 -0600 Subject: selftests: timers: freq-step: fix compile error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compile error due to ksft_exit_skip() update to take var_args. freq-step.c: In function ‘init_test’: freq-step.c:234:3: error: too few arguments to function ‘ksft_exit_skip’ ksft_exit_skip(); ^~~~~~~~~~~~~~ In file included from freq-step.c:26:0: ../kselftest.h:167:19: note: declared here static inline int ksft_exit_skip(const char *msg, ...) ^~~~~~~~~~~~~~ : recipe for target 'freq-step' failed Signed-off-by: Shuah Khan --- tools/testing/selftests/timers/freq-step.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c index e8c61830825a..22312eb4c941 100644 --- a/tools/testing/selftests/timers/freq-step.c +++ b/tools/testing/selftests/timers/freq-step.c @@ -229,10 +229,9 @@ static void init_test(void) printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t", 1e9 * precision); - if (precision > MAX_PRECISION) { - printf("[SKIP]\n"); - ksft_exit_skip(); - } + if (precision > MAX_PRECISION) + ksft_exit_skip("precision: %.0f ns > MAX_PRECISION: %.0f ns\n", + 1e9 * precision, 1e9 * MAX_PRECISION); printf("[OK]\n"); srand(ts.tv_sec ^ ts.tv_nsec); -- cgit v1.2.3 From 8a9d6e964d318533ba3d2901ce153ba317c99a89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 5 Aug 2017 10:59:14 +0200 Subject: pnfs/blocklayout: require 64-bit sector_t The blocklayout code does not compile cleanly for a 32-bit sector_t, and also has no reliable checks for devices sizes, which makes it unsafe to use with a kernel that doesn't support large block devices. Signed-off-by: Christoph Hellwig Reported-by: Arnd Bergmann Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing") Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 69d02cf8cf37..5f93cfacb3d1 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT config PNFS_BLOCK tristate depends on NFS_V4_1 && BLK_DEV_DM + depends on 64BIT || LBDAF default NFS_V4 config PNFS_FLEXFILE_LAYOUT -- cgit v1.2.3 From e71cb9e00922902ba0519f37d09145f117dc02b3 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 9 Aug 2017 16:46:09 -0400 Subject: net: dsa: ksz: fix skb freeing The DSA layer frees the original skb when an xmit function returns NULL, meaning an error occurred. But if the tagging code copied the original skb, it is responsible of freeing the copy if an error occurs. The ksz tagging code currently has two issues: if skb_put_padto fails, the skb copy is not freed, and the original skb will be freed twice. To fix that, move skb_put_padto inside both branches of the skb_tailroom condition, before freeing the original skb, and free the copy on error. Signed-off-by: Vivien Didelot Reviewed-by: Woojung Huh Signed-off-by: David S. Miller --- net/dsa/tag_ksz.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index fab41de8e983..de66ca8e6201 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -42,6 +42,9 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len; if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { + if (skb_put_padto(skb, skb->len + padlen)) + return NULL; + nskb = skb; } else { nskb = alloc_skb(NET_IP_ALIGN + skb->len + @@ -56,13 +59,15 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + + if (skb_put_padto(nskb, nskb->len + padlen)) { + kfree_skb(nskb); + return NULL; + } + kfree_skb(skb); } - /* skb is freed when it fails */ - if (skb_put_padto(nskb, nskb->len + padlen)) - return NULL; - tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); tag[0] = 0; tag[1] = 1 << p->dp->index; /* destination port */ -- cgit v1.2.3 From ad729bc9acfb7c47112964b4877ef5404578ed13 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Thu, 10 Aug 2017 06:41:44 +0200 Subject: bonding: require speed/duplex only for 802.3ad, alb and tlb The patch c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") puts the link state to down if bond_update_speed_duplex() cannot retrieve speed and duplex settings. Assumably the patch was written with 802.3ad mode in mind which relies on link speed/duplex settings. For other modes like active-backup these settings are not required. Thus, only for these other modes, this patch reintroduces support for slaves that do not support reporting speed or duplex such as wireless devices. This fixes the regression reported in bug 196547 (https://bugzilla.kernel.org/show_bug.cgi?id=196547). Fixes: c4adfc822bf5 ("bonding: make speed, duplex setting consistent with link state") Signed-off-by: Andreas Born Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 6 ++++-- include/net/bonding.h | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9bee6c1c70cc..85bb272d2a34 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1569,7 +1569,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->delay = 0; new_slave->link_failure_count = 0; - if (bond_update_speed_duplex(new_slave)) + if (bond_update_speed_duplex(new_slave) && + bond_needs_speed_duplex(bond)) new_slave->link = BOND_LINK_DOWN; new_slave->last_rx = jiffies - @@ -2140,7 +2141,8 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - if (bond_update_speed_duplex(slave)) { + if (bond_update_speed_duplex(slave) && + bond_needs_speed_duplex(bond)) { slave->link = BOND_LINK_DOWN; netdev_warn(bond->dev, "failed to get link speed/duplex for %s\n", diff --git a/include/net/bonding.h b/include/net/bonding.h index b00508d22e0a..b2e68657a216 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -277,6 +277,11 @@ static inline bool bond_is_lb(const struct bonding *bond) BOND_MODE(bond) == BOND_MODE_ALB; } +static inline bool bond_needs_speed_duplex(const struct bonding *bond) +{ + return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); +} + static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_TLB) && -- cgit v1.2.3 From 8d55373875052b891ae72c9bcaf9c2d7178676c0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 10 Aug 2017 12:31:40 +0300 Subject: net/sched/hfsc: allocate tcf block for hfsc root class Without this filters cannot be attached. Signed-off-by: Konstantin Khlebnikov Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b52f74610dc7..3ad02bbe6903 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1428,6 +1428,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; + err = tcf_block_get(&q->root.block, &q->root.filter_list); + if (err) + goto err_tcf; + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; @@ -1447,6 +1451,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); return 0; + +err_tcf: + qdisc_class_hash_destroy(&q->clhash); + return err; } static int -- cgit v1.2.3 From fbca164776e438b639af592c522b8b0506b54dcc Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Thu, 10 Aug 2017 16:56:05 +0200 Subject: net: stmmac: Use the right logging function in stmmac_mdio_register Currently, the function stmmac_mdio_register() is only used by stmmac_dvr_probe() from stmmac_main.c, in order to register the MDIO bus and probe information about the PHY. As this function is called before calling register_netdev(), all messages logged from stmmac_mdio_register are prefixed by "(unnamed net_device)". The goal of netdev_info or netdev_err is to dump useful infos about a net_device, when this data structure is partially initialized, there is no point for using these functions. This commit fixes the issue by replacing all netdev_*() by the corresponding dev_*() function for logging. The last netdev_info is replaced by phy_attached_info(), as a valid phydev can be used at this point. Signed-off-by: Romain Perier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index db157a47000c..72ec711fcba2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -204,6 +204,7 @@ int stmmac_mdio_register(struct net_device *ndev) struct stmmac_priv *priv = netdev_priv(ndev); struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data; struct device_node *mdio_node = priv->plat->mdio_node; + struct device *dev = ndev->dev.parent; int addr, found; if (!mdio_bus_data) @@ -237,7 +238,7 @@ int stmmac_mdio_register(struct net_device *ndev) else err = mdiobus_register(new_bus); if (err != 0) { - netdev_err(ndev, "Cannot register the MDIO bus\n"); + dev_err(dev, "Cannot register the MDIO bus\n"); goto bus_register_fail; } @@ -285,14 +286,12 @@ int stmmac_mdio_register(struct net_device *ndev) irq_str = irq_num; break; } - netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n", - phydev->phy_id, addr, irq_str, phydev_name(phydev), - act ? " active" : ""); + phy_attached_info(phydev); found = 1; } if (!found && !mdio_node) { - netdev_warn(ndev, "No PHY found\n"); + dev_warn(dev, "No PHY found\n"); mdiobus_unregister(new_bus); mdiobus_free(new_bus); return -ENODEV; -- cgit v1.2.3 From bb3afda4fc4ea690ff92a36eef4c0afe4d19da04 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 11 Aug 2017 10:18:20 +0200 Subject: nfp: do not update MTU from BH in flower app The Flower app may receive a request to update the MTU of a representor netdev upon receipt of a control message from the firmware. This requires the RTNL lock which needs to be taken outside of the packet processing path. As a handling of this correctly seems a little to invasive for a fix simply skip setting the MTU for now. Relevant backtrace: [ 1496.288489] BUG: scheduling while atomic: kworker/0:3/373/0x00000100 [ 1496.294911] dca syscopyarea sysfillrect sysimgblt fb_sys_fops ptp drm mxm_wmi ahci pps_core libahci i2c_algo_bit wmi [last unloaded: nfp] [ 1496.294918] CPU: 0 PID: 373 Comm: kworker/0:3 Tainted: G OE 4.13.0-rc3+ #3 [ 1496.294919] Hardware name: Supermicro X10DRi/X10DRi, BIOS 2.0 12/28/2015 [ 1496.294923] Workqueue: events work_for_cpu_fn [ 1496.294924] Call Trace: [ 1496.294927] [ 1496.294931] dump_stack+0x63/0x82 [ 1496.294935] __schedule_bug+0x54/0x70 [ 1496.294937] __schedule+0x62f/0x890 [ 1496.294941] ? intel_unmap_sg+0x90/0x90 [ 1496.294942] schedule+0x36/0x80 [ 1496.294943] schedule_preempt_disabled+0xe/0x10 [ 1496.294945] __mutex_lock.isra.2+0x445/0x4a0 [ 1496.294947] ? device_is_rmrr_locked+0x12/0x50 [ 1496.294950] ? kfree+0x162/0x170 [ 1496.294952] ? device_is_rmrr_locked+0x12/0x50 [ 1496.294953] ? iommu_should_identity_map+0x50/0xe0 [ 1496.294954] __mutex_lock_slowpath+0x13/0x20 [ 1496.294955] ? iommu_no_mapping+0x48/0xd0 [ 1496.294956] ? __mutex_lock_slowpath+0x13/0x20 [ 1496.294957] mutex_lock+0x2f/0x40 [ 1496.294960] rtnl_lock+0x15/0x20 [ 1496.294979] nfp_flower_cmsg_rx+0xc8/0x150 [nfp] [ 1496.294986] nfp_ctrl_poll+0x286/0x350 [nfp] [ 1496.294989] tasklet_action+0xf6/0x110 [ 1496.294992] __do_softirq+0xed/0x278 [ 1496.294993] irq_exit+0xb6/0xc0 [ 1496.294994] do_IRQ+0x4f/0xd0 [ 1496.294996] common_interrupt+0x89/0x89 Fixes: 948faa46c05b ("nfp: add support for control messages for flower app") Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index dd7fa9cf225f..b0837b58c3a1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -115,14 +115,10 @@ nfp_flower_cmsg_portmod_rx(struct nfp_app *app, struct sk_buff *skb) return; } - if (link) { + if (link) netif_carrier_on(netdev); - rtnl_lock(); - dev_set_mtu(netdev, be16_to_cpu(msg->mtu)); - rtnl_unlock(); - } else { + else netif_carrier_off(netdev); - } rcu_read_unlock(); } -- cgit v1.2.3 From 54a6a043fb8580d5a741774669ef6049f402f228 Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Fri, 11 Aug 2017 15:57:22 +0300 Subject: mISDN: Fix null pointer dereference at mISDN_FsmNew If mISDN_FsmNew() fails to allocate memory for jumpmatrix then null pointer dereference will occur on any write to jumpmatrix. The patch adds check on successful allocation and corresponding error handling. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Anton Vasilyev Signed-off-by: David S. Miller --- drivers/isdn/mISDN/fsm.c | 5 ++++- drivers/isdn/mISDN/fsm.h | 2 +- drivers/isdn/mISDN/layer1.c | 3 +-- drivers/isdn/mISDN/layer2.c | 15 +++++++++++++-- drivers/isdn/mISDN/tei.c | 20 +++++++++++++++++--- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c index 78fc5d5e9051..92e6570b1143 100644 --- a/drivers/isdn/mISDN/fsm.c +++ b/drivers/isdn/mISDN/fsm.c @@ -26,7 +26,7 @@ #define FSM_TIMER_DEBUG 0 -void +int mISDN_FsmNew(struct Fsm *fsm, struct FsmNode *fnlist, int fncount) { @@ -34,6 +34,8 @@ mISDN_FsmNew(struct Fsm *fsm, fsm->jumpmatrix = kzalloc(sizeof(FSMFNPTR) * fsm->state_count * fsm->event_count, GFP_KERNEL); + if (fsm->jumpmatrix == NULL) + return -ENOMEM; for (i = 0; i < fncount; i++) if ((fnlist[i].state >= fsm->state_count) || @@ -45,6 +47,7 @@ mISDN_FsmNew(struct Fsm *fsm, } else fsm->jumpmatrix[fsm->state_count * fnlist[i].event + fnlist[i].state] = (FSMFNPTR) fnlist[i].routine; + return 0; } EXPORT_SYMBOL(mISDN_FsmNew); diff --git a/drivers/isdn/mISDN/fsm.h b/drivers/isdn/mISDN/fsm.h index 928f5be192c1..e1def8490221 100644 --- a/drivers/isdn/mISDN/fsm.h +++ b/drivers/isdn/mISDN/fsm.h @@ -55,7 +55,7 @@ struct FsmTimer { void *arg; }; -extern void mISDN_FsmNew(struct Fsm *, struct FsmNode *, int); +extern int mISDN_FsmNew(struct Fsm *, struct FsmNode *, int); extern void mISDN_FsmFree(struct Fsm *); extern int mISDN_FsmEvent(struct FsmInst *, int , void *); extern void mISDN_FsmChangeState(struct FsmInst *, int); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index bebc57b72138..3192b0eb3944 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -414,8 +414,7 @@ l1_init(u_int *deb) l1fsm_s.event_count = L1_EVENT_COUNT; l1fsm_s.strEvent = strL1Event; l1fsm_s.strState = strL1SState; - mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList)); - return 0; + return mISDN_FsmNew(&l1fsm_s, L1SFnList, ARRAY_SIZE(L1SFnList)); } void diff --git a/drivers/isdn/mISDN/layer2.c b/drivers/isdn/mISDN/layer2.c index 7243a6746f8b..9ff0903a0e89 100644 --- a/drivers/isdn/mISDN/layer2.c +++ b/drivers/isdn/mISDN/layer2.c @@ -2247,15 +2247,26 @@ static struct Bprotocol X75SLP = { int Isdnl2_Init(u_int *deb) { + int res; debug = deb; mISDN_register_Bprotocol(&X75SLP); l2fsm.state_count = L2_STATE_COUNT; l2fsm.event_count = L2_EVENT_COUNT; l2fsm.strEvent = strL2Event; l2fsm.strState = strL2State; - mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList)); - TEIInit(deb); + res = mISDN_FsmNew(&l2fsm, L2FnList, ARRAY_SIZE(L2FnList)); + if (res) + goto error; + res = TEIInit(deb); + if (res) + goto error_fsm; return 0; + +error_fsm: + mISDN_FsmFree(&l2fsm); +error: + mISDN_unregister_Bprotocol(&X75SLP); + return res; } void diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 908127efccf8..12d9e5f4beb1 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -1387,23 +1387,37 @@ create_teimanager(struct mISDNdevice *dev) int TEIInit(u_int *deb) { + int res; debug = deb; teifsmu.state_count = TEI_STATE_COUNT; teifsmu.event_count = TEI_EVENT_COUNT; teifsmu.strEvent = strTeiEvent; teifsmu.strState = strTeiState; - mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser)); + res = mISDN_FsmNew(&teifsmu, TeiFnListUser, ARRAY_SIZE(TeiFnListUser)); + if (res) + goto error; teifsmn.state_count = TEI_STATE_COUNT; teifsmn.event_count = TEI_EVENT_COUNT; teifsmn.strEvent = strTeiEvent; teifsmn.strState = strTeiState; - mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet)); + res = mISDN_FsmNew(&teifsmn, TeiFnListNet, ARRAY_SIZE(TeiFnListNet)); + if (res) + goto error_smn; deactfsm.state_count = DEACT_STATE_COUNT; deactfsm.event_count = DEACT_EVENT_COUNT; deactfsm.strEvent = strDeactEvent; deactfsm.strState = strDeactState; - mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList)); + res = mISDN_FsmNew(&deactfsm, DeactFnList, ARRAY_SIZE(DeactFnList)); + if (res) + goto error_deact; return 0; + +error_deact: + mISDN_FsmFree(&teifsmn); +error_smn: + mISDN_FsmFree(&teifsmu); +error: + return res; } void TEIFree(void) -- cgit v1.2.3 From e4dde4127396f0c8f1c2e11b3ecc5baf4f8628bf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:24 +0200 Subject: net: fix compilation when busy poll is not enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIN_NAPI_ID is used in various places outside of CONFIG_NET_RX_BUSY_POLL wrapping, so when it's not set we run into build errors such as: net/core/dev.c: In function 'dev_get_by_napi_id': net/core/dev.c:886:16: error: ‘MIN_NAPI_ID’ undeclared (first use in this function) if (napi_id < MIN_NAPI_ID) ^~~~~~~~~~~ Thus, have MIN_NAPI_ID always defined to fix these errors. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/busy_poll.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 8ffd434676b7..71c72a939bf8 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -29,18 +29,18 @@ #include #include -#ifdef CONFIG_NET_RX_BUSY_POLL - -struct napi_struct; -extern unsigned int sysctl_net_busy_read __read_mostly; -extern unsigned int sysctl_net_busy_poll __read_mostly; - /* 0 - Reserved to indicate value not set * 1..NR_CPUS - Reserved for sender_cpu * NR_CPUS+1..~0 - Region available for NAPI IDs */ #define MIN_NAPI_ID ((unsigned int)(NR_CPUS + 1)) +#ifdef CONFIG_NET_RX_BUSY_POLL + +struct napi_struct; +extern unsigned int sysctl_net_busy_read __read_mostly; +extern unsigned int sysctl_net_busy_poll __read_mostly; + static inline bool net_busy_loop_on(void) { return sysctl_net_busy_poll; -- cgit v1.2.3 From 2ed46ce45ec02f6b2188419acdf372a144e06fb5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 11 Aug 2017 18:31:25 +0200 Subject: bpf: fix two missing target_size settings in bpf_convert_ctx_access When CONFIG_NET_SCHED or CONFIG_NET_RX_BUSY_POLL is /not/ set and we try a narrow __sk_buff load of tc_index or napi_id, respectively, then verifier rightfully complains that it's misconfigured, because we need to set target_size in each of the two cases. The rewrite for the ctx access is just a dummy op, but needs to pass, so fix this up. Fixes: f96da09473b5 ("bpf: simplify narrower ctx access") Reported-by: Shubham Bansal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/filter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..6280a602604c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3505,6 +3505,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct sk_buff, tc_index, 2, target_size)); #else + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else @@ -3520,6 +3521,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else + *target_size = 4; *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; -- cgit v1.2.3 From fd851ba9caa9a63fdbb72a2e6ed5560c0989e999 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Aug 2017 10:48:53 -0700 Subject: udp: harden copy_linear_skb() syzkaller got crashes with CONFIG_HARDENED_USERCOPY=y configs. Issue here is that recvfrom() can be used with user buffer of Z bytes, and SO_PEEK_OFF of X bytes, from a skb with Y bytes, and following condition : Z < X < Y kernel BUG at mm/usercopy.c:72! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 2917 Comm: syzkaller842281 Not tainted 4.13.0-rc3+ #16 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801d2fa40c0 task.stack: ffff8801d1fe8000 RIP: 0010:report_usercopy mm/usercopy.c:64 [inline] RIP: 0010:__check_object_size+0x3ad/0x500 mm/usercopy.c:264 RSP: 0018:ffff8801d1fef8a8 EFLAGS: 00010286 RAX: 0000000000000078 RBX: ffffffff847102c0 RCX: 0000000000000000 RDX: 0000000000000078 RSI: 1ffff1003a3fded5 RDI: ffffed003a3fdf09 RBP: ffff8801d1fef998 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d1ea480e R13: fffffffffffffffa R14: ffffffff84710280 R15: dffffc0000000000 FS: 0000000001360880(0000) GS:ffff8801dc000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000202ecfe4 CR3: 00000001d1ff8000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_to_iter include/linux/uio.h:105 [inline] copy_linear_skb include/net/udp.h:371 [inline] udpv6_recvmsg+0x1040/0x1af0 net/ipv6/udp.c:395 inet_recvmsg+0x14c/0x5f0 net/ipv4/af_inet.c:793 sock_recvmsg_nosec net/socket.c:792 [inline] sock_recvmsg+0xc9/0x110 net/socket.c:799 SYSC_recvfrom+0x2d6/0x570 net/socket.c:1788 SyS_recvfrom+0x40/0x50 net/socket.c:1760 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Signed-off-by: Eric Dumazet Cc: Paolo Abeni Signed-off-by: David S. Miller --- include/net/udp.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/udp.h b/include/net/udp.h index cc8036987dcb..e9b1d1eacb59 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -368,6 +368,8 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, { int n, copy = len - off; + if (copy < 0) + return -EINVAL; n = copy_to_iter(skb->data + off, copy, to); if (n == copy) return 0; -- cgit v1.2.3 From c44245b3d5435f533ca8346ece65918f84c057f9 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 11 Aug 2017 09:00:06 -0700 Subject: xfs: fix inobt inode allocation search optimization When we try to allocate a free inode by searching the inobt, we try to find the inode nearest the parent inode by searching chunks both left and right of the chunk containing the parent. As an optimization, we cache the leftmost and rightmost records that we previously searched; if we do another allocation with the same parent inode, we'll pick up the search where it last left off. There's a bug in the case where we found a free inode to the left of the parent's chunk: we need to update the cached left and right records, but because we already reassigned the right record to point to the left, we end up assigning the left record to both the cached left and right records. This isn't a correctness problem strictly, but it can result in the next allocation rechecking chunks unnecessarily or allocating inodes further away from the parent than it needs to. Fix it by swapping the record pointer after we update the cached left and right records. Fixes: bd169565993b ("xfs: speed up free inode search") Signed-off-by: Omar Sandoval Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index ffd5a15d1bb6..abf5beaae907 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1246,13 +1246,13 @@ xfs_dialloc_ag_inobt( /* free inodes to the left? */ if (useleft && trec.ir_freecount) { - rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; + rec = trec; goto alloc_inode; } -- cgit v1.2.3 From e28ae8e428fefe2facd72cea9f29906ecb9c861d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Aug 2017 12:45:35 -0700 Subject: iomap: fix integer truncation issues in the zeroing and dirtying helpers Fix the min_t calls in the zeroing and dirtying helpers to perform the comparisms on 64-bit types, which prevents them from incorrectly being truncated, and larger zeroing operations being stuck in a never ending loop. Special thanks to Markus Stockhausen for spotting the bug. Reported-by: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 039266128b7f..59cc98ad7577 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -278,7 +278,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, unsigned long bytes; /* Bytes to write to page */ offset = (pos & (PAGE_SIZE - 1)); - bytes = min_t(unsigned long, PAGE_SIZE - offset, length); + bytes = min_t(loff_t, PAGE_SIZE - offset, length); rpage = __iomap_read_page(inode, pos); if (IS_ERR(rpage)) @@ -373,7 +373,7 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count, unsigned offset, bytes; offset = pos & (PAGE_SIZE - 1); /* Within page */ - bytes = min_t(unsigned, PAGE_SIZE - offset, count); + bytes = min_t(loff_t, PAGE_SIZE - offset, count); if (IS_DAX(inode)) status = iomap_dax_zero(pos, offset, bytes, iomap); -- cgit v1.2.3 From afc1f55ca44e257f69da8f43e0714a76686ae8d1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 11 Aug 2017 20:34:45 -0700 Subject: MD: not clear ->safemode for external metadata array ->safemode should be triggered by mdadm for external metadaa array, otherwise array's state confuses mdadm. Fixes: 33182d15c6bf(md: always clear ->safemode when md_check_recovery gets the mddev lock.) Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index e4ba95f6cd59..b01e458d31e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8656,7 +8656,7 @@ void md_check_recovery(struct mddev *mddev) if (mddev_trylock(mddev)) { int spares = 0; - if (mddev->safemode == 1) + if (!mddev->external && mddev->safemode == 1) mddev->safemode = 0; if (mddev->ro) { -- cgit v1.2.3 From 9a51544774a57fcb94994a61860a17f9e63a8d7b Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 2 Aug 2017 18:03:05 +0530 Subject: mtd: blkdevs: Fix mtd block write failure All the MTD block write requests are failing with following error messages mkfs.ext4 /dev/mtdblock0 print_req_error: I/O error, dev mtdblock0, sector 0 Buffer I/O error on dev mtdblock0, logical block 0, lost async page write The control is going to default case after block write request because of missing return. Fixes: commit 2a842acab109 ("block: introduce new block status code type") Signed-off-by: Abhishek Sahu Signed-off-by: Brian Norris --- drivers/mtd/mtd_blkdevs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index f336a9b85576..9ec8f033ac5f 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -113,6 +113,7 @@ static blk_status_t do_blktrans_request(struct mtd_blktrans_ops *tr, for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) return BLK_STS_IOERR; + return BLK_STS_OK; default: return BLK_STS_IOERR; } -- cgit v1.2.3 From ef954844c7ace62f773f4f23e28d2d915adc419f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Aug 2017 16:01:32 -0700 Subject: Linux 4.13-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6eba23bcb5ad..90da7bdc3f45 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From 11e9d7829dd08dbafb24517fe922f11c3a8a9dc2 Mon Sep 17 00:00:00 2001 From: Andreas Born Date: Sat, 12 Aug 2017 00:36:55 +0200 Subject: bonding: ratelimit failed speed/duplex update warning bond_miimon_commit() handles the UP transition for each slave of a bond in the case of MII. It is triggered 10 times per second for the default MII Polling interval of 100ms. For device drivers that do not implement __ethtool_get_link_ksettings() the call to bond_update_speed_duplex() fails persistently while the MII status could remain UP. That is, in this and other cases where the speed/duplex update keeps failing over a longer period of time while the MII state is UP, a warning is printed every MII polling interval. To address these excessive warnings net_ratelimit() should be used. Printing a warning once would not be sufficient since the call to bond_update_speed_duplex() could recover to succeed and fail again later. In that case there would be no new indication what went wrong. Fixes: b5bf0f5b16b9c (bonding: correctly update link status during mii-commit phase) Signed-off-by: Andreas Born Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 85bb272d2a34..fc63992ab0e0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2144,9 +2144,10 @@ static void bond_miimon_commit(struct bonding *bond) if (bond_update_speed_duplex(slave) && bond_needs_speed_duplex(bond)) { slave->link = BOND_LINK_DOWN; - netdev_warn(bond->dev, - "failed to get link speed/duplex for %s\n", - slave->dev->name); + if (net_ratelimit()) + netdev_warn(bond->dev, + "failed to get link speed/duplex for %s\n", + slave->dev->name); continue; } bond_set_slave_link_state(slave, BOND_LINK_UP, -- cgit v1.2.3 From d86e63e1f0b7868c55c8d4a54854b85e2bac690b Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Fri, 11 Aug 2017 22:27:35 +0800 Subject: arm64: allwinner: h5: fix pinctrl IRQs The pin controller of H5 has three IRQs at the chip's GIC, which represents three banks of pinctrl IRQs. However, the device tree used to miss the third IRQ of the pin controller, which makes the PG bank IRQ not usable. Add the missing IRQ to the pinctrl node. Fixes: 4e36de179f27 ("arm64: allwinner: h5: add Allwinner H5 .dtsi") Signed-off-by: Icenowy Zheng Signed-off-by: Chen-Yu Tsai --- arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index 732e2e06f503..d9a720bff05d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -120,5 +120,8 @@ }; &pio { + interrupts = , + , + ; compatible = "allwinner,sun50i-h5-pinctrl"; }; -- cgit v1.2.3 From e9bf53ab1ee34bb05c104bbfd2b77c844773f8e6 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Fri, 11 Aug 2017 11:07:36 +0100 Subject: brcmfmac: feature check for multi-scheduled scan fails on bcm4343x devices The firmware feature check introduced for multi-scheduled scan turned out to be failing for bcm4343{0,1,8} devices resulting in a firmware crash. The reason for this crash has not yet been root cause so this patch avoids the feature check for those device as a short-term fix. Reported-by: Stefan Wahren Reported-by: Ian Molton Fixes: 9fe929aaace6 ("brcmfmac: add firmware feature detection for gscan feature") Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c index d21258d277ce..f1b60740e020 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c @@ -159,8 +159,10 @@ void brcmf_feat_attach(struct brcmf_pub *drvr) brcmf_feat_firmware_capabilities(ifp); memset(&gscan_cfg, 0, sizeof(gscan_cfg)); - brcmf_feat_iovar_data_set(ifp, BRCMF_FEAT_GSCAN, "pfn_gscan_cfg", - &gscan_cfg, sizeof(gscan_cfg)); + if (drvr->bus_if->chip != BRCM_CC_43430_CHIP_ID) + brcmf_feat_iovar_data_set(ifp, BRCMF_FEAT_GSCAN, + "pfn_gscan_cfg", + &gscan_cfg, sizeof(gscan_cfg)); brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_PNO, "pfn"); if (drvr->bus_if->wowl_supported) brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_WOWL, "wowl"); -- cgit v1.2.3 From 8df4b0031067758d8b0a3bfde7d35e980d0376d5 Mon Sep 17 00:00:00 2001 From: "Shih-Yuan Lee (FourDollars)" Date: Mon, 14 Aug 2017 18:00:47 +0800 Subject: ALSA: hda/realtek - Fix pincfg for Dell XPS 13 9370 The initial pin configs for Dell headset mode of ALC3271 has changed. /sys/class/sound/hwC0D0/init_pin_configs: (BIOS 0.1.4) 0x12 0xb7a60130 0x13 0xb8a61140 0x14 0x40000000 0x16 0x411111f0 0x17 0x90170110 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x4087992d 0x1e 0x411111f0 0x21 0x04211020 has changed to ... /sys/class/sound/hwC0D0/init_pin_configs: (BIOS 0.2.0) 0x12 0xb7a60130 0x13 0x40000000 0x14 0x411111f0 0x16 0x411111f0 0x17 0x90170110 0x18 0x411111f0 0x19 0x411111f0 0x1a 0x411111f0 0x1b 0x411111f0 0x1d 0x4067992d 0x1e 0x411111f0 0x21 0x04211020 Fixes: b4576de87243 ("ALSA: hda/realtek - Fix typo of pincfg for Dell quirk") Signed-off-by: Shih-Yuan Lee (FourDollars) Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a91a9ef00c40..217bb582aff1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6647,7 +6647,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { SND_HDA_PIN_QUIRK(0x10ec0299, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, ALC225_STANDARD_PINS, {0x12, 0xb7a60130}, - {0x13, 0xb8a61140}, {0x17, 0x90170110}), {} }; -- cgit v1.2.3 From 26a72e8a8d0707f1d49133a19c027a3af9fcfdcb Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 4 Aug 2017 15:03:48 +0100 Subject: drm/i915: remove unused function declaration This function is not part of the driver anymore. Signed-off-by: Lionel Landwerlin Fixes: 90f4fcd56bda ("drm/i915: Remove forced stop ring on suspend/unload") Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170804140348.24971-1-lionel.g.landwerlin@intel.com (cherry picked from commit fe29133df37ac31de9e657ad91bcf74cdfe8c4cd) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 52b3a1fd4059..57ef5833c427 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -63,7 +63,6 @@ enum { }; /* Logical Rings */ -void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -- cgit v1.2.3 From a0125a932e917cb507b682cb66645efdca1f8cab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 8 Aug 2017 14:19:04 +0100 Subject: drm/i915: Perform an invalidate prior to executing golden renderstate As we may have just bound the renderstate into the GGTT for execution, we need to ensure that the GTT TLB are also flushed. On snb-gt2, this would cause a random GPU hang at the start of a new context (e.g. boot) and on snb-gt1, it was causing the renderstate batch to take ~10s. It was the GPU hang that revealed the truth, as the CS gleefully executed beyond the end of the golden renderstate batch, a good indicator for a GTT TLB miss. Fixes: 20fe17aa52dc ("drm/i915: Remove redundant TLB invalidate on switching contexts") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20170808131904.1385-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala Cc: # v4.12-rc1+ (cherry picked from commit 802673d66f8a6ded5d2689d597853c7bb3a70163) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_render_state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 7032c542a9b1..4dd4c2159a92 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -242,6 +242,10 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) goto err_unpin; } + ret = req->engine->emit_flush(req, EMIT_INVALIDATE); + if (ret) + goto err_unpin; + ret = req->engine->emit_bb_start(req, so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); -- cgit v1.2.3 From 1dd7a3e7af70ebdd0cdd937b180726d15a4f0948 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 9 Aug 2017 13:07:02 -0700 Subject: drm/i915/cnl: Add slice and subslice information to debugfs. A missing part to EU slice power gating is the debugfs interface. This patch actually should have been squashed to the initial EU slice power gating one. v2: Initial patch was merged without this part. Fixes: c7ae7e9ab207 ("drm/i915/cnl: Configure EU slice power gating.") Cc: Joonas Lahtinen Signed-off-by: Rodrigo Vivi Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170809200702.11236-1-rodrigo.vivi@intel.com (cherry picked from commit 7ea1adf30f82a4c0910524ac06f8f1f26281bb23) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 00d8967c8512..d1bd53b73738 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4580,7 +4580,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_GEN9_BC(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; -- cgit v1.2.3 From 7eceb9d04966435ed2d03f5554413715ab3cb34a Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 17 Jul 2017 12:58:54 -0700 Subject: drm/i915: Return correct EDP voltage swing table for 0.85V For 0.85V cnl_get_buf_trans_edp() returns the DP table, instead of EDP. Use the correct table. The error was pointed out by this clang warning: drivers/gpu/drm/i915/intel_ddi.c:392:39: warning: variable 'cnl_ddi_translations_edp_0_85V' is not needed and will not be emitted [-Wunneeded-internal-declaration] static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_0_85V[] = { Fixes: cf54ca8bc567 ("drm/i915/cnl: Implement voltage swing sequence.") Signed-off-by: Matthias Kaehlcke Reviewed-by: Manasi Navare Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20170717195854.192139-1-mka@chromium.org (cherry picked from commit 50946c89850db13bd672c664aec6cf4551f71fe9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_ddi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9edeaaef77ad..d3b3252a8742 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1762,7 +1762,7 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, if (dev_priv->vbt.edp.low_vswing) { if (voltage == VOLTAGE_INFO_0_85V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_85V); - return cnl_ddi_translations_dp_0_85V; + return cnl_ddi_translations_edp_0_85V; } else if (voltage == VOLTAGE_INFO_0_95V) { *n_entries = ARRAY_SIZE(cnl_ddi_translations_edp_0_95V); return cnl_ddi_translations_edp_0_95V; -- cgit v1.2.3 From 430ffaf46c05bda56535893f38e684f5418c4c93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 12 Aug 2017 16:27:24 +0100 Subject: drm/i915: Suppress switch_mm emission between the same aliasing_ppgtt When switching between contexts using the aliasing_ppgtt, the VM is shared. We don't need to reload the PD registers unless they are dirty. Martin Peres reported an issue that looks like corruption between Haswell context switches, bisecting to commit f9326be5f1d3 ("drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use"). Switching between the same mm (the aliasing_ppgtt is used for all contexts in this case) should be a nop, but appears to trigger some side-effects in the context switch. However, as we know the switch is redundant in this case, we can skip it and continue to ignore the issue until somebody feels strong enough to investigate full-ppgtt on gen7 again! Except.. Martin was using full-ppgtt which is not supported as it doesn't work correctly yet. So whilst the bisect did yield valuable information about the failures, the fix should not have any user impact under default settings, with the exception of a slightly lower throughput on xcs as the VM would always be reloaded. v2: Also remember to set the legacy_active_context following the switch on xcs (commit e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc")) Fixes: f9326be5f1d3 ("drm/i915: Rearrange switch_context to load the aliasing ppgtt on first use") Fixes: e8a9c58fcd9a ("drm/i915: Unify active context tracking between legacy/execlists/guc") Reported-by: Martin Peres Signed-off-by: Chris Wilson Cc: Martin Peres Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170812152724.6883-1-chris@chris-wilson.co.uk (cherry picked from commit 12124bea5b82dc1e917304aed703c27292270051) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_context.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 39ed58a21fc1..e1e971ee2ed5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -688,19 +688,19 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, } static bool -needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, - struct intel_engine_cs *engine, - struct i915_gem_context *to) +needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine) { + struct i915_gem_context *from = engine->legacy_active_context; + if (!ppgtt) return false; /* Always load the ppgtt on first use */ - if (!engine->legacy_active_context) + if (!from) return true; /* Same context without new entries, skip */ - if (engine->legacy_active_context == to && + if ((!from->ppgtt || from->ppgtt == ppgtt) && !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -744,7 +744,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (skip_rcs_switch(ppgtt, engine, to)) return 0; - if (needs_pd_load_pre(ppgtt, engine, to)) { + if (needs_pd_load_pre(ppgtt, engine)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load * Register Immediate commands in Ring Buffer before submitting @@ -841,7 +841,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - if (needs_pd_load_pre(ppgtt, engine, to)) { + if (needs_pd_load_pre(ppgtt, engine)) { int ret; trace_switch_mm(engine, to); @@ -852,6 +852,7 @@ int i915_switch_context(struct drm_i915_gem_request *req) ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); } + engine->legacy_active_context = to; return 0; } -- cgit v1.2.3 From 781cc76e0c2469cb7ac12ba238a4ea006978e321 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 8 Aug 2017 10:08:26 +0200 Subject: drm/i915: Avoid the gpu reset vs. modeset deadlock ... using the biggest hammer we have. This is essentially a weaponized version of the timeout-based wedging Chris added in commit 36703e79a982c8ce5a8e43833291f2719e92d0d1 Author: Chris Wilson Date: Thu Jun 22 11:56:25 2017 +0100 drm/i915: Break modeset deadlocks on reset Because defense-in-depth is good it's good to still have both. Also note that with the locking change we can now restrict this a lot (old gpus and special testing only), so this doesn't kill the TDR benefits on at least anything remotely modern. And futuremore with a few tricks it should be possible to make a much more educated guess about whether an atomic commit is stuck waiting on the gpu (atomic_t counting the pending i915_sw_fence used by the atomic modeset code should do it), so we can improve this. But for now just start with something that is guaranteed to recover faster, for much better CI througput. This defacto reverts TDR on these platforms, but there's not really a single commit to specify as the sole offender. v2: Add a debug message to explain what's going on. We can't DRM_ERROR because that spams CI. And the timeout based fallback still prints a DRM_ERROR, in case something goes wrong. v3: Fix comment layout (Michel) Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Fixes: 221fe7994554 ("drm/i915: Perform a direct reset of the GPU from the waiter") Cc: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin (v2) Cc: Michel Thierry Reviewed-by: Tvrtko Ursulin (v2) Reviewed-by: Michel Thierry Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170808080828.23650-1-daniel.vetter@ffwll.ch (cherry picked from commit 97154ec242c14f646a3ab3b4da8f838d197f300d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9471c88d449e..cc484b56eeaa 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3485,6 +3485,13 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) !gpu_reset_clobbers_display(dev_priv)) return; + /* We have a modeset vs reset deadlock, defensively unbreak it. + * + * FIXME: We can do a _lot_ better, this is just a first iteration. + */ + i915_gem_set_wedged(dev_priv); + DRM_DEBUG_DRIVER("Wedging GPU to avoid deadlocks with pending modeset updates\n"); + /* * Need mode_config.mutex so that we don't * trample ongoing ->detect() and whatnot. -- cgit v1.2.3 From 2c87d63ac853550e734edfd45e1be5e5aa44fbcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Aug 2017 00:52:58 +0200 Subject: ipv4: route: fix inet_rtm_getroute induced crash "ip route get $daddr iif eth0 from $saddr" causes: BUG: KASAN: use-after-free in ip_route_input_rcu+0x1535/0x1b50 Call Trace: ip_route_input_rcu+0x1535/0x1b50 ip_route_input_noref+0xf9/0x190 tcp_v4_early_demux+0x1a4/0x2b0 ip_rcv+0xbcb/0xc05 __netif_receive_skb+0x9c/0xd0 netif_receive_skb_internal+0x5a8/0x890 Problem is that inet_rtm_getroute calls either ip_route_input_rcu (if an iif was provided) or ip_route_output_key_hash_rcu. But ip_route_input_rcu, unlike ip_route_output_key_hash_rcu, already associates the dst_entry with the skb. This clears the SKB_DST_NOREF bit (i.e. skb_dst_drop will release/free the entry while it should not). Thus only set the dst if we called ip_route_output_key_hash_rcu(). I tested this patch by running: while true;do ip r get 10.0.1.2;done > /dev/null & while true;do ip r get 10.0.1.2 iif eth0 from 10.0.1.1;done > /dev/null & ... and saw no crash or memory leak. Cc: Roopa Prabhu Cc: David Ahern Fixes: ba52d61e0ff ("ipv4: route: restore skb_dst_set in inet_rtm_getroute") Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0383e66f59bc..7effa62beed3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2750,12 +2750,13 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); + else + skb_dst_set(skb, &rt->dst); } if (err) goto errout_free; - skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; -- cgit v1.2.3 From fed5f5718c4989a03b1b4cdc0c7f273c3c74ee9e Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 14 Aug 2017 17:55:56 +0200 Subject: tipc: accept PACKET_MULTICAST packets On L2 bearers, the TIPC broadcast function is sending out packets using the corresponding L2 broadcast address. At reception, we filter such packets under the assumption that they will also be delivered as broadcast packets. This assumption doesn't always hold true. Under high load, we have seen that a switch may convert the destination address and deliver the packet as a PACKET_MULTICAST, something leading to inadvertently dropped packets and a stale and reset broadcast link. We fix this by extending the reception filtering to accept packets of type PACKET_MULTICAST. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d174ee3254ee..767e0537dde5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -596,7 +596,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && - (skb->pkt_type <= PACKET_BROADCAST))) { + (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; tipc_rcv(dev_net(dev), skb, b); rcu_read_unlock(); -- cgit v1.2.3 From 59a361bc6f6e91d57f25ff0aebb0e646beb3b41d Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 14 Aug 2017 18:28:49 +0200 Subject: tipc: avoid inheriting msg_non_seq flag when message is returned In the function msg_reverse(), we reverse the header while trying to reuse the original buffer whenever possible. Those rejected/returned messages are always transmitted as unicast, but the msg_non_seq field is not explicitly set to zero as it should be. We have seen cases where multicast senders set the message type to "NOT dest_droppable", meaning that a multicast message shorter than one MTU will be returned, e.g., during receive buffer overflow, by reusing the original buffer. This has the effect that even the 'msg_non_seq' field is inadvertently inherited by the rejected message, although it is now sent as a unicast message. This again leads the receiving unicast link endpoint to steer the packet toward the broadcast link receive function, where it is dropped. The affected unicast link is thereafter (after 100 failed retransmissions) declared 'stale' and reset. We fix this by unconditionally setting the 'msg_non_seq' flag to zero for all rejected/returned messages. Reported-by: Canh Duc Luu Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/msg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ab3087687a32..dcd90e6fa7c3 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -513,6 +513,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); + msg_set_non_seq(hdr, 0); msg_set_origport(hdr, msg_destport(&ohdr)); msg_set_destport(hdr, msg_origport(&ohdr)); msg_set_destnode(hdr, msg_prevnode(&ohdr)); -- cgit v1.2.3 From 1874064eed0502bd9bef7be8023757b0c4f26883 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Aug 2017 20:11:26 -0700 Subject: Input: elan_i2c - add ELAN0608 to the ACPI table Similar to commit 722c5ac708b4f ("Input: elan_i2c - add ELAN0605 to the ACPI table"), ELAN0608 should be handled by elan_i2c. This touchpad can be found in Lenovo ideapad 320-14IKB. BugLink: https://bugs.launchpad.net/bugs/1708852 Signed-off-by: Kai-Heng Feng Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 3b616cb7c67f..9fe3908d12d5 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1248,6 +1248,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0100", 0 }, { "ELAN0600", 0 }, { "ELAN0605", 0 }, + { "ELAN0608", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From 76988690402dde2880bfe06ecccf381d48ba8e1c Mon Sep 17 00:00:00 2001 From: KT Liao Date: Mon, 14 Aug 2017 20:11:59 -0700 Subject: Input: elan_i2c - Add antoher Lenovo ACPI ID for upcoming Lenovo NB Add 2 new IDs (ELAN0609 and ELAN060B) to the list of ACPI IDs that should be handled by the driver. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 9fe3908d12d5..714cf7f9b138 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1249,6 +1249,9 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0600", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, + { "ELAN0605", 0 }, + { "ELAN0609", 0 }, + { "ELAN060B", 0 }, { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From a99b646afa8a02571ea298bedca6592d818229cd Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:23 +0800 Subject: PCI: Disable PCIe Relaxed Ordering if unsupported When bit4 is set in the PCIe Device Control register, it indicates whether the device is permitted to use relaxed ordering. On some platforms using relaxed ordering can have performance issues or due to erratum can cause data-corruption. In such cases devices must avoid using relaxed ordering. The patch adds a new flag PCI_DEV_FLAGS_NO_RELAXED_ORDERING to indicate that Relaxed Ordering (RO) attribute should not be used for Transaction Layer Packets (TLP) targeted towards these affected root complexes. This patch checks if there is any node in the hierarchy that indicates that using relaxed ordering is not safe. In such cases the patch turns off the relaxed ordering by clearing the capability for this device. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Ashok Raj Acked-by: Alexander Duyck Acked-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/pci/probe.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/quirks.c | 11 +++++++++++ include/linux/pci.h | 3 +++ 3 files changed, 57 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c31310db0404..e6a917b4acd3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1762,6 +1762,48 @@ static void pci_configure_extended_tags(struct pci_dev *dev) PCI_EXP_DEVCTL_EXT_TAG); } +/** + * pcie_relaxed_ordering_enabled - Probe for PCIe relaxed ordering enable + * @dev: PCI device to query + * + * Returns true if the device has enabled relaxed ordering attribute. + */ +bool pcie_relaxed_ordering_enabled(struct pci_dev *dev) +{ + u16 v; + + pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &v); + + return !!(v & PCI_EXP_DEVCTL_RELAX_EN); +} +EXPORT_SYMBOL(pcie_relaxed_ordering_enabled); + +static void pci_configure_relaxed_ordering(struct pci_dev *dev) +{ + struct pci_dev *root; + + /* PCI_EXP_DEVICE_RELAX_EN is RsvdP in VFs */ + if (dev->is_virtfn) + return; + + if (!pcie_relaxed_ordering_enabled(dev)) + return; + + /* + * For now, we only deal with Relaxed Ordering issues with Root + * Ports. Peer-to-Peer DMA is another can of worms. + */ + root = pci_find_pcie_root_port(dev); + if (!root) + return; + + if (root->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING) { + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_RELAX_EN); + dev_info(&dev->dev, "Disable Relaxed Ordering because the Root Port didn't support it\n"); + } +} + static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; @@ -1769,6 +1811,7 @@ static void pci_configure_device(struct pci_dev *dev) pci_configure_mps(dev); pci_configure_extended_tags(dev); + pci_configure_relaxed_ordering(dev); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6967c6b4cf6b..61b59bfa7bfc 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4015,6 +4015,17 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6868, PCI_CLASS_NOT_DEFINED, 8, DECLARE_PCI_FIXUP_CLASS_EARLY(0x1797, 0x6869, PCI_CLASS_NOT_DEFINED, 8, quirk_tw686x_class); +/* + * Some devices have problems with Transaction Layer Packets with the Relaxed + * Ordering Attribute set. Such devices should mark themselves and other + * Device Drivers should check before sending TLPs with RO set. + */ +static void quirk_relaxedordering_disable(struct pci_dev *dev) +{ + dev->dev_flags |= PCI_DEV_FLAGS_NO_RELAXED_ORDERING; + dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe Completion erratum\n"); +} + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the diff --git a/include/linux/pci.h b/include/linux/pci.h index 4869e66dd659..29606fb89464 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -188,6 +188,8 @@ enum pci_dev_flags { * the direct_complete optimization. */ PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), + /* Don't use Relaxed Ordering for TLPs directed at this device */ + PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { @@ -1125,6 +1127,7 @@ bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_d3cold_enable(struct pci_dev *dev); void pci_d3cold_disable(struct pci_dev *dev); +bool pcie_relaxed_ordering_enabled(struct pci_dev *dev); /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); -- cgit v1.2.3 From 87e09cdec4dae08acdb4aa49beb793c19d73e73e Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:24 +0800 Subject: PCI: Disable Relaxed Ordering for some Intel processors According to the Intel spec section 3.9.1 said: 3.9.1 Optimizing PCIe Performance for Accesses Toward Coherent Memory and Toward MMIO Regions (P2P) In order to maximize performance for PCIe devices in the processors listed in Table 3-6 below, the soft- ware should determine whether the accesses are toward coherent memory (system memory) or toward MMIO regions (P2P access to other devices). If the access is toward MMIO region, then software can command HW to set the RO bit in the TLP header, as this would allow hardware to achieve maximum throughput for these types of accesses. For accesses toward coherent memory, software can command HW to clear the RO bit in the TLP header (no RO), as this would allow hardware to achieve maximum throughput for these types of accesses. Table 3-6. Intel Processor CPU RP Device IDs for Processors Optimizing PCIe Performance Processor CPU RP Device IDs Intel Xeon processors based on 6F01H-6F0EH Broadwell microarchitecture Intel Xeon processors based on 2F01H-2F0EH Haswell microarchitecture It means some Intel processors has performance issue when use the Relaxed Ordering Attribute, so disable Relaxed Ordering for these root port. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Alexander Duyck Acked-by: Ashok Raj Signed-off-by: David S. Miller --- drivers/pci/quirks.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 61b59bfa7bfc..1272f7e65699 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4026,6 +4026,68 @@ static void quirk_relaxedordering_disable(struct pci_dev *dev) dev_info(&dev->dev, "Disable Relaxed Ordering Attributes to avoid PCIe Completion erratum\n"); } +/* + * Intel Xeon processors based on Broadwell/Haswell microarchitecture Root + * Complex has a Flow Control Credit issue which can cause performance + * problems with Upstream Transaction Layer Packets with Relaxed Ordering set. + */ +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f03, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f04, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f05, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f06, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f07, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f08, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f09, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0a, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0b, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0c, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0d, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x6f0e, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f03, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f04, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f05, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f06, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f07, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f08, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f09, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0a, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0b, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0c, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0d, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0e, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the -- cgit v1.2.3 From 077fa19c5dfa06a6ae04fb1661680940ff837612 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 11:23:25 +0800 Subject: PCI: Disable Relaxed Ordering Attributes for AMD A1100 Casey reported that the AMD ARM A1100 SoC has a bug in its PCIe Root Port where Upstream Transaction Layer Packets with the Relaxed Ordering Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering set, it would cause Data Corruption, so we need to disable Relaxed Ordering Attribute when Upstream TLPs to the Root Port. Reported-and-suggested-by: Casey Leedom Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Acked-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/pci/quirks.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1272f7e65699..140760403f36 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4088,6 +4088,22 @@ DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0d, PCI_CLASS_NOT_DEFINED DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, 0x2f0e, PCI_CLASS_NOT_DEFINED, 8, quirk_relaxedordering_disable); +/* + * The AMD ARM A1100 (AKA "SEATTLE") SoC has a bug in its PCIe Root Complex + * where Upstream Transaction Layer Packets with the Relaxed Ordering + * Attribute clear are allowed to bypass earlier TLPs with Relaxed Ordering + * set. This is a violation of the PCIe 3.0 Transaction Ordering Rules + * outlined in Section 2.4.1 (PCI Express(r) Base Specification Revision 3.0 + * November 10, 2010). As a result, on this platform we can't use Relaxed + * Ordering for Upstream TLPs. + */ +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a00, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a01, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_AMD, 0x1a02, PCI_CLASS_NOT_DEFINED, 8, + quirk_relaxedordering_disable); + /* * Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same * values for the Attribute as were supplied in the header of the -- cgit v1.2.3 From b0ba9d5fded9590cac67a482c5aab8b1bf86ee40 Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Tue, 15 Aug 2017 11:23:26 +0800 Subject: net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag cxgb4 Ethernet driver now queries PCIe configuration space to determine if it can send TLPs to it with the Relaxed Ordering Attribute set. Remove the enable_pcie_relaxed_ordering() to avoid enable PCIe Capability Device Control[Relaxed Ordering Enable] at probe routine, to make sure the driver will not send the Relaxed Ordering TLPs to the Root Complex which could not deal the Relaxed Ordering TLPs. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Reviewed-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 23 +++++++++++++++++------ drivers/net/ethernet/chelsio/cxgb4/sge.c | 5 +++-- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ef4be781fd05..09ea62ee96d3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -529,6 +529,7 @@ enum { /* adapter flags */ USING_SOFT_PARAMS = (1 << 6), MASTER_PF = (1 << 7), FW_OFLD_CONN = (1 << 9), + ROOT_NO_RELAXED_ORDERING = (1 << 10), }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index e403fa18f1b1..33bb8678833a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4654,11 +4654,6 @@ static void print_port_info(const struct net_device *dev) dev->name, adap->params.vpd.id, adap->name, buf); } -static void enable_pcie_relaxed_ordering(struct pci_dev *dev) -{ - pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); -} - /* * Free the following resources: * - memory used for tables @@ -4908,7 +4903,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } pci_enable_pcie_error_reporting(pdev); - enable_pcie_relaxed_ordering(pdev); pci_set_master(pdev); pci_save_state(pdev); @@ -4947,6 +4941,23 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->msg_enable = DFLT_MSG_ENABLE; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map)); + /* If possible, we use PCIe Relaxed Ordering Attribute to deliver + * Ingress Packet Data to Free List Buffers in order to allow for + * chipset performance optimizations between the Root Complex and + * Memory Controllers. (Messages to the associated Ingress Queue + * notifying new Packet Placement in the Free Lists Buffers will be + * send without the Relaxed Ordering Attribute thus guaranteeing that + * all preceding PCIe Transaction Layer Packets will be processed + * first.) But some Root Complexes have various issues with Upstream + * Transaction Layer Packets with the Relaxed Ordering Attribute set. + * The PCIe devices which under the Root Complexes will be cleared the + * Relaxed Ordering bit in the configuration space, So we check our + * PCIe configuration space to see if it's flagged with advice against + * using Relaxed Ordering. + */ + if (!pcie_relaxed_ordering_enabled(pdev)) + adapter->flags |= ROOT_NO_RELAXED_ORDERING; + spin_lock_init(&adapter->stats_lock); spin_lock_init(&adapter->tid_release_lock); spin_lock_init(&adapter->win0_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ede12209f20b..4ef68f69b58c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2719,6 +2719,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, struct fw_iq_cmd c; struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); + int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING); /* Size needs to be multiple of 16, including status entry. */ iq->size = roundup(iq->size, 16); @@ -2772,8 +2773,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F | - FW_IQ_CMD_FL0FETCHRO_F | - FW_IQ_CMD_FL0DATARO_F | + FW_IQ_CMD_FL0FETCHRO_V(relaxed) | + FW_IQ_CMD_FL0DATARO_V(relaxed) | FW_IQ_CMD_FL0PADEN_F); if (cong >= 0) c.iqns_to_fl0congen |= -- cgit v1.2.3 From b629276df7f669b1daaad2131ca418ab55186565 Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Tue, 15 Aug 2017 11:23:27 +0800 Subject: net/cxgb4vf: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag cxgb4vf Ethernet driver now queries PCIe configuration space to determine if it can send TLPs to it with the Relaxed Ordering Attribute set, just like the pf did. Signed-off-by: Casey Leedom Signed-off-by: Ding Tianhong Reviewed-by: Casey Leedom Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/adapter.h | 1 + drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 18 ++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 3 +++ 3 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h index 109bc630408b..08c6ddb84a04 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h @@ -408,6 +408,7 @@ enum { /* adapter flags */ USING_MSI = (1UL << 1), USING_MSIX = (1UL << 2), QUEUES_BOUND = (1UL << 3), + ROOT_NO_RELAXED_ORDERING = (1UL << 4), }; /* diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index ac7a150c54e9..2b85b874fd0d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2888,6 +2888,24 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, */ adapter->name = pci_name(pdev); adapter->msg_enable = DFLT_MSG_ENABLE; + + /* If possible, we use PCIe Relaxed Ordering Attribute to deliver + * Ingress Packet Data to Free List Buffers in order to allow for + * chipset performance optimizations between the Root Complex and + * Memory Controllers. (Messages to the associated Ingress Queue + * notifying new Packet Placement in the Free Lists Buffers will be + * send without the Relaxed Ordering Attribute thus guaranteeing that + * all preceding PCIe Transaction Layer Packets will be processed + * first.) But some Root Complexes have various issues with Upstream + * Transaction Layer Packets with the Relaxed Ordering Attribute set. + * The PCIe devices which under the Root Complexes will be cleared the + * Relaxed Ordering bit in the configuration space, So we check our + * PCIe configuration space to see if it's flagged with advice against + * using Relaxed Ordering. + */ + if (!pcie_relaxed_ordering_enabled(pdev)) + adapter->flags |= ROOT_NO_RELAXED_ORDERING; + err = adap_init0(adapter); if (err) goto err_unmap_bar; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index e37dde2ba97f..05498e7f2840 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2205,6 +2205,7 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, struct port_info *pi = netdev_priv(dev); struct fw_iq_cmd cmd, rpl; int ret, iqandst, flsz = 0; + int relaxed = !(adapter->flags & ROOT_NO_RELAXED_ORDERING); /* * If we're using MSI interrupts and we're not initializing the @@ -2300,6 +2301,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq, cpu_to_be32( FW_IQ_CMD_FL0HOSTFCMODE_V(SGE_HOSTFCMODE_NONE) | FW_IQ_CMD_FL0PACKEN_F | + FW_IQ_CMD_FL0FETCHRO_V(relaxed) | + FW_IQ_CMD_FL0DATARO_V(relaxed) | FW_IQ_CMD_FL0PADEN_F); /* In T6, for egress queue type FL there is internal overhead -- cgit v1.2.3 From 539a06baedd06127389b254f6b9f016ca072da13 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 14 Aug 2017 18:04:24 +0200 Subject: tcp: ulp: avoid module refcnt leak in tcp_set_ulp __tcp_ulp_find_autoload returns tcp_ulp_ops after taking a reference on the module. Then, if ->init fails, tcp_set_ulp propagates the error but nothing releases that reference. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/tcp_ulp.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 2417f55374c5..6bb9e14c710a 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name) ulp_ops = __tcp_ulp_find_autoload(name); if (!ulp_ops) - err = -ENOENT; - else - err = ulp_ops->init(sk); + return -ENOENT; - if (err) - goto out; + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } icsk->icsk_ulp_ops = ulp_ops; - out: - return err; + return 0; } -- cgit v1.2.3 From 36f41f8fc6d8aa9f8c9072d66ff7cf9055f5e69b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 10:16:45 -0700 Subject: af_key: do not use GFP_KERNEL in atomic contexts pfkey_broadcast() might be called from non process contexts, we can not use GFP_KERNEL in these cases [1]. This patch partially reverts commit ba51b6be38c1 ("net: Fix RCU splat in af_key"), only keeping the GFP_ATOMIC forcing under rcu_read_lock() section. [1] : syzkaller reported : in_atomic(): 1, irqs_disabled(): 0, pid: 2932, name: syzkaller183439 3 locks held by syzkaller183439/2932: #0: (&net->xfrm.xfrm_cfg_mutex){+.+.+.}, at: [] pfkey_sendmsg+0x4c8/0x9f0 net/key/af_key.c:3649 #1: (&pfk->dump_lock){+.+.+.}, at: [] pfkey_do_dump+0x76/0x3f0 net/key/af_key.c:293 #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] spin_lock_bh include/linux/spinlock.h:304 [inline] #2: (&(&net->xfrm.xfrm_policy_lock)->rlock){+...+.}, at: [] xfrm_policy_walk+0x192/0xa30 net/xfrm/xfrm_policy.c:1028 CPU: 0 PID: 2932 Comm: syzkaller183439 Not tainted 4.13.0-rc4+ #24 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 ___might_sleep+0x2b2/0x470 kernel/sched/core.c:5994 __might_sleep+0x95/0x190 kernel/sched/core.c:5947 slab_pre_alloc_hook mm/slab.h:416 [inline] slab_alloc mm/slab.c:3383 [inline] kmem_cache_alloc+0x24b/0x6e0 mm/slab.c:3559 skb_clone+0x1a0/0x400 net/core/skbuff.c:1037 pfkey_broadcast_one+0x4b2/0x6f0 net/key/af_key.c:207 pfkey_broadcast+0x4ba/0x770 net/key/af_key.c:281 dump_sp+0x3d6/0x500 net/key/af_key.c:2685 xfrm_policy_walk+0x2f1/0xa30 net/xfrm/xfrm_policy.c:1042 pfkey_dump_sp+0x42/0x50 net/key/af_key.c:2695 pfkey_do_dump+0xaa/0x3f0 net/key/af_key.c:299 pfkey_spddump+0x1a0/0x210 net/key/af_key.c:2722 pfkey_process+0x606/0x710 net/key/af_key.c:2814 pfkey_sendmsg+0x4d6/0x9f0 net/key/af_key.c:3650 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 ___sys_sendmsg+0x755/0x890 net/socket.c:2035 __sys_sendmsg+0xe5/0x210 net/socket.c:2069 SYSC_sendmsg net/socket.c:2080 [inline] SyS_sendmsg+0x2d/0x50 net/socket.c:2076 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x445d79 RSP: 002b:00007f32447c1dc8 EFLAGS: 00000202 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000445d79 RDX: 0000000000000000 RSI: 000000002023dfc8 RDI: 0000000000000008 RBP: 0000000000000086 R08: 00007f32447c2700 R09: 00007f32447c2700 R10: 00007f32447c2700 R11: 0000000000000202 R12: 0000000000000000 R13: 00007ffe33edec4f R14: 00007f32447c29c0 R15: 0000000000000000 Fixes: ba51b6be38c1 ("net: Fix RCU splat in af_key") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: David Ahern Acked-by: David Ahern Signed-off-by: David S. Miller --- net/key/af_key.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..98f4d8211b9a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { @@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); + err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); kfree_skb(skb2); kfree_skb(skb); @@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1389,7 +1389,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_state_put(x); - pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } @@ -1476,7 +1476,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } @@ -1589,7 +1589,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1694,8 +1694,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad return -ENOBUFS; } - pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); - + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, + sock_net(sk)); return 0; } @@ -1712,7 +1712,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, + sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) @@ -1733,7 +1734,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -1790,7 +1791,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -1878,7 +1879,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb new_hdr->sadb_msg_errno = 0; } - pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -2206,7 +2207,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } @@ -2426,7 +2427,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2682,7 +2683,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -2739,7 +2740,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -2803,7 +2804,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); @@ -3024,7 +3025,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); return 0; } @@ -3212,7 +3214,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, @@ -3408,7 +3411,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3599,7 +3603,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; -- cgit v1.2.3 From e5645f51ba99738b0e5d708edf9c6454f33b9310 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 14 Aug 2017 10:44:59 -0700 Subject: ipv6: release rt6->rt6i_idev properly during ifdown When a dst is created by addrconf_dst_alloc() for a host route or an anycast route, dst->dev points to loopback dev while rt6->rt6i_idev points to a real device. When the real device goes down, the current cleanup code only checks for dst->dev and assumes rt6->rt6i_idev->dev is the same. This causes the refcount leak on the real device in the above situation. This patch makes sure to always release the refcount taken on rt6->rt6i_idev during dst_dev_put(). Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()") Reported-by: John Stultz Tested-by: John Stultz Tested-by: Martin KaFai Lau Signed-off-by: Wei Wang Signed-off-by: Martin KaFai Lau Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a640fbcba15d..99d4727f2b18 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -417,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev) { - if (idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); - } + if (idev && idev->dev != loopback_dev) { + struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); } } } -- cgit v1.2.3 From 42b7305905be52e467bbc346b0f2f95ad44eb1a0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 14 Aug 2017 21:31:38 +0200 Subject: udp: fix linear skb reception with PEEK_OFF copy_linear_skb() is broken; both of its callers actually expect 'len' to be the amount we are trying to copy, not the offset of the end. Fix it keeping the meanings of arguments in sync with what the callers (both of them) expect. Also restore a saner behavior on EFAULT (i.e. preserving the iov_iter position in case of failure): The commit fd851ba9caa9 ("udp: harden copy_linear_skb()") avoids the more destructive effect of the buggy copy_linear_skb(), e.g. no more invalid memory access, but said function still behaves incorrectly: when peeking with offset it can fail with EINVAL instead of copying the appropriate amount of memory. Reported-by: Sasha Levin Fixes: b65ac44674dd ("udp: try to avoid 2 cache miss on dequeue") Fixes: fd851ba9caa9 ("udp: harden copy_linear_skb()") Signed-off-by: Al Viro Acked-by: Paolo Abeni Tested-by: Sasha Levin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/udp.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index e9b1d1eacb59..586de4b811b5 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -366,14 +366,13 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb) static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, struct iov_iter *to) { - int n, copy = len - off; + int n; - if (copy < 0) - return -EINVAL; - n = copy_to_iter(skb->data + off, copy, to); - if (n == copy) + n = copy_to_iter(skb->data + off, len, to); + if (n == len) return 0; + iov_iter_revert(to, n); return -EFAULT; } -- cgit v1.2.3 From 7749d4ff88d31b0be17c8683143135adaaadc6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 14:10:25 -0700 Subject: dccp: purge write queue in dccp_destroy_sock() syzkaller reported that DCCP could have a non empty write queue at dismantle time. WARNING: CPU: 1 PID: 2953 at net/core/stream.c:199 sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 2953 Comm: syz-executor0 Not tainted 4.13.0-rc4+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:180 __warn+0x1c4/0x1d9 kernel/panic.c:541 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190 do_trap_no_signal arch/x86/kernel/traps.c:224 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:273 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323 invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:846 RIP: 0010:sk_stream_kill_queues+0x3ce/0x520 net/core/stream.c:199 RSP: 0018:ffff8801d182f108 EFLAGS: 00010297 RAX: ffff8801d1144140 RBX: ffff8801d13cb280 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff85137b00 RDI: ffff8801d13cb280 RBP: ffff8801d182f148 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801d13cb4d0 R13: ffff8801d13cb3b8 R14: ffff8801d13cb300 R15: ffff8801d13cb3b8 inet_csk_destroy_sock+0x175/0x3f0 net/ipv4/inet_connection_sock.c:835 dccp_close+0x84d/0xc10 net/dccp/proto.c:1067 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 sock_release+0x8d/0x1e0 net/socket.c:597 sock_close+0x16/0x20 net/socket.c:1126 __fput+0x327/0x7e0 fs/file_table.c:210 ____fput+0x15/0x20 fs/file_table.c:246 task_work_run+0x18a/0x260 kernel/task_work.c:116 exit_task_work include/linux/task_work.h:21 [inline] do_exit+0xa32/0x1b10 kernel/exit.c:865 do_group_exit+0x149/0x400 kernel/exit.c:969 get_signal+0x7e8/0x17e0 kernel/signal.c:2330 do_signal+0x94/0x1ee0 arch/x86/kernel/signal.c:808 exit_to_usermode_loop+0x21c/0x2d0 arch/x86/entry/common.c:157 prepare_exit_to_usermode arch/x86/entry/common.c:194 [inline] syscall_return_slowpath+0x3a7/0x450 arch/x86/entry/common.c:263 Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/dccp/proto.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9fe25bf63296..86bc40ba6ba5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -201,10 +201,7 @@ void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; -- cgit v1.2.3 From d624d276d1ddacbcb12ad96832ce0c7b82cd25db Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Aug 2017 17:44:43 -0700 Subject: tcp: fix possible deadlock in TCP stack vs BPF filter Filtering the ACK packet was not put at the right place. At this place, we already allocated a child and put it into accept queue. We absolutely need to call tcp_child_process() to release its spinlock, or we will deadlock at accept() or close() time. Found by syzkaller team (Thanks a lot !) Fixes: 8fac365f63c8 ("tcp: Add a tcp_filter hook before handle ack packet") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Chenbo Feng Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a20e7f03d5f7..e9252c7df809 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1722,6 +1722,8 @@ process: */ sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1729,8 +1731,6 @@ process: } if (nsk == sk) { reqsk_put(req); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2521690d62d6..206210125fd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1456,6 +1456,8 @@ process: } sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1464,8 +1466,6 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; -- cgit v1.2.3 From 7e1d90f60a0d501c8503e636942ca704a454d910 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Mon, 14 Aug 2017 14:46:01 -0700 Subject: ALSA: seq: 2nd attempt at fixing race creating a queue commit 4842e98f26dd80be3623c4714a244ba52ea096a8 ("ALSA: seq: Fix race at creating a queue") attempted to fix a race reported by syzkaller. That fix has been described as follows: " When a sequencer queue is created in snd_seq_queue_alloc(),it adds the new queue element to the public list before referencing it. Thus the queue might be deleted before the call of snd_seq_queue_use(), and it results in the use-after-free error, as spotted by syzkaller. The fix is to reference the queue object at the right time. " Even with that fix in place, syzkaller reported a use-after-free error. It specifically pointed to the last instruction "return q->queue" in snd_seq_queue_alloc(). The pointer q is being used after kfree() has been called on it. It turned out that there is still a small window where a race can happen. The window opens at snd_seq_ioctl_create_queue()->snd_seq_queue_alloc()->queue_list_add() and closes at snd_seq_ioctl_create_queue()->queueptr()->snd_use_lock_use(). Between these two calls, a different thread could delete the queue and possibly re-create a different queue in the same location in queue_list. This change prevents this situation by calling snd_use_lock_use() from snd_seq_queue_alloc() prior to calling queue_list_add(). It is then the caller's responsibility to call snd_use_lock_free(&q->use_lock). Fixes: 4842e98f26dd ("ALSA: seq: Fix race at creating a queue") Reported-by: Dmitry Vyukov Cc: Signed-off-by: Daniel Mentz Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 13 ++++--------- sound/core/seq/seq_queue.c | 14 +++++++++----- sound/core/seq/seq_queue.h | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 272c55fe17c8..ea2d0ae85bd3 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -1502,16 +1502,11 @@ static int snd_seq_ioctl_unsubscribe_port(struct snd_seq_client *client, static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; - int result; struct snd_seq_queue *q; - result = snd_seq_queue_alloc(client->number, info->locked, info->flags); - if (result < 0) - return result; - - q = queueptr(result); - if (q == NULL) - return -EINVAL; + q = snd_seq_queue_alloc(client->number, info->locked, info->flags); + if (IS_ERR(q)) + return PTR_ERR(q); info->queue = q->queue; info->locked = q->locked; @@ -1521,7 +1516,7 @@ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); strlcpy(q->name, info->name, sizeof(q->name)); - queuefree(q); + snd_use_lock_free(&q->use_lock); return 0; } diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 450c5187eecb..79e0c5604ef8 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -184,22 +184,26 @@ void __exit snd_seq_queues_delete(void) static void queue_use(struct snd_seq_queue *queue, int client, int use); /* allocate a new queue - - * return queue index value or negative value for error + * return pointer to new queue or ERR_PTR(-errno) for error + * The new queue's use_lock is set to 1. It is the caller's responsibility to + * call snd_use_lock_free(&q->use_lock). */ -int snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int info_flags) { struct snd_seq_queue *q; q = queue_new(client, locked); if (q == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); q->info_flags = info_flags; queue_use(q, client, 1); + snd_use_lock_use(&q->use_lock); if (queue_list_add(q) < 0) { + snd_use_lock_free(&q->use_lock); queue_delete(q); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - return q->queue; + return q; } /* delete a queue - queue must be owned by the client */ diff --git a/sound/core/seq/seq_queue.h b/sound/core/seq/seq_queue.h index 30c8111477f6..719093489a2c 100644 --- a/sound/core/seq/seq_queue.h +++ b/sound/core/seq/seq_queue.h @@ -71,7 +71,7 @@ void snd_seq_queues_delete(void); /* create new queue (constructor) */ -int snd_seq_queue_alloc(int client, int locked, unsigned int flags); +struct snd_seq_queue *snd_seq_queue_alloc(int client, int locked, unsigned int flags); /* delete queue (destructor) */ int snd_seq_queue_delete(int client, int queueid); -- cgit v1.2.3 From a8e800fe0f68bc28ce309914f47e432742b865ed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Aug 2017 14:35:50 +0200 Subject: ALSA: usb-audio: Apply sample rate quirk to Sennheiser headset A Senheisser headset requires the typical sample-rate quirk for avoiding spurious errors from inquiring the current sample rate like: usb 1-1: 2:1: cannot get freq at ep 0x4 usb 1-1: 3:1: cannot get freq at ep 0x83 The USB ID 1395:740a has to be added to the entries in snd_usb_get_sample_rate_quirk(). Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1052580 Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index d7b0b0a3a2db..f3e9e30172f3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1142,6 +1142,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ + case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */ case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ -- cgit v1.2.3 From 84393817db09bb436e934f8f8cc981cbca9ea4dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 13:03:47 +0200 Subject: x86/mtrr: Prevent CPU hotplug lock recursion Larry reported a CPU hotplug lock recursion in the MTRR code. ============================================ WARNING: possible recursive locking detected systemd-udevd/153 is trying to acquire lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] stop_machine+0x16/0x30 but task is already holding lock: (cpu_hotplug_lock.rw_sem){.+.+.+}, at: [] mtrr_add_page+0x83/0x470 .... cpus_read_lock+0x48/0x90 stop_machine+0x16/0x30 mtrr_add_page+0x18b/0x470 mtrr_add+0x3e/0x70 mtrr_add_page() holds the hotplug rwsem already and calls stop_machine() which acquires it again. Call stop_machine_cpuslocked() instead. Reported-and-tested-by: Larry Finger Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708140920250.1865@nanos Cc: "Paul E. McKenney" Cc: Borislav Petkov --- arch/x86/kernel/cpu/mtrr/main.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c5bb63be4ba1..40d5a8a75212 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -237,6 +237,18 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ stop_machine(mtrr_rendezvous_handler, &data, cpu_online_mask); } +static void set_mtrr_cpuslocked(unsigned int reg, unsigned long base, + unsigned long size, mtrr_type type) +{ + struct set_mtrr_data data = { .smp_reg = reg, + .smp_base = base, + .smp_size = size, + .smp_type = type + }; + + stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask); +} + static void set_mtrr_from_inactive_cpu(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) { @@ -370,7 +382,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* Search for an empty MTRR */ i = mtrr_if->get_free_region(base, size, replace); if (i >= 0) { - set_mtrr(i, base, size, type); + set_mtrr_cpuslocked(i, base, size, type); if (likely(replace < 0)) { mtrr_usage_table[i] = 1; } else { @@ -378,7 +390,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (increment) mtrr_usage_table[i]++; if (unlikely(replace != i)) { - set_mtrr(replace, 0, 0, 0); + set_mtrr_cpuslocked(replace, 0, 0, 0); mtrr_usage_table[replace] = 0; } } @@ -506,7 +518,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) goto out; } if (--mtrr_usage_table[reg] < 1) - set_mtrr(reg, 0, 0, 0); + set_mtrr_cpuslocked(reg, 0, 0, 0); error = reg; out: mutex_unlock(&mtrr_mutex); -- cgit v1.2.3 From 3280d66a6363af0df0441709bc0bc302bd9a2510 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 14 Aug 2017 16:40:11 -0400 Subject: blk-mq: Fix queue usage on failed request allocation blk_mq_get_request() does not release the callers queue usage counter when allocation fails. The caller still needs to account for its own queue usage when it is unable to allocate a request. Fixes: 1ad43c0078b7 ("blk-mq: don't leak preempt counter/q_usage_counter when allocating rq failed") Reported-by: Max Gurtovoy Reviewed-by: Ming Lei Reviewed-by: Sagi Grimberg Tested-by: Max Gurtovoy Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 535cbdf32aab..4603b115e234 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -360,12 +360,12 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, return ERR_PTR(ret); rq = blk_mq_get_request(q, NULL, op, &alloc_data); + blk_queue_exit(q); if (!rq) return ERR_PTR(-EWOULDBLOCK); blk_mq_put_ctx(alloc_data.ctx); - blk_queue_exit(q); rq->__data_len = 0; rq->__sector = (sector_t) -1; @@ -411,12 +411,11 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, alloc_data.ctx = __blk_mq_get_ctx(q, cpu); rq = blk_mq_get_request(q, NULL, op, &alloc_data); + blk_queue_exit(q); if (!rq) return ERR_PTR(-EWOULDBLOCK); - blk_queue_exit(q); - return rq; } EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx); -- cgit v1.2.3 From 462cdace790ac2ed6aad1b19c9c0af0143b6aab0 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 18 Jul 2017 15:01:00 +0100 Subject: xen: fix bio vec merging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current test for bio vec merging is not fully accurate and can be tricked into merging bios when certain grant combinations are used. The result of these malicious bio merges is a bio that extends past the memory page used by any of the originating bios. Take into account the following scenario, where a guest creates two grant references that point to the same mfn, ie: grant 1 -> mfn A, grant 2 -> mfn A. These references are then used in a PV block request, and mapped by the backend domain, thus obtaining two different pfns that point to the same mfn, pfn B -> mfn A, pfn C -> mfn A. If those grants happen to be used in two consecutive sectors of a disk IO operation becoming two different bios in the backend domain, the checks in xen_biovec_phys_mergeable will succeed, because bfn1 == bfn2 (they both point to the same mfn). However due to the bio merging, the backend domain will end up with a bio that expands past mfn A into mfn A + 1. Fix this by making sure the check in xen_biovec_phys_mergeable takes into account the offset and the length of the bio, this basically replicates whats done in __BIOVEC_PHYS_MERGEABLE using mfns (bus addresses). While there also remove the usage of __BIOVEC_PHYS_MERGEABLE, since that's already checked by the callers of xen_biovec_phys_mergeable. CC: stable@vger.kernel.org Reported-by: "Jan H. Schönherr" Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/biomerge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 4da69dbf7dca..1bdd02a6d6ac 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -10,8 +10,7 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, unsigned long bfn1 = pfn_to_bfn(page_to_pfn(vec1->bv_page)); unsigned long bfn2 = pfn_to_bfn(page_to_pfn(vec2->bv_page)); - return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && - ((bfn1 == bfn2) || ((bfn1+1) == bfn2)); + return bfn1 + PFN_DOWN(vec1->bv_offset + vec1->bv_len) == bfn2; #else /* * XXX: Add support for merging bio_vec when using different page -- cgit v1.2.3 From b15bd8cb37598afb2963f7eb9e2de468d2d60a2f Mon Sep 17 00:00:00 2001 From: Munehisa Kamata Date: Wed, 9 Aug 2017 15:31:40 -0700 Subject: xen-blkfront: use a right index when checking requests Since commit d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") and 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block"), blkfront_resume() has been using an index for iterating ring_info to check request when iterating blk_shadow in an inner loop. This seems to have been accidentally introduced during the massive rewrite of the block layer macros in the commits. This may cause crash like this: [11798.057074] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [11798.058832] IP: [] blkfront_resume+0x10a/0x610 .... [11798.061063] Call Trace: [11798.061063] [] xenbus_dev_resume+0x53/0x140 [11798.061063] [] ? xenbus_dev_probe+0x150/0x150 [11798.061063] [] dpm_run_callback+0x3e/0x110 [11798.061063] [] device_resume+0x88/0x190 [11798.061063] [] dpm_resume+0x100/0x2d0 [11798.061063] [] dpm_resume_end+0x11/0x20 [11798.061063] [] do_suspend+0xe8/0x1a0 [11798.061063] [] shutdown_handler+0xfd/0x130 [11798.061063] [] ? split+0x110/0x110 [11798.061063] [] xenwatch_thread+0x86/0x120 [11798.061063] [] ? prepare_to_wait_event+0x110/0x110 [11798.061063] [] kthread+0xd7/0xf0 [11798.061063] [] ? kfree+0x121/0x170 [11798.061063] [] ? kthread_park+0x60/0x60 [11798.061063] [] ? call_usermodehelper_exec_work+0xb0/0xb0 [11798.061063] [] ? call_usermodehelper_exec_async+0x13a/0x140 [11798.061063] [] ret_from_fork+0x25/0x30 Use the right index in the inner loop. Fixes: d05d7f40791c ("Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block") Fixes: 3fc9d690936f ("Merge branch 'for-4.8/drivers' of git://git.kernel.dk/linux-block") Signed-off-by: Munehisa Kamata Reviewed-by: Thomas Friebel Reviewed-by: Eduardo Valentin Reviewed-by: Boris Ostrovsky Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Reviewed-by: Roger Pau Monne Cc: xen-devel@lists.xenproject.org Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 98e34e4c62b8..2468c28d4771 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2075,9 +2075,9 @@ static int blkfront_resume(struct xenbus_device *dev) /* * Get the bios in the request so we can re-queue them. */ - if (req_op(shadow[i].request) == REQ_OP_FLUSH || - req_op(shadow[i].request) == REQ_OP_DISCARD || - req_op(shadow[i].request) == REQ_OP_SECURE_ERASE || + if (req_op(shadow[j].request) == REQ_OP_FLUSH || + req_op(shadow[j].request) == REQ_OP_DISCARD || + req_op(shadow[j].request) == REQ_OP_SECURE_ERASE || shadow[j].request->cmd_flags & REQ_FUA) { /* * Flush operations don't contain bios, so -- cgit v1.2.3 From 7a7c286d07f9c704e8fd11dd960bf421cc67b66b Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Fri, 11 Aug 2017 09:34:33 +0800 Subject: drm/amdgpu: save list length when fence is signaled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit update the list first to avoid redundant checks. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index a6899180b265..c586f44312f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -244,6 +244,12 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct dma_fence *f = e->fence; struct amd_sched_fence *s_fence = to_amd_sched_fence(f); + if (dma_fence_is_signaled(f)) { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + continue; + } if (ring && s_fence) { /* For fences from the same ring it is sufficient * when they are scheduled. @@ -256,13 +262,6 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, } } - if (dma_fence_is_signaled(f)) { - hash_del(&e->node); - dma_fence_put(f); - kmem_cache_free(amdgpu_sync_slab, e); - continue; - } - return f; } -- cgit v1.2.3 From d76036ab47eafa6ce52b69482e91ca3ba337d6d6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2017 13:00:36 +0200 Subject: audit: Fix use after free in audit_remove_watch_rule() audit_remove_watch_rule() drops watch's reference to parent but then continues to work with it. That is not safe as parent can get freed once we drop our reference. The following is a trivial reproducer: mount -o loop image /mnt touch /mnt/file auditctl -w /mnt/file -p wax umount /mnt auditctl -D Grab our own reference in audit_remove_watch_rule() earlier to make sure mark does not get freed under us. CC: stable@vger.kernel.org Reported-by: Tony Jones Signed-off-by: Jan Kara Tested-by: Tony Jones Signed-off-by: Paul Moore --- kernel/audit_watch.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e0656bd63036..1c7ded42f82f 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -457,13 +457,15 @@ void audit_remove_watch_rule(struct audit_krule *krule) list_del(&krule->rlist); if (list_empty(&watch->rules)) { + /* + * audit_remove_watch() drops our reference to 'parent' which + * can get freed. Grab our own reference to be safe. + */ + audit_get_parent(parent); audit_remove_watch(watch); - - if (list_empty(&parent->watches)) { - audit_get_parent(parent); + if (list_empty(&parent->watches)) fsnotify_destroy_mark(&parent->mark, audit_watch_group); - audit_put_parent(parent); - } + audit_put_parent(parent); } } -- cgit v1.2.3 From b5fed474b98332559f2590c6bc90388a0899e793 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 15 Aug 2017 13:00:37 +0200 Subject: audit: Receive unmount event Although audit_watch_handle_event() can handle FS_UNMOUNT event, it is not part of AUDIT_FS_WATCH mask and thus such event never gets to audit_watch_handle_event(). Thus fsnotify marks are deleted by fsnotify subsystem on unmount without audit being notified about that which leads to a strange state of existing audit rules with dead fsnotify marks. Add FS_UNMOUNT to the mask of events to be received so that audit can clean up its state accordingly. Signed-off-by: Jan Kara Signed-off-by: Paul Moore --- kernel/audit_watch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 1c7ded42f82f..d1b5857b7e33 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -66,7 +66,7 @@ static struct fsnotify_group *audit_watch_group; /* fsnotify events we care about. */ #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ - FS_MOVE_SELF | FS_EVENT_ON_CHILD) + FS_MOVE_SELF | FS_EVENT_ON_CHILD | FS_UNMOUNT) static void audit_free_parent(struct audit_parent *parent) { -- cgit v1.2.3 From 12d94a804946af291e24b80fc53ec86264765781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 04:09:51 -0700 Subject: ipv6: fix NULL dereference in ip6_route_dev_notify() Based on a syzkaller report [1], I found that a per cpu allocation failure in snmp6_alloc_dev() would then lead to NULL dereference in ip6_route_dev_notify(). It seems this is a very old bug, thus no Fixes tag in this submission. Let's add in6_dev_put_clear() helper, as we will probably use it elsewhere (once available/present in net-next) [1] kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17294 Comm: syz-executor6 Not tainted 4.13.0-rc2+ #10 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff88019f456680 task.stack: ffff8801c6e58000 RIP: 0010:__read_once_size include/linux/compiler.h:250 [inline] RIP: 0010:atomic_read arch/x86/include/asm/atomic.h:26 [inline] RIP: 0010:refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: 0018:ffff8801c6e5f1b0 EFLAGS: 00010202 RAX: 0000000000000037 RBX: dffffc0000000000 RCX: ffffc90005d25000 RDX: ffff8801c6e5f218 RSI: ffffffff82342bbf RDI: 0000000000000001 RBP: ffff8801c6e5f240 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10038dcbe37 R13: 0000000000000006 R14: 0000000000000001 R15: 00000000000001b8 FS: 00007f21e0429700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001ddbc22000 CR3: 00000001d632b000 CR4: 00000000001426e0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: refcount_dec_and_test+0x1a/0x20 lib/refcount.c:211 in6_dev_put include/net/addrconf.h:335 [inline] ip6_route_dev_notify+0x1c9/0x4a0 net/ipv6/route.c:3732 notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 __raw_notifier_call_chain kernel/notifier.c:394 [inline] raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 call_netdevice_notifiers_info+0x51/0x90 net/core/dev.c:1678 call_netdevice_notifiers net/core/dev.c:1694 [inline] rollback_registered_many+0x91c/0xe80 net/core/dev.c:7107 rollback_registered+0x1be/0x3c0 net/core/dev.c:7149 register_netdevice+0xbcd/0xee0 net/core/dev.c:7587 register_netdev+0x1a/0x30 net/core/dev.c:7669 loopback_net_init+0x76/0x160 drivers/net/loopback.c:214 ops_init+0x10a/0x570 net/core/net_namespace.c:118 setup_net+0x313/0x710 net/core/net_namespace.c:294 copy_net_ns+0x27c/0x580 net/core/net_namespace.c:418 create_new_namespaces+0x425/0x880 kernel/nsproxy.c:107 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:206 SYSC_unshare kernel/fork.c:2347 [inline] SyS_unshare+0x653/0xfa0 kernel/fork.c:2297 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x4512c9 RSP: 002b:00007f21e0428c08 EFLAGS: 00000216 ORIG_RAX: 0000000000000110 RAX: ffffffffffffffda RBX: 0000000000718150 RCX: 00000000004512c9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000062020200 RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004b973d R13: 00000000ffffffff R14: 000000002001d000 R15: 00000000000002dd Code: 50 2b 34 82 c7 00 f1 f1 f1 f1 c7 40 04 04 f2 f2 f2 c7 40 08 f3 f3 f3 f3 e8 a1 43 39 ff 4c 89 f8 48 8b 95 70 ff ff ff 48 c1 e8 03 <0f> b6 0c 18 4c 89 f8 83 e0 07 83 c0 03 38 c8 7c 08 84 c9 0f 85 RIP: __read_once_size include/linux/compiler.h:250 [inline] RSP: ffff8801c6e5f1b0 RIP: atomic_read arch/x86/include/asm/atomic.h:26 [inline] RSP: ffff8801c6e5f1b0 RIP: refcount_sub_and_test+0x7d/0x1b0 lib/refcount.c:178 RSP: ffff8801c6e5f1b0 ---[ end trace e441d046c6410d31 ]--- Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++++++++ net/ipv6/route.c | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 6df79e96a780..f44ff2476758 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -336,6 +336,16 @@ static inline void in6_dev_put(struct inet6_dev *idev) in6_dev_finish_destroy(idev); } +static inline void in6_dev_put_clear(struct inet6_dev **pidev) +{ + struct inet6_dev *idev = *pidev; + + if (idev) { + in6_dev_put(idev); + *pidev = NULL; + } +} + static inline void __in6_dev_put(struct inet6_dev *idev) { refcount_dec(&idev->refcnt); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 99d4727f2b18..94d6a13d47f0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3721,10 +3721,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, /* NETDEV_UNREGISTER could be fired for multiple times by * netdev_wait_allrefs(). Make sure we only call this once. */ - in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); - in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } -- cgit v1.2.3 From b3dc8f772fab5b2d284b780830fd56494491e493 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 15 Aug 2017 04:28:54 -0700 Subject: net: Fix a typo in comment about sock flags. Signed-off-by: Tonghao Zhang Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index dda2cc939a53..ebeb48c92005 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -37,7 +37,7 @@ struct net; /* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected. - * Eventually all flags will be in sk->sk_wq_flags. + * Eventually all flags will be in sk->sk_wq->flags. */ #define SOCKWQ_ASYNC_NOSPACE 0 #define SOCKWQ_ASYNC_WAITDATA 1 -- cgit v1.2.3 From 187e5b3ac84d3421d2de3aca949b2791fbcad554 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Aug 2017 05:26:17 -0700 Subject: ipv4: fix NULL dereference in free_fib_info_rcu() If fi->fib_metrics could not be allocated in fib_create_info() we attempt to dereference a NULL pointer in free_fib_info_rcu() : m = fi->fib_metrics; if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) kfree(m); Before my recent patch, we used to call kfree(NULL) and nothing wrong happened. Instead of using RCU to defer freeing while we are under memory stress, it seems better to take immediate action. This was reported by syzkaller team. Fixes: 3fb07daff8e9 ("ipv4: add reference counting to metrics") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index b8d18171cca3..ec3a9ce281a6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (unlikely(!fi->fib_metrics)) { + kfree(fi); + return ERR_PTR(err); + } atomic_set(&fi->fib_metrics->refcnt, 1); - } else + } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; - + } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; -- cgit v1.2.3 From 898904226b5a6dee657f23cf51e385f50da22596 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:35:21 +0300 Subject: net_sched: reset pointers to tcf blocks in classful qdiscs' destructors Traffic filters could keep direct pointers to classes in classful qdisc, thus qdisc destruction first removes all filters before freeing classes. Class destruction methods also tries to free attached filters but now this isn't safe because tcf_block_put() unlike to tcf_destroy_chain() cannot be called second time. This patch set class->block to NULL after first tcf_block_put() and turn second call into no-op. Fixes: 6529eaba33f0 ("net: sched: introduce tcf block infractructure") Signed-off-by: Konstantin Khlebnikov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/sch_atm.c | 4 +++- net/sched/sch_cbq.c | 4 +++- net/sched/sch_hfsc.c | 4 +++- net/sched/sch_htb.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 572fe2584e48..c403c87aff7a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -572,8 +572,10 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) + list_for_each_entry(flow, &p->flows, list) { tcf_block_put(flow->block); + flow->block = NULL; + } list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 481036f6b54e..780db43300b1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1431,8 +1431,10 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ad02bbe6903..fd15200f8627 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1530,8 +1530,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 203286ab4427..5d65ec5207e9 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1258,8 +1258,10 @@ static void htb_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], -- cgit v1.2.3 From 325d5dc3f7e7c2840b65e4a2988c082c2c0025c5 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:37:04 +0300 Subject: net_sched/sfq: update hierarchical backlog when drop packet When sfq_enqueue() drops head packet or packet from another queue it have to update backlog at upper qdiscs too. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Konstantin Khlebnikov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_sfq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f80ea2cc5f1f..82469ef9655e 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -437,6 +437,7 @@ congestion_drop: qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); + qdisc_tree_reduce_backlog(sch, 0, delta); return NET_XMIT_CN; } @@ -468,8 +469,10 @@ enqueue: /* Return Congestion Notification only if we dropped a packet * from this flow. */ - if (qlen != slot->qlen) + if (qlen != slot->qlen) { + qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN; + } /* As we dropped a packet, better let upper stack know this */ qdisc_tree_reduce_backlog(sch, 1, dropped); -- cgit v1.2.3 From c90e95147c27b1780e76c6e8fea1b5c78d7d387f Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 15 Aug 2017 16:39:05 +0300 Subject: net_sched: remove warning from qdisc_hash_add It was added in commit e57a784d8cae ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") to hide duplicates from "tc qdisc show" for incative deivices. After 59cc1f61f ("net: sched: convert qdisc linked list to hashtable") it triggered when classful qdisc is added to inactive device because default qdiscs are added before switching root qdisc. Anyway after commit ea3274695353 ("net: sched: avoid duplicates in qdisc dump") duplicates are filtered right in dumper. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bd24a550e0f9..a3fa144b8648 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -286,9 +286,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) -- cgit v1.2.3 From 61deee962896f7eb547adc66ef09c8f1e7ddf7d7 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Tue, 15 Aug 2017 14:55:32 +0100 Subject: sfc: don't try and read ef10 data on non-ef10 NIC The MAC stats command takes a port ID, which doesn't exist on pre-ef10 NICs (5000- and 6000- series). This is extracted from the NIC specific data; we misinterpret this as the ef10 data structure, causing us to read potentially unallocated data. With a KASAN kernel this can cause errors with: BUG: KASAN: slab-out-of-bounds in efx_mcdi_mac_stats Fixes: 0a2ab4d988d7 ("sfc: set the port-id when calling MC_CMD_MAC_STATS") Reported-by: Stefano Brivio Tested-by: Stefano Brivio Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_port.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c index c905971c5f3a..990a63d7fcb7 100644 --- a/drivers/net/ethernet/sfc/mcdi_port.c +++ b/drivers/net/ethernet/sfc/mcdi_port.c @@ -938,7 +938,6 @@ enum efx_stats_action { static int efx_mcdi_mac_stats(struct efx_nic *efx, enum efx_stats_action action, int clear) { - struct efx_ef10_nic_data *nic_data = efx->nic_data; MCDI_DECLARE_BUF(inbuf, MC_CMD_MAC_STATS_IN_LEN); int rc; int change = action == EFX_STATS_PULL ? 0 : 1; @@ -960,7 +959,12 @@ static int efx_mcdi_mac_stats(struct efx_nic *efx, MAC_STATS_IN_PERIODIC_NOEVENT, 1, MAC_STATS_IN_PERIOD_MS, period); MCDI_SET_DWORD(inbuf, MAC_STATS_IN_DMA_LEN, dma_len); - MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, nic_data->vport_id); + + if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; + + MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, nic_data->vport_id); + } rc = efx_mcdi_rpc_quiet(efx, MC_CMD_MAC_STATS, inbuf, sizeof(inbuf), NULL, 0, NULL); -- cgit v1.2.3 From 0e405232871d67bf1b238d56b6b3d500e69ebbf3 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Tue, 15 Aug 2017 23:24:48 +0800 Subject: PCI: fix oops when try to find Root Port for a PCI device Eric report a oops when booting the system after applying the commit a99b646afa8a ("PCI: Disable PCIe Relaxed..."): [ 4.241029] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 4.247001] IP: pci_find_pcie_root_port+0x62/0x80 [ 4.253011] PGD 0 [ 4.253011] P4D 0 [ 4.253011] [ 4.258013] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 4.262015] Modules linked in: [ 4.265005] CPU: 31 PID: 1 Comm: swapper/0 Not tainted 4.13.0-dbx-DEV #316 [ 4.271002] Hardware name: Intel RML,PCH/Iota_QC_19, BIOS 2.40.0 06/22/2016 [ 4.279002] task: ffffa2ee38cfa040 task.stack: ffffa51ec0004000 [ 4.285001] RIP: 0010:pci_find_pcie_root_port+0x62/0x80 [ 4.290012] RSP: 0000:ffffa51ec0007ab8 EFLAGS: 00010246 [ 4.295003] RAX: 0000000000000000 RBX: ffffa2ee36bae000 RCX: 0000000000000006 [ 4.303002] RDX: 000000000000081c RSI: ffffa2ee38cfa8c8 RDI: ffffa2ee36bae000 [ 4.310013] RBP: ffffa51ec0007b58 R08: 0000000000000001 R09: 0000000000000000 [ 4.317001] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa51ec0007ad0 [ 4.324005] R13: ffffa2ee36bae098 R14: 0000000000000002 R15: ffffa2ee37204818 [ 4.331002] FS: 0000000000000000(0000) GS:ffffa2ee3fcc0000(0000) knlGS:0000000000000000 [ 4.339002] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.345001] CR2: 0000000000000050 CR3: 000000401000f000 CR4: 00000000001406e0 [ 4.351002] Call Trace: [ 4.354012] ? pci_configure_device+0x19f/0x570 [ 4.359002] ? pci_conf1_read+0xb8/0xf0 [ 4.363002] ? raw_pci_read+0x23/0x40 [ 4.366011] ? pci_read+0x2c/0x30 [ 4.370014] ? pci_read_config_word+0x67/0x70 [ 4.374012] pci_device_add+0x28/0x230 [ 4.378012] ? pci_vpd_f0_read+0x50/0x80 [ 4.382014] pci_scan_single_device+0x96/0xc0 [ 4.386012] pci_scan_slot+0x79/0xf0 [ 4.389001] pci_scan_child_bus+0x31/0x180 [ 4.394014] acpi_pci_root_create+0x1c6/0x240 [ 4.398013] pci_acpi_scan_root+0x15f/0x1b0 [ 4.402012] acpi_pci_root_add+0x2e6/0x400 [ 4.406012] ? acpi_evaluate_integer+0x37/0x60 [ 4.411002] acpi_bus_attach+0xdf/0x200 [ 4.415002] acpi_bus_attach+0x6a/0x200 [ 4.418014] acpi_bus_attach+0x6a/0x200 [ 4.422013] acpi_bus_scan+0x38/0x70 [ 4.426011] acpi_scan_init+0x10c/0x271 [ 4.429001] acpi_init+0x2fa/0x348 [ 4.433004] ? acpi_sleep_proc_init+0x2d/0x2d [ 4.437001] do_one_initcall+0x43/0x169 [ 4.441001] kernel_init_freeable+0x1d0/0x258 [ 4.445003] ? rest_init+0xe0/0xe0 [ 4.449001] kernel_init+0xe/0x150 ====================== cut here ============================= It looks like the pci_find_pcie_root_port() was trying to find the Root Port for the PCI device which is the Root Port already, it will return NULL and trigger the problem, so check the highest_pcie_bridge to fix thie problem. Fixes: a99b646afa8a ("PCI: Disable PCIe Relaxed Ordering if unsupported") Fixes: c56d4450eb68 ("PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum") Reported-by: Eric Dumazet Signed-off-by: Eric Dumazet Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/pci/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af0cc3456dc1..587cd7623ed8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -522,10 +522,11 @@ struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev) bridge = pci_upstream_bridge(bridge); } - if (pci_pcie_type(highest_pcie_bridge) != PCI_EXP_TYPE_ROOT_PORT) - return NULL; + if (highest_pcie_bridge && + pci_pcie_type(highest_pcie_bridge) == PCI_EXP_TYPE_ROOT_PORT) + return highest_pcie_bridge; - return highest_pcie_bridge; + return NULL; } EXPORT_SYMBOL(pci_find_pcie_root_port); -- cgit v1.2.3 From 88a5c690b66110ad255380d8f629c629cf6ca559 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 16 Aug 2017 01:45:33 +0200 Subject: bpf: fix bpf_trace_printk on 32 bit archs James reported that on MIPS32 bpf_trace_printk() is currently broken while MIPS64 works fine: bpf_trace_printk() uses conditional operators to attempt to pass different types to __trace_printk() depending on the format operators. This doesn't work as intended on 32-bit architectures where u32 and long are passed differently to u64, since the result of C conditional operators follows the "usual arithmetic conversions" rules, such that the values passed to __trace_printk() will always be u64 [causing issues later in the va_list handling for vscnprintf()]. For example the samples/bpf/tracex5 test printed lines like below on MIPS32, where the fd and buf have come from the u64 fd argument, and the size from the buf argument: [...] 1180.941542: 0x00000001: write(fd=1, buf= (null), size=6258688) Instead of this: [...] 1625.616026: 0x00000001: write(fd=1, buf=009e4000, size=512) One way to get it working is to expand various combinations of argument types into 8 different combinations for 32 bit and 64 bit kernels. Fix tested by James on MIPS32 and MIPS64 as well that it resolves the issue. Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()") Reported-by: James Hogan Tested-by: James Hogan Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 37385193a608..dc498b605d5d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -204,10 +204,36 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, fmt_cnt++; } - return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, - mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, - mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); +/* Horrid workaround for getting va_list handling working with different + * argument type combinations generically for 32 and 64 bit archs. + */ +#define __BPF_TP_EMIT() __BPF_ARG3_TP() +#define __BPF_TP(...) \ + __trace_printk(1 /* Fake ip will not be printed. */, \ + fmt, ##__VA_ARGS__) + +#define __BPF_ARG1_TP(...) \ + ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_TP(arg1, ##__VA_ARGS__) \ + : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_TP((long)arg1, ##__VA_ARGS__) \ + : __BPF_TP((u32)arg1, ##__VA_ARGS__))) + +#define __BPF_ARG2_TP(...) \ + ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \ + : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \ + : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__))) + +#define __BPF_ARG3_TP(...) \ + ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \ + ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \ + : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \ + ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \ + : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__))) + + return __BPF_TP_EMIT(); } static const struct bpf_func_proto bpf_trace_printk_proto = { -- cgit v1.2.3 From 4098116039911e8870d84c975e2ec22dab65a909 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Sat, 12 Aug 2017 23:36:47 +0200 Subject: parisc: pci memory bar assignment fails with 64bit kernels on dino/cujo For 64bit kernels the lmmio_space_offset of the host bridge window isn't set correctly on systems with dino/cujo PCI host bridges. This leads to not assigned memory bars and failing drivers, which need to use these bars. Signed-off-by: Thomas Bogendoerfer Cc: Acked-by: Helge Deller Signed-off-by: Helge Deller --- drivers/parisc/dino.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/parisc/dino.c b/drivers/parisc/dino.c index 5c63b920b471..ed92c1254cff 100644 --- a/drivers/parisc/dino.c +++ b/drivers/parisc/dino.c @@ -956,7 +956,7 @@ static int __init dino_probe(struct parisc_device *dev) dino_dev->hba.dev = dev; dino_dev->hba.base_addr = ioremap_nocache(hpa, 4096); - dino_dev->hba.lmmio_space_offset = 0; /* CPU addrs == bus addrs */ + dino_dev->hba.lmmio_space_offset = PCI_F_EXTEND; spin_lock_init(&dino_dev->dinosaur_pen); dino_dev->hba.iommu = ccio_get_iommu(dev); -- cgit v1.2.3 From 42819eb7a0957cc340ad4ed8bba736bab5ebc464 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 14 Aug 2017 22:12:37 +0200 Subject: nvmet: don't overwrite identify sn/fr with 0-bytes The merged version of my patch "nvmet: don't report 0-bytes in serial number" fails to remove two lines which should have been replaced, so that the space-padded strings are overwritten again with 0-bytes. Fix it. Fixes: 42de82a8b544 nvmet: don't report 0-bytes in serial number Signed-off-by: Martin Wilck Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 2d7a98ab53fb..a53bb6635b83 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -199,12 +199,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1); copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE)); - memset(id->mn, ' ', sizeof(id->mn)); - strncpy((char *)id->mn, "Linux", sizeof(id->mn)); - - memset(id->fr, ' ', sizeof(id->fr)); - strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr)); - id->rab = 6; /* -- cgit v1.2.3 From 16a5a480f067f945fd27bf91ffdce3f959b0d4b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 14 Aug 2017 11:20:32 -0700 Subject: nvmet-fc: correct use after free on list teardown Use list_for_each_entry_safe to prevent list handling from referencing next pointers directly after list_del's Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 1b7f2520a20d..b200f9aadd52 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -704,7 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; - struct nvmet_fc_defer_fcp_req *deferfcp; + struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; unsigned long flags; int i, writedataactive; bool disconnect; @@ -735,7 +735,8 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) } /* Cleanup defer'ed IOs in queue */ - list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) { + list_for_each_entry_safe(deferfcp, tempptr, &queue->avail_defer_list, + req_list) { list_del(&deferfcp->req_list); kfree(deferfcp); } -- cgit v1.2.3 From 5a69aec945d27e78abac9fd032533d3aaebf7c1e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:14 +1000 Subject: powerpc: Fix VSX enabling/flushing to also test MSR_FP and MSR_VEC VSX uses a combination of the old vector registers, the old FP registers and new "second halves" of the FP registers. Thus when we need to see the VSX state in the thread struct (flush_vsx_to_thread()) or when we'll use the VSX in the kernel (enable_kernel_vsx()) we need to ensure they are all flushed into the thread struct if either of them is individually enabled. Unfortunately we only tested if the whole VSX was enabled, not if they were individually enabled. Fixes: 72cd7b44bc99 ("powerpc: Uncomment and make enable_kernel_vsx() routine available") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ec480966f9bf..1f0fd361e09b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -362,7 +362,8 @@ void enable_kernel_vsx(void) cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + if (current->thread.regs && + (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) { check_if_tm_restore_required(current); /* * If a thread has already been reclaimed then the @@ -386,7 +387,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); - if (tsk->thread.regs->msr & MSR_VSX) { + if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) { BUG_ON(tsk != current); giveup_vsx(tsk); } -- cgit v1.2.3 From a7d2e03928c1936004750c56faf7534c8534f875 Mon Sep 17 00:00:00 2001 From: Bryan Tan Date: Thu, 10 Aug 2017 12:05:02 -0700 Subject: RDMA/vmw_pvrdma: Report CQ missed events There is a chance of a race between arming the CQ and receiving completions. By reporting CQ missed events any ULPs should poll again to get the completions. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Acked-by: Aditya Sarwade Signed-off-by: Bryan Tan Signed-off-by: Adit Ranadive Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 69bda611d313..90aa326fd7c0 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -65,13 +65,28 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq, struct pvrdma_dev *dev = to_vdev(ibcq->device); struct pvrdma_cq *cq = to_vcq(ibcq); u32 val = cq->cq_handle; + unsigned long flags; + int has_data = 0; val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM; + spin_lock_irqsave(&cq->cq_lock, flags); + pvrdma_write_uar_cq(dev, val); - return 0; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned int head; + + has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx, + cq->ibcq.cqe, &head); + if (unlikely(has_data == PVRDMA_INVALID_IDX)) + dev_err(&dev->pdev->dev, "CQ ring state invalid\n"); + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + return has_data; } /** -- cgit v1.2.3 From f67ace2d8868d06710ceea1b10b124eead5040da Mon Sep 17 00:00:00 2001 From: Chien Tin Tung Date: Tue, 8 Aug 2017 20:38:43 -0500 Subject: i40iw: Fix parsing of query/commit FPM buffers Parsing of commit/query Host Memory Cache Function Private Memory is not skipping over reserved fields and incorrectly assigning those values into object's base/cnt/max_cnt fields. Skip over reserved fields and set correct values. Also correct memory alignment requirement for commit/query FPM buffers. Signed-off-by: Chien Tin Tung Signed-off-by: Shiraz Saleem Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 121 +++++++++++++++++++++---------- drivers/infiniband/hw/i40iw/i40iw_d.h | 4 +- 2 files changed, 83 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index 9ec1ae9a82c9..ef4a73cd1710 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -130,20 +130,32 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( u64 base = 0; u32 i, j; u32 k = 0; - u32 low; /* copy base values in obj_info */ - for (i = I40IW_HMC_IW_QP, j = 0; - i <= I40IW_HMC_IW_PBLE; i++, j += 8) { + for (i = I40IW_HMC_IW_QP, j = 0; i <= I40IW_HMC_IW_PBLE; i++, j += 8) { + if ((i == I40IW_HMC_IW_SRQ) || + (i == I40IW_HMC_IW_FSIMC) || + (i == I40IW_HMC_IW_FSIAV)) { + info[i].base = 0; + info[i].cnt = 0; + continue; + } get_64bit_val(buf, j, &temp); info[i].base = RS_64_1(temp, 32) * 512; if (info[i].base > base) { base = info[i].base; k = i; } - low = (u32)(temp); - if (low) - info[i].cnt = low; + if (i == I40IW_HMC_IW_APBVT_ENTRY) { + info[i].cnt = 1; + continue; + } + if (i == I40IW_HMC_IW_QP) + info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); + else if (i == I40IW_HMC_IW_CQ) + info[i].cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); + else + info[i].cnt = (u32)(temp); } size = info[k].cnt * info[k].size + info[k].base; if (size & 0x1FFFFF) @@ -154,6 +166,31 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( return 0; } +/** + * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size + * @buf: ptr to fpm query buffer + * @buf_idx: index into buf + * @info: ptr to i40iw_hmc_obj_info struct + * @rsrc_idx: resource index into info + * + * Decode a 64 bit value from fpm query buffer into max count and size + */ +static u64 i40iw_sc_decode_fpm_query(u64 *buf, + u32 buf_idx, + struct i40iw_hmc_obj_info *obj_info, + u32 rsrc_idx) +{ + u64 temp; + u32 size; + + get_64bit_val(buf, buf_idx, &temp); + obj_info[rsrc_idx].max_cnt = (u32)temp; + size = (u32)RS_64_1(temp, 32); + obj_info[rsrc_idx].size = LS_64_1(1, size); + + return temp; +} + /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer @@ -168,9 +205,9 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( struct i40iw_hmc_info *hmc_info, struct i40iw_hmc_fpm_misc *hmc_fpm_misc) { - u64 temp; struct i40iw_hmc_obj_info *obj_info; - u32 i, j, size; + u64 temp; + u32 size; u16 max_pe_sds; obj_info = hmc_info->hmc_obj; @@ -185,41 +222,52 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf( hmc_fpm_misc->max_sds = max_pe_sds; hmc_info->sd_table.sd_cnt = max_pe_sds + hmc_info->first_sd_index; - for (i = I40IW_HMC_IW_QP, j = 8; - i <= I40IW_HMC_IW_ARP; i++, j += 8) { - get_64bit_val(buf, j, &temp); - if (i == I40IW_HMC_IW_QP) - obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); - else if (i == I40IW_HMC_IW_CQ) - obj_info[i].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); - else - obj_info[i].max_cnt = (u32)temp; + get_64bit_val(buf, 8, &temp); + obj_info[I40IW_HMC_IW_QP].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_QPS); + size = (u32)RS_64_1(temp, 32); + obj_info[I40IW_HMC_IW_QP].size = LS_64_1(1, size); - size = (u32)RS_64_1(temp, 32); - obj_info[i].size = ((u64)1 << size); - } - for (i = I40IW_HMC_IW_MR, j = 48; - i <= I40IW_HMC_IW_PBLE; i++, j += 8) { - get_64bit_val(buf, j, &temp); - obj_info[i].max_cnt = (u32)temp; - size = (u32)RS_64_1(temp, 32); - obj_info[i].size = LS_64_1(1, size); - } + get_64bit_val(buf, 16, &temp); + obj_info[I40IW_HMC_IW_CQ].max_cnt = (u32)RS_64(temp, I40IW_QUERY_FPM_MAX_CQS); + size = (u32)RS_64_1(temp, 32); + obj_info[I40IW_HMC_IW_CQ].size = LS_64_1(1, size); + + i40iw_sc_decode_fpm_query(buf, 32, obj_info, I40IW_HMC_IW_HTE); + i40iw_sc_decode_fpm_query(buf, 40, obj_info, I40IW_HMC_IW_ARP); + + obj_info[I40IW_HMC_IW_APBVT_ENTRY].size = 8192; + obj_info[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1; + + i40iw_sc_decode_fpm_query(buf, 48, obj_info, I40IW_HMC_IW_MR); + i40iw_sc_decode_fpm_query(buf, 56, obj_info, I40IW_HMC_IW_XF); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER); - get_64bit_val(buf, 120, &temp); - hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET); get_64bit_val(buf, 64, &temp); + obj_info[I40IW_HMC_IW_XFFL].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = RS_64(temp, I40IW_QUERY_FPM_XFBLOCKSIZE); if (!hmc_fpm_misc->xf_block_size) return I40IW_ERR_INVALID_SIZE; + + i40iw_sc_decode_fpm_query(buf, 72, obj_info, I40IW_HMC_IW_Q1); + get_64bit_val(buf, 80, &temp); + obj_info[I40IW_HMC_IW_Q1FL].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_Q1FL].size = 4; hmc_fpm_misc->q1_block_size = RS_64(temp, I40IW_QUERY_FPM_Q1BLOCKSIZE); if (!hmc_fpm_misc->q1_block_size) return I40IW_ERR_INVALID_SIZE; + + i40iw_sc_decode_fpm_query(buf, 88, obj_info, I40IW_HMC_IW_TIMER); + + get_64bit_val(buf, 112, &temp); + obj_info[I40IW_HMC_IW_PBLE].max_cnt = (u32)temp; + obj_info[I40IW_HMC_IW_PBLE].size = 8; + + get_64bit_val(buf, 120, &temp); + hmc_fpm_misc->max_ceqs = (u8)RS_64(temp, I40IW_QUERY_FPM_MAX_CEQS); + hmc_fpm_misc->ht_multiplier = RS_64(temp, I40IW_QUERY_FPM_HTMULTIPLIER); + hmc_fpm_misc->timer_bucket = RS_64(temp, I40IW_QUERY_FPM_TIMERBUCKET); + return 0; } @@ -3392,13 +3440,6 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev, u8 hmc_fn_ hmc_info->sd_table.sd_entry = virt_mem.va; } - /* fill size of objects which are fixed */ - hmc_info->hmc_obj[I40IW_HMC_IW_XFFL].size = 4; - hmc_info->hmc_obj[I40IW_HMC_IW_Q1FL].size = 4; - hmc_info->hmc_obj[I40IW_HMC_IW_PBLE].size = 8; - hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].size = 8192; - hmc_info->hmc_obj[I40IW_HMC_IW_APBVT_ENTRY].max_cnt = 1; - return ret_code; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h index a39ac12b6a7e..2ebaadbed379 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_d.h +++ b/drivers/infiniband/hw/i40iw/i40iw_d.h @@ -1507,8 +1507,8 @@ enum { I40IW_CQ0_ALIGNMENT_MASK = (256 - 1), I40IW_HOST_CTX_ALIGNMENT_MASK = (4 - 1), I40IW_SHADOWAREA_MASK = (128 - 1), - I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = 0, - I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = 0 + I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK = (4 - 1), + I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK = (4 - 1) }; enum i40iw_alignment { -- cgit v1.2.3 From 8129331f01a683ed8d9a9a65ed01b5c6ad26403a Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 8 Aug 2017 20:38:44 -0500 Subject: i40iw: Correct variable names Fix incorrect naming of status code and struct. Use inline instead of immediate. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_status.h | 2 +- drivers/infiniband/hw/i40iw/i40iw_uk.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_status.h b/drivers/infiniband/hw/i40iw/i40iw_status.h index 91c421762f06..f7013f11d808 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_status.h +++ b/drivers/infiniband/hw/i40iw/i40iw_status.h @@ -62,7 +62,7 @@ enum i40iw_status_code { I40IW_ERR_INVALID_ALIGNMENT = -23, I40IW_ERR_FLUSHED_QUEUE = -24, I40IW_ERR_INVALID_PUSH_PAGE_INDEX = -25, - I40IW_ERR_INVALID_IMM_DATA_SIZE = -26, + I40IW_ERR_INVALID_INLINE_DATA_SIZE = -26, I40IW_ERR_TIMEOUT = -27, I40IW_ERR_OPCODE_MISMATCH = -28, I40IW_ERR_CQP_COMPL_ERROR = -29, diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index b0d3a0e8a9b5..70a6b41980fa 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -435,7 +435,7 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp, op_info = &info->op.inline_rdma_write; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size); if (ret_code) @@ -511,7 +511,7 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp, op_info = &info->op.inline_send; if (op_info->len > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; ret_code = i40iw_inline_data_size_to_wqesize(op_info->len, &wqe_size); if (ret_code) @@ -1187,7 +1187,7 @@ enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size, u8 *wqe_size) { if (data_size > I40IW_MAX_INLINE_DATA_SIZE) - return I40IW_ERR_INVALID_IMM_DATA_SIZE; + return I40IW_ERR_INVALID_INLINE_DATA_SIZE; if (data_size <= 16) *wqe_size = I40IW_QP_WQE_MIN_SIZE; -- cgit v1.2.3 From 29c2415a6669bab354f0aa3445777fe147c7a05d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 8 Aug 2017 20:38:46 -0500 Subject: i40iw: Fix typecast of tcp_seq_num The typecast of tcp_seq_num incorrectly uses u8. Fix by casting to u32. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_uk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index 70a6b41980fa..1060725d18bc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -784,7 +784,7 @@ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, get_64bit_val(cqe, 0, &qword0); get_64bit_val(cqe, 16, &qword2); - info->tcp_seq_num = (u8)RS_64(qword0, I40IWCQ_TCPSEQNUM); + info->tcp_seq_num = (u32)RS_64(qword0, I40IWCQ_TCPSEQNUM); info->qp_id = (u32)RS_64(qword2, I40IWCQ_QPID); -- cgit v1.2.3 From a28f047e5f9b987d614eeee34388087ffdda3e53 Mon Sep 17 00:00:00 2001 From: Christopher N Bednarz Date: Tue, 8 Aug 2017 20:38:47 -0500 Subject: i40iw: Use correct alignment for CQ0 memory Utilize correct alignment variable when allocating DMA memory for CQ0. Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_puda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 71050c5d29a0..7f5583d83622 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -685,7 +685,7 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc) cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe)); tsize = cqsize + sizeof(struct i40iw_cq_shadow_area); ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize, - I40IW_CQ0_ALIGNMENT_MASK); + I40IW_CQ0_ALIGNMENT); if (ret) return ret; -- cgit v1.2.3 From aa939c12ab8a0c094420ad1b909a957ac590e43e Mon Sep 17 00:00:00 2001 From: Christopher N Bednarz Date: Tue, 8 Aug 2017 20:38:48 -0500 Subject: i40iw: Fix potential fcn_id_array out of bounds Avoid out of bounds error by utilizing I40IW_MAX_STATS_COUNT instead of I40IW_INVALID_FCN_ID. Signed-off-by: Christopher N Bednarz Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index ef4a73cd1710..a49ff2eb6fb3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -4881,7 +4881,7 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi) { u8 fcn_id = vsi->fcn_id; - if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID)) + if (vsi->stats_fcn_id_alloc && fcn_id < I40IW_MAX_STATS_COUNT) vsi->dev->fcn_id_array[fcn_id] = false; i40iw_hw_stats_stop_timer(vsi); } -- cgit v1.2.3 From 5b59a3969e95cd9be3699ecf7149ae8ef103b6f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 Aug 2017 18:41:02 +0100 Subject: IB/hns: fix memory leak on ah on error return path When dmac is NULL, ah is not being freed on the error return path. Fix this by kfree'ing it. Detected by CoverityScan, CID#1452636 ("Resource Leak") Fixes: d8966fcd4c25 ("IB/core: Use rdma_ah_attr accessor functions") Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hns/hns_roce_ah.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index f78a733a63ec..d545302b8ef8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -64,8 +64,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, } else { u8 *dmac = rdma_ah_retrieve_dmac(ah_attr); - if (!dmac) + if (!dmac) { + kfree(ah); return ERR_PTR(-EINVAL); + } memcpy(ah->av.mac, dmac, ETH_ALEN); } -- cgit v1.2.3 From d4ba61d218822578dcf6c2453a38e000b0ea01e6 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 25 Jul 2017 06:51:15 -0700 Subject: iw_cxgb4: fix misuse of integer variable Fixes: ee30f7d507c0 ("iw_cxgb4: Max fastreg depth depends on DSGL support") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 5332f06b99ba..c2fba76becd4 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -661,7 +661,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, rhp = php->rhp; if (mr_type != IB_MR_TYPE_MEM_REG || - max_num_sg > t4_max_fr_depth(&rhp->rdev.lldi.ulptx_memwrite_dsgl && + max_num_sg > t4_max_fr_depth(rhp->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl)) return ERR_PTR(-EINVAL); -- cgit v1.2.3 From 06f8174a97822f6befd28fc2dd315b43b82c700f Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Mon, 17 Jul 2017 14:03:50 -0500 Subject: IB/core: Protect sysfs entry on ib_unregister_device ib_unregister_device is not protecting removal of sysfs entries. A call to ib_register_device in that window can result in duplicate sysfs entry warning. Move mutex_unlock to after ib_device_unregister_sysfs to protect against sysfs entry creation. This issue is exposed during driver load/unload stress test. WARNING: CPU: 5 PID: 4445 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x5f/0x70 sysfs: cannot create duplicate filename '/class/infiniband/i40iw0' Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H BIOS F7 01/17/2014 Workqueue: i40e i40e_service_task [i40e] Call Trace: dump_stack+0x67/0x98 __warn+0xcc/0xf0 warn_slowpath_fmt+0x4a/0x50 ? kernfs_path_from_node+0x4b/0x60 sysfs_warn_dup+0x5f/0x70 sysfs_do_create_link_sd.isra.2+0xb7/0xc0 sysfs_create_link+0x20/0x40 device_add+0x28c/0x600 ib_device_register_sysfs+0x58/0x170 [ib_core] ib_register_device+0x325/0x570 [ib_core] ? i40iw_register_rdma_device+0x1f4/0x400 [i40iw] ? kmem_cache_alloc_trace+0x143/0x330 ? __raw_spin_lock_init+0x2d/0x50 i40iw_register_rdma_device+0x2dc/0x400 [i40iw] i40iw_open+0x10a6/0x1950 [i40iw] ? i40iw_open+0xeab/0x1950 [i40iw] ? i40iw_make_cm_node+0x9c0/0x9c0 [i40iw] i40e_client_subtask+0xa4/0x110 [i40e] i40e_service_task+0xc2d/0x1320 [i40e] process_one_work+0x203/0x710 ? process_one_work+0x16f/0x710 worker_thread+0x126/0x4a0 ? trace_hardirqs_on+0xd/0x10 kthread+0x112/0x150 ? process_one_work+0x710/0x710 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x2e/0x40 ---[ end trace fd11b69e21ea7653 ]--- Couldn't register device i40iw0 with driver model Signed-off-by: Shiraz Saleem Signed-off-by: Sindhu Devale Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a5dfab6adf49..221468f77128 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -537,10 +537,11 @@ void ib_unregister_device(struct ib_device *device) } up_read(&lists_rwsem); - mutex_unlock(&device_mutex); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); + + mutex_unlock(&device_mutex); + ib_cache_cleanup_one(device); ib_security_destroy_port_pkey_list(device); -- cgit v1.2.3 From 870201f95fcbd19538aef630393fe9d583eff82e Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 16 Aug 2017 18:57:04 +0300 Subject: IB/uverbs: Fix NULL pointer dereference during device removal As part of ib_uverbs_remove_one which might be triggered upon reset flow, we trigger IB_EVENT_DEVICE_FATAL event to userspace application. If device was removed after uverbs fd was opened but before ib_uverbs_get_context was called, the event file will be accessed before it was allocated, result in NULL pointer dereference: [ 72.325873] BUG: unable to handle kernel NULL pointer dereference at (null) ... [ 72.325984] IP: _raw_spin_lock_irqsave+0x22/0x40 [ 72.327123] Call Trace: [ 72.327168] ib_uverbs_async_handler.isra.8+0x2e/0x160 [ib_uverbs] [ 72.327216] ? synchronize_srcu_expedited+0x27/0x30 [ 72.327269] ib_uverbs_remove_one+0x120/0x2c0 [ib_uverbs] [ 72.327330] ib_unregister_device+0xd0/0x180 [ib_core] [ 72.327373] mlx5_ib_remove+0x74/0x140 [mlx5_ib] [ 72.327422] mlx5_remove_device+0xfb/0x110 [mlx5_core] [ 72.327466] mlx5_unregister_interface+0x3c/0xa0 [mlx5_core] [ 72.327509] mlx5_ib_cleanup+0x10/0x962 [mlx5_ib] [ 72.327546] SyS_delete_module+0x155/0x230 [ 72.328472] ? exit_to_usermode_loop+0x70/0xa6 [ 72.329370] do_syscall_64+0x54/0xc0 [ 72.330262] entry_SYSCALL64_slow_path+0x25/0x25 Fix it by checking that user context was allocated before trigger the event. Fixes: 036b10635739 ('IB/uverbs: Enable device removal when there are active user space applications') Signed-off-by: Maor Gottlieb Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index c023e2c81b8f..5e530d2bee44 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -1153,7 +1153,6 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, kref_get(&file->ref); mutex_unlock(&uverbs_dev->lists_mutex); - ib_uverbs_event_handler(&file->event_handler, &event); mutex_lock(&file->cleanup_mutex); ucontext = file->ucontext; @@ -1170,6 +1169,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev, * for example due to freeing the resources * (e.g mmput). */ + ib_uverbs_event_handler(&file->event_handler, &event); ib_dev->disassociate_ucontext(ucontext); mutex_lock(&file->cleanup_mutex); ib_uverbs_cleanup_ucontext(file, ucontext, true); -- cgit v1.2.3 From 47ac5484fd961420e5ec0bb5b972fde381f57365 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Aug 2017 17:39:52 +0200 Subject: x86: Fix norandmaps/ADDR_NO_RANDOMIZE Documentation/admin-guide/kernel-parameters.txt says: norandmaps Don't use address space randomization. Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space but it doesn't work because arch_rnd() which is used to randomize mm->mmap_base returns a random value unconditionally. And as Kirill pointed out, ADDR_NO_RANDOMIZE is broken by the same reason. Just shift the PF_RANDOMIZE check from arch_mmap_rnd() to arch_rnd(). Fixes: 1b028f784e8c ("x86/mm: Introduce mmap_compat_base() for 32-bit mmap()") Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Acked-by: Cyrill Gorcunov Reviewed-by: Dmitry Safonov Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20170815153952.GA1076@redhat.com --- arch/x86/mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 229d04a83f85..c94df122815a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -79,13 +79,13 @@ static int mmap_is_legacy(void) static unsigned long arch_rnd(unsigned int rndbits) { + if (!(current->flags & PF_RANDOMIZE)) + return 0; return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT; } unsigned long arch_mmap_rnd(void) { - if (!(current->flags & PF_RANDOMIZE)) - return 0; return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); } -- cgit v1.2.3 From 01578e36163cdd0e4fd61d9976de15f13364e26d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 15 Aug 2017 17:40:11 +0200 Subject: x86/elf: Remove the unnecessary ADDR_NO_RANDOMIZE checks The ADDR_NO_RANDOMIZE checks in stack_maxrandom_size() and randomize_stack_top() are not required. PF_RANDOMIZE is set by load_elf_binary() only if ADDR_NO_RANDOMIZE is not set, no need to re-check after that. Signed-off-by: Oleg Nesterov Signed-off-by: Thomas Gleixner Reviewed-by: Dmitry Safonov Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Linus Torvalds Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/20170815154011.GB1076@redhat.com --- arch/x86/mm/mmap.c | 3 +-- fs/binfmt_elf.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index c94df122815a..a88cfbfbd078 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -50,8 +50,7 @@ unsigned long tasksize_64bit(void) static unsigned long stack_maxrandom_size(unsigned long task_size) { unsigned long max = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit()); max <<= PAGE_SHIFT; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 879ff9c7ffd0..6466153f2bf0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -664,8 +664,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top) { unsigned long random_variable = 0; - if ((current->flags & PF_RANDOMIZE) && - !(current->personality & ADDR_NO_RANDOMIZE)) { + if (current->flags & PF_RANDOMIZE) { random_variable = get_random_long(); random_variable &= STACK_RND_MASK; random_variable <<= PAGE_SHIFT; -- cgit v1.2.3 From d6957f3396d0b1ee54d183524550d791054b5ebe Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 15 Aug 2017 11:34:19 +0200 Subject: printk-formats.txt: Better describe the difference between %pS and %pF Sometimes people seems unclear when to use the %pS or %pF printk format. For example, see commit 51d96dc2e2dc ("random: fix warning message on ia64 and parisc") which fixed such a wrong format string. The documentation should be more clear about the difference. Signed-off-by: Helge Deller [pmladek@suse.com: Restructure the entire section] Signed-off-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Helge Deller --- Documentation/printk-formats.txt | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 65ea5915178b..074670b98bac 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -58,20 +58,23 @@ Symbols/Function Pointers %ps versatile_init %pB prev_fn_of_versatile_init+0x88/0x88 -For printing symbols and function pointers. The ``S`` and ``s`` specifiers -result in the symbol name with (``S``) or without (``s``) offsets. Where -this is used on a kernel without KALLSYMS - the symbol address is -printed instead. +The ``F`` and ``f`` specifiers are for printing function pointers, +for example, f->func, &gettimeofday. They have the same result as +``S`` and ``s`` specifiers. But they do an extra conversion on +ia64, ppc64 and parisc64 architectures where the function pointers +are actually function descriptors. + +The ``S`` and ``s`` specifiers can be used for printing symbols +from direct addresses, for example, __builtin_return_address(0), +(void *)regs->ip. They result in the symbol name with (``S``) or +without (``s``) offsets. If KALLSYMS are disabled then the symbol +address is printed instead. The ``B`` specifier results in the symbol name with offsets and should be used when printing stack backtraces. The specifier takes into consideration the effect of compiler optimisations which may occur when tail-call``s are used and marked with the noreturn GCC attribute. -On ia64, ppc64 and parisc64 architectures function pointers are -actually function descriptors which must first be resolved. The ``F`` and -``f`` specifiers perform this resolution and then provide the same -functionality as the ``S`` and ``s`` specifiers. Kernel Pointers =============== -- cgit v1.2.3 From 369157b41cca435442cf5add9df209aaf951860d Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 16 Aug 2017 10:47:03 -0700 Subject: nvmet-fc: eliminate incorrect static markers on local variables There were 2 statics introduced that were bogus. Removed the static designations. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index b200f9aadd52..309c84aa7595 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -394,7 +394,7 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) static struct nvmet_fc_ls_iod * nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) { - static struct nvmet_fc_ls_iod *iod; + struct nvmet_fc_ls_iod *iod; unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); @@ -471,7 +471,7 @@ nvmet_fc_destroy_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, static struct nvmet_fc_fcp_iod * nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) { - static struct nvmet_fc_fcp_iod *fod; + struct nvmet_fc_fcp_iod *fod; lockdep_assert_held(&queue->qlock); -- cgit v1.2.3 From 187e91fe5e915f4b7f39b824aa422493463e443d Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 16 Aug 2017 21:08:08 +0200 Subject: x86/boot/64/clang: Use fixup_pointer() to access 'next_early_pgt' __startup_64() is normally using fixup_pointer() to access globals in a position-independent fashion. However 'next_early_pgt' was accessed directly, which wasn't guaranteed to work. Luckily GCC was generating a R_X86_64_PC32 PC-relative relocation for 'next_early_pgt', but Clang emitted a R_X86_64_32S, which led to accessing invalid memory and rebooting the kernel. Signed-off-by: Alexander Potapenko Acked-by: Kirill A. Shutemov Cc: Dmitry Vyukov Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Michael Davidson Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c88d71508e36 ("x86/boot/64: Rewrite startup_64() in C") Link: http://lkml.kernel.org/r/20170816190808.131748-1-glider@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 46c3c73e7f43..9ba79543d9ee 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -53,6 +53,7 @@ void __head __startup_64(unsigned long physaddr) pudval_t *pud; pmdval_t *pmd, pmd_entry; int i; + unsigned int *next_pgt_ptr; /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) @@ -91,9 +92,9 @@ void __head __startup_64(unsigned long physaddr) * creates a bunch of nonsense entries but that is fine -- * it avoids problems around wraparound. */ - - pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); - pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + next_pgt_ptr = fixup_pointer(&next_early_pgt, physaddr); + pud = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); + pmd = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], physaddr); if (IS_ENABLED(CONFIG_X86_5LEVEL)) { p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); -- cgit v1.2.3 From ee7b1f31200d9f3cc45e1bd22e962bd6b1d4d611 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 11 Aug 2017 17:29:56 +0100 Subject: of: fix DMA mask generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Historically, DMA masks have suffered some ambiguity between whether they represent the range of physical memory a device can access, or the address bits a device is capable of driving, particularly since on many platforms the two are equivalent. Whilst there are some stragglers left (dma_max_pfn(), I'm looking at you...), the majority of DMA code has been cleaned up to follow the latter definition, not least since it is the only one which makes sense once IOMMUs are involved. In this respect, of_dma_configure() has always done the wrong thing in how it generates initial masks based on "dma-ranges". Although rounding down did not affect the TI Keystone platform where dma_addr + size is already a power of two, in any other case it results in a mask which is at best unnecessarily constrained and at worst unusable. BCM2837 illustrates the problem nicely, where we have a DMA base of 3GB and a size of 1GB - 16MB, giving dma_addr + size = 0xff000000 and a resultant mask of 0x7fffffff, which is then insufficient to even cover the necessary offset, effectively making all DMA addresses out-of-range. This has been hidden until now (mostly because we don't yet prevent drivers from simply overwriting this initial mask later upon probe), but due to recent changes elsewhere now shows up as USB being broken on Raspberry Pi 3. Make it right by rounding up instead of down, such that the mask correctly correctly describes all possisble bits the device needs to emit. Fixes: 9a6d7298b083 ("of: Calculate device DMA masks based on DT dma-range size") Reported-by: Stefan Wahren Reported-by: Andreas Färber Reported-by: Hans Verkuil Signed-off-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Christoph Hellwig --- drivers/of/device.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 28c38c756f92..e0a28ea341fe 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -89,6 +89,7 @@ int of_dma_configure(struct device *dev, struct device_node *np) bool coherent; unsigned long offset; const struct iommu_ops *iommu; + u64 mask; /* * Set default coherent_dma_mask to 32 bit. Drivers are expected to @@ -134,10 +135,9 @@ int of_dma_configure(struct device *dev, struct device_node *np) * Limit coherent and dma mask based on size and default mask * set by the driver. */ - dev->coherent_dma_mask = min(dev->coherent_dma_mask, - DMA_BIT_MASK(ilog2(dma_addr + size))); - *dev->dma_mask = min((*dev->dma_mask), - DMA_BIT_MASK(ilog2(dma_addr + size))); + mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1); + dev->coherent_dma_mask &= mask; + *dev->dma_mask &= mask; coherent = of_dma_is_coherent(np); dev_dbg(dev, "device is%sdma coherent\n", -- cgit v1.2.3 From 0f174b3525a43bd51f9397394763925e0ebe7bc7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 16 Aug 2017 14:18:37 +0200 Subject: ALSA: usb-audio: Add mute TLV for playback volumes on C-Media devices C-Media devices (at least some models) mute the playback stream when volumes are set to the minimum value. But this isn't informed via TLV and the user-space, typically PulseAudio, gets confused as if it's still played in a low volume. This patch adds the new flag, min_mute, to struct usb_mixer_elem_info for indicating that the mixer element is with the minimum-mute volume. This flag is set for known C-Media devices in snd_usb_mixer_fu_apply_quirk() in turn. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196669 Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 2 ++ sound/usb/mixer.h | 1 + sound/usb/mixer_quirks.c | 6 ++++++ 3 files changed, 9 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 082736c539bc..e630813c5008 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -542,6 +542,8 @@ int snd_usb_mixer_vol_tlv(struct snd_kcontrol *kcontrol, int op_flag, if (size < sizeof(scale)) return -ENOMEM; + if (cval->min_mute) + scale[0] = SNDRV_CTL_TLVT_DB_MINMAX_MUTE; scale[2] = cval->dBmin; scale[3] = cval->dBmax; if (copy_to_user(_tlv, scale, sizeof(scale))) diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h index 3417ef347e40..2b4b067646ab 100644 --- a/sound/usb/mixer.h +++ b/sound/usb/mixer.h @@ -64,6 +64,7 @@ struct usb_mixer_elem_info { int cached; int cache_val[MAX_CHANNELS]; u8 initialized; + u8 min_mute; void *private_data; }; diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index e3d1dec48ee4..e1e7ce9ab217 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -1878,6 +1878,12 @@ void snd_usb_mixer_fu_apply_quirk(struct usb_mixer_interface *mixer, if (unitid == 7 && cval->control == UAC_FU_VOLUME) snd_dragonfly_quirk_db_scale(mixer, cval, kctl); break; + /* lowest playback value is muted on C-Media devices */ + case USB_ID(0x0d8c, 0x000c): + case USB_ID(0x0d8c, 0x0014): + if (strstr(kctl->id.name, "Playback")) + cval->min_mute = 1; + break; } } -- cgit v1.2.3 From c8c03f1858331e85d397bacccd34ef409aae993c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 16 Aug 2017 17:08:07 -0700 Subject: pty: fix the cached path of the pty slave file descriptor in the master Christian Brauner reported that if you use the TIOCGPTPEER ioctl() to get a slave pty file descriptor, the resulting file descriptor doesn't look right in /proc//fd/. In particular, he wanted to use readlink() on /proc/self/fd/ to get the pathname of the slave pty (basically implementing "ptsname{_r}()"). The reason for that was that we had generated the wrong 'struct path' when we create the pty in ptmx_open(). In particular, the dentry was correct, but the vfsmount pointed to the mount of the ptmx node. That _can_ be correct - in case you use "/dev/pts/ptmx" to open the master - but usually is not. The normal case is to use /dev/ptmx, which then looks up the pts/ directory, and then the vfsmount of the ptmx node is obviously the /dev directory, not the /dev/pts/ directory. We actually did have the right vfsmount available, but in the wrong place (it gets looked up in 'devpts_acquire()' when we get a reference to the pts filesystem), and so ptmx_open() used the wrong mnt pointer. The end result of this confusion was that the pty worked fine, but when if you did TIOCGPTPEER to get the slave side of the pty, end end result would also work, but have that dodgy 'struct path'. And then when doing "d_path()" on to get the pathname, the vfsmount would not match the root of the pts directory, and d_path() would return an empty pathname thinking that the entry had escaped a bind mount into another mount. This fixes the problem by making devpts_acquire() return the vfsmount for the pts filesystem, allowing ptmx_open() to trivially just use the right mount for the pts dentry, and create the proper 'struct path'. Reported-by: Christian Brauner Cc: Al Viro Acked-by: Eric Biederman Signed-off-by: Linus Torvalds --- drivers/tty/pty.c | 7 +++++-- fs/devpts/inode.c | 4 +++- include/linux/devpts_fs.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 284749fb0f6b..1fc80ea87c13 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -793,6 +793,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) struct tty_struct *tty; struct path *pts_path; struct dentry *dentry; + struct vfsmount *mnt; int retval; int index; @@ -805,7 +806,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) if (retval) return retval; - fsi = devpts_acquire(filp); + fsi = devpts_acquire(filp, &mnt); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; @@ -849,7 +850,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) pts_path = kmalloc(sizeof(struct path), GFP_KERNEL); if (!pts_path) goto err_release; - pts_path->mnt = filp->f_path.mnt; + pts_path->mnt = mnt; pts_path->dentry = dentry; path_get(pts_path); tty->link->driver_data = pts_path; @@ -866,6 +867,7 @@ err_path_put: path_put(pts_path); kfree(pts_path); err_release: + mntput(mnt); tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); @@ -874,6 +876,7 @@ out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); + mntput(mnt); out_free_file: tty_free_file(filp); return retval; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 108df2e3602c..44dfbca9306f 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -133,7 +133,7 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) return sb->s_fs_info; } -struct pts_fs_info *devpts_acquire(struct file *filp) +struct pts_fs_info *devpts_acquire(struct file *filp, struct vfsmount **ptsmnt) { struct pts_fs_info *result; struct path path; @@ -142,6 +142,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) path = filp->f_path; path_get(&path); + *ptsmnt = NULL; /* Has the devpts filesystem already been found? */ sb = path.mnt->mnt_sb; @@ -165,6 +166,7 @@ struct pts_fs_info *devpts_acquire(struct file *filp) * pty code needs to hold extra references in case of last /dev/tty close */ atomic_inc(&sb->s_active); + *ptsmnt = mntget(path.mnt); result = DEVPTS_SB(sb); out: diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 277ab9af9ac2..7883e901f65c 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -19,7 +19,7 @@ struct pts_fs_info; -struct pts_fs_info *devpts_acquire(struct file *); +struct pts_fs_info *devpts_acquire(struct file *, struct vfsmount **ptsmnt); void devpts_release(struct pts_fs_info *); int devpts_new_index(struct pts_fs_info *); -- cgit v1.2.3 From 81a0b8d74edd5841be29d223ce44bc8db2b00d09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 13:57:49 +0200 Subject: nvme-fabrics: fix reporting of unrecognized options Only print the specified options that are not recognized, instead of the whole list of options. Signed-off-by: Christoph Hellwig Reviewed-by: Max Gurtovoy --- drivers/nvme/host/fabrics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 2e582a240943..5f5cd306f76d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -794,7 +794,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { - if (opt_tokens[i].token & ~allowed_opts) { + if ((opt_tokens[i].token & opts->mask) && + (opt_tokens[i].token & ~allowed_opts)) { pr_warn("invalid parameter '%s'\n", opt_tokens[i].pattern); } -- cgit v1.2.3 From 8204f8ddaafafcae074746fcf2a05a45e6827603 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:28 -0700 Subject: xfs: clear MS_ACTIVE after finishing log recovery Way back when we established inode block-map redo log items, it was discovered that we needed to prevent the VFS from evicting inodes during log recovery because any given inode might be have bmap redo items to replay even if the inode has no link count and is ultimately deleted, and any eviction of an unlinked inode causes the inode to be truncated and freed too early. To make this possible, we set MS_ACTIVE so that inodes would not be torn down immediately upon release. Unfortunately, this also results in the quota inodes not being released at all if a later part of the mount process should fail, because we never reclaim the inodes. So, set MS_ACTIVE right before we do the last part of log recovery and clear it immediately after we finish the log recovery so that everything will be torn down properly if we abort the mount. Fixes: 17c12bcd30 ("xfs: when replaying bmap operations, don't let unlinked inodes get reaped") Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_log.c | 11 +++++++++++ fs/xfs/xfs_mount.c | 10 ---------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 0053bcf2b10a..4ebd0bafc914 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -749,9 +749,20 @@ xfs_log_mount_finish( return 0; } + /* + * During the second phase of log recovery, we need iget and + * iput to behave like they do for an active filesystem. + * xfs_fs_drop_inode needs to be able to prevent the deletion + * of inodes before we're done replaying log items on those + * inodes. Turn it off immediately after recovery finishes + * so that we don't leak the quota inodes if subsequent mount + * activities fail. + */ + mp->m_super->s_flags |= MS_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); + mp->m_super->s_flags &= ~MS_ACTIVE; return error; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 40d4e8b4e193..151a82db0945 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -944,15 +944,6 @@ xfs_mountfs( } } - /* - * During the second phase of log recovery, we need iget and - * iput to behave like they do for an active filesystem. - * xfs_fs_drop_inode needs to be able to prevent the deletion - * of inodes before we're done replaying log items on those - * inodes. - */ - mp->m_super->s_flags |= MS_ACTIVE; - /* * Finish recovering the file system. This part needed to be delayed * until after the root and real-time bitmap inodes were consistently @@ -1028,7 +1019,6 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: - mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); -- cgit v1.2.3 From 77aff8c76425c8f49b50d0b9009915066739e7d2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 10 Aug 2017 14:20:29 -0700 Subject: xfs: don't leak quotacheck dquots when cow recovery If we fail a mount on account of cow recovery errors, it's possible that a previous quotacheck left some dquots in memory. The bailout clause of xfs_mountfs forgets to purge these, and so we leak them. Fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/xfs_mount.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 151a82db0945..ea7d4b4e50d0 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1024,6 +1024,8 @@ xfs_mountfs( IRELE(rip); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); + /* Clean out dquots that might be in memory after quotacheck. */ + xfs_qm_unmount(mp); out_log_dealloc: mp->m_flags |= XFS_MOUNT_UNMOUNTING; xfs_log_mount_cancel(mp); -- cgit v1.2.3 From c40bc54fdf2d52a80f66b365f1eac9d43b32e107 Mon Sep 17 00:00:00 2001 From: Gary Bisson Date: Thu, 17 Aug 2017 15:50:10 +0200 Subject: ARM: dts: imx6qdl-nitrogen6_som2: fix PCIe reset Previous value was a bad copy of nitrogen6_max device tree. Signed-off-by: Gary Bisson Fixes: 3faa1bb2e89c ("ARM: dts: imx: add Boundary Devices Nitrogen6_SOM2 support") Cc: Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi index aeaa5a6e4fcf..a24e4f1911ab 100644 --- a/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-nitrogen6_som2.dtsi @@ -507,7 +507,7 @@ pinctrl_pcie: pciegrp { fsl,pins = < /* PCIe reset */ - MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0x030b0 + MX6QDL_PAD_EIM_DA0__GPIO3_IO00 0x030b0 MX6QDL_PAD_EIM_DA4__GPIO3_IO04 0x030b0 >; }; @@ -668,7 +668,7 @@ &pcie { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pcie>; - reset-gpio = <&gpio6 31 GPIO_ACTIVE_LOW>; + reset-gpio = <&gpio3 0 GPIO_ACTIVE_LOW>; status = "okay"; }; -- cgit v1.2.3 From e9d8a0fdeacd843c85dcef480cdb2ab76bcdb6e4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Aug 2017 16:45:06 -0400 Subject: nvme-pci: set cqe_seen on polled completions Fixes: 920d13a884 ("nvme-pci: factor out the cqe reading mechanics from __nvme_process_cq") Reported-by: Jens Axboe Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 74a124a06264..925467b31a33 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -801,6 +801,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, return; } + nvmeq->cqe_seen = 1; req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); nvme_end_request(req, cqe->status, cqe->result); } @@ -830,10 +831,8 @@ static void nvme_process_cq(struct nvme_queue *nvmeq) consumed++; } - if (consumed) { + if (consumed) nvme_ring_cq_doorbell(nvmeq); - nvmeq->cqe_seen = 1; - } } static irqreturn_t nvme_irq(int irq, void *data) -- cgit v1.2.3 From ed993c6fdfa7734881a4516852d95ae2d3b604d3 Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Fri, 18 Aug 2017 10:42:14 +0300 Subject: ALSA: usb-audio: add DSD support for new Amanero PID Add DSD support for new Amanero Combo384 firmware version with a new PID. This firmware uses DSD_U32_BE. Fixes: 3eff682d765b ("ALSA: usb-audio: Support both DSD LE/BE Amanero firmware versions") Signed-off-by: Jussi Laako Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f3e9e30172f3..6a03f9697039 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1375,6 +1375,10 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, } } break; + case USB_ID(0x16d0, 0x0a23): + if (fp->altsetting == 2) + return SNDRV_PCM_FMTBIT_DSD_U32_BE; + break; default: break; -- cgit v1.2.3 From 0b36f2bd28d040acedb52f4327eb2441afe4f514 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 18 Aug 2017 10:55:10 +0200 Subject: ALSA: emu10k1: Fix forgotten user-copy conversion in init code The commit d42fe63d5839 ("ALSA: emu10k1: Get rid of set_fs() usage") converted the user-space copy hack with set_fs() to the direct memcpy(), but one place was forgotten. This resulted in the error from snd_emu10k1_init_efx(), eventually failed to load the driver. Fix the missing piece. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196687 Fixes: d42fe63d5839 ("ALSA: emu10k1: Get rid of set_fs() usage") Signed-off-by: Takashi Iwai --- sound/pci/emu10k1/emufx.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index dc585959ca32..a2b56b188be4 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -698,10 +698,18 @@ static int copy_gctl(struct snd_emu10k1 *emu, { struct snd_emu10k1_fx8010_control_old_gpr __user *octl; - if (emu->support_tlv) - return copy_from_user(gctl, &_gctl[idx], sizeof(*gctl)); + if (emu->support_tlv) { + if (in_kernel) + memcpy(gctl, (void *)&_gctl[idx], sizeof(*gctl)); + else if (copy_from_user(gctl, &_gctl[idx], sizeof(*gctl))) + return -EFAULT; + return 0; + } + octl = (struct snd_emu10k1_fx8010_control_old_gpr __user *)_gctl; - if (copy_from_user(gctl, &octl[idx], sizeof(*octl))) + if (in_kernel) + memcpy(gctl, (void *)&octl[idx], sizeof(*octl)); + else if (copy_from_user(gctl, &octl[idx], sizeof(*octl))) return -EFAULT; gctl->tlv = NULL; return 0; -- cgit v1.2.3 From 7374bfb82e3844abcc5a5b8034620d80b92b820d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 27 Jul 2017 15:47:33 -0700 Subject: MAINTAINERS: Remove Jason Cooper's irqchip git tree Jason's irqchip tree does not seem to have been updated for many months now, remove it from the list of trees to avoid any possible confusion. Jason says: "Unfortunately, when I have time for irqchip, I don't always have the time to properly follow up with pull-requests. So, for the time being, I'll stick to reviewing as I can." Signed-off-by: Florian Fainelli Signed-off-by: Thomas Gleixner Acked-by: Jason Cooper Cc: marc.zyngier@arm.com Link: http://lkml.kernel.org/r/20170727224733.8288-1-f.fainelli@gmail.com --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index f66488dfdbc9..b116efa1a087 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7102,7 +7102,6 @@ M: Marc Zyngier L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core -T: git git://git.infradead.org/users/jcooper/linux.git irqchip/core F: Documentation/devicetree/bindings/interrupt-controller/ F: drivers/irqchip/ -- cgit v1.2.3 From 45bd07ad82622fb7c8dd7504d976b7dd11568965 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 20 Jul 2017 17:00:32 +0530 Subject: x86: Constify attribute_group structures attribute_groups are not supposed to change at runtime and none of the groups is modified. Mark the non-const structs as const. [ tglx: Folded into one big patch ] Signed-off-by: Arvind Yadav Signed-off-by: Thomas Gleixner Cc: tony.luck@intel.com Cc: bp@alien8.de Link: http://lkml.kernel.org/r/1500550238-15655-2-git-send-email-arvind.yadav.cs@gmail.com --- arch/x86/events/intel/uncore.c | 2 +- arch/x86/events/intel/uncore_nhmex.c | 12 ++++----- arch/x86/events/intel/uncore_snb.c | 6 ++--- arch/x86/events/intel/uncore_snbep.c | 42 ++++++++++++++++---------------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 +- arch/x86/kernel/cpu/microcode/core.c | 4 +-- arch/x86/kernel/ksysfs.c | 4 +-- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 44ec523287f6..1c5390f1cf09 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -721,7 +721,7 @@ static struct attribute *uncore_pmu_attrs[] = { NULL, }; -static struct attribute_group uncore_pmu_attr_group = { +static const struct attribute_group uncore_pmu_attr_group = { .attrs = uncore_pmu_attrs, }; diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index cda569332005..6a5cbe90f859 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -272,7 +272,7 @@ static struct attribute *nhmex_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_ubox_format_group = { +static const struct attribute_group nhmex_uncore_ubox_format_group = { .name = "format", .attrs = nhmex_uncore_ubox_formats_attr, }; @@ -299,7 +299,7 @@ static struct attribute *nhmex_uncore_cbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_cbox_format_group = { +static const struct attribute_group nhmex_uncore_cbox_format_group = { .name = "format", .attrs = nhmex_uncore_cbox_formats_attr, }; @@ -407,7 +407,7 @@ static struct attribute *nhmex_uncore_bbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_bbox_format_group = { +static const struct attribute_group nhmex_uncore_bbox_format_group = { .name = "format", .attrs = nhmex_uncore_bbox_formats_attr, }; @@ -484,7 +484,7 @@ static struct attribute *nhmex_uncore_sbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_sbox_format_group = { +static const struct attribute_group nhmex_uncore_sbox_format_group = { .name = "format", .attrs = nhmex_uncore_sbox_formats_attr, }; @@ -898,7 +898,7 @@ static struct attribute *nhmex_uncore_mbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_mbox_format_group = { +static const struct attribute_group nhmex_uncore_mbox_format_group = { .name = "format", .attrs = nhmex_uncore_mbox_formats_attr, }; @@ -1163,7 +1163,7 @@ static struct attribute *nhmex_uncore_rbox_formats_attr[] = { NULL, }; -static struct attribute_group nhmex_uncore_rbox_format_group = { +static const struct attribute_group nhmex_uncore_rbox_format_group = { .name = "format", .attrs = nhmex_uncore_rbox_formats_attr, }; diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index a3dcc12bef4a..db1127ce685e 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -130,7 +130,7 @@ static struct attribute *snb_uncore_formats_attr[] = { NULL, }; -static struct attribute_group snb_uncore_format_group = { +static const struct attribute_group snb_uncore_format_group = { .name = "format", .attrs = snb_uncore_formats_attr, }; @@ -289,7 +289,7 @@ static struct attribute *snb_uncore_imc_formats_attr[] = { NULL, }; -static struct attribute_group snb_uncore_imc_format_group = { +static const struct attribute_group snb_uncore_imc_format_group = { .name = "format", .attrs = snb_uncore_imc_formats_attr, }; @@ -769,7 +769,7 @@ static struct attribute *nhm_uncore_formats_attr[] = { NULL, }; -static struct attribute_group nhm_uncore_format_group = { +static const struct attribute_group nhm_uncore_format_group = { .name = "format", .attrs = nhm_uncore_formats_attr, }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 4f9127644b80..db1fe377e6dd 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -602,27 +602,27 @@ static struct uncore_event_desc snbep_uncore_qpi_events[] = { { /* end: all zeroes */ }, }; -static struct attribute_group snbep_uncore_format_group = { +static const struct attribute_group snbep_uncore_format_group = { .name = "format", .attrs = snbep_uncore_formats_attr, }; -static struct attribute_group snbep_uncore_ubox_format_group = { +static const struct attribute_group snbep_uncore_ubox_format_group = { .name = "format", .attrs = snbep_uncore_ubox_formats_attr, }; -static struct attribute_group snbep_uncore_cbox_format_group = { +static const struct attribute_group snbep_uncore_cbox_format_group = { .name = "format", .attrs = snbep_uncore_cbox_formats_attr, }; -static struct attribute_group snbep_uncore_pcu_format_group = { +static const struct attribute_group snbep_uncore_pcu_format_group = { .name = "format", .attrs = snbep_uncore_pcu_formats_attr, }; -static struct attribute_group snbep_uncore_qpi_format_group = { +static const struct attribute_group snbep_uncore_qpi_format_group = { .name = "format", .attrs = snbep_uncore_qpi_formats_attr, }; @@ -1431,27 +1431,27 @@ static struct attribute *ivbep_uncore_qpi_formats_attr[] = { NULL, }; -static struct attribute_group ivbep_uncore_format_group = { +static const struct attribute_group ivbep_uncore_format_group = { .name = "format", .attrs = ivbep_uncore_formats_attr, }; -static struct attribute_group ivbep_uncore_ubox_format_group = { +static const struct attribute_group ivbep_uncore_ubox_format_group = { .name = "format", .attrs = ivbep_uncore_ubox_formats_attr, }; -static struct attribute_group ivbep_uncore_cbox_format_group = { +static const struct attribute_group ivbep_uncore_cbox_format_group = { .name = "format", .attrs = ivbep_uncore_cbox_formats_attr, }; -static struct attribute_group ivbep_uncore_pcu_format_group = { +static const struct attribute_group ivbep_uncore_pcu_format_group = { .name = "format", .attrs = ivbep_uncore_pcu_formats_attr, }; -static struct attribute_group ivbep_uncore_qpi_format_group = { +static const struct attribute_group ivbep_uncore_qpi_format_group = { .name = "format", .attrs = ivbep_uncore_qpi_formats_attr, }; @@ -1887,7 +1887,7 @@ static struct attribute *knl_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_ubox_format_group = { +static const struct attribute_group knl_uncore_ubox_format_group = { .name = "format", .attrs = knl_uncore_ubox_formats_attr, }; @@ -1927,7 +1927,7 @@ static struct attribute *knl_uncore_cha_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_cha_format_group = { +static const struct attribute_group knl_uncore_cha_format_group = { .name = "format", .attrs = knl_uncore_cha_formats_attr, }; @@ -2037,7 +2037,7 @@ static struct attribute *knl_uncore_pcu_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_pcu_format_group = { +static const struct attribute_group knl_uncore_pcu_format_group = { .name = "format", .attrs = knl_uncore_pcu_formats_attr, }; @@ -2187,7 +2187,7 @@ static struct attribute *knl_uncore_irp_formats_attr[] = { NULL, }; -static struct attribute_group knl_uncore_irp_format_group = { +static const struct attribute_group knl_uncore_irp_format_group = { .name = "format", .attrs = knl_uncore_irp_formats_attr, }; @@ -2385,7 +2385,7 @@ static struct attribute *hswep_uncore_ubox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_ubox_format_group = { +static const struct attribute_group hswep_uncore_ubox_format_group = { .name = "format", .attrs = hswep_uncore_ubox_formats_attr, }; @@ -2439,7 +2439,7 @@ static struct attribute *hswep_uncore_cbox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_cbox_format_group = { +static const struct attribute_group hswep_uncore_cbox_format_group = { .name = "format", .attrs = hswep_uncore_cbox_formats_attr, }; @@ -2621,7 +2621,7 @@ static struct attribute *hswep_uncore_sbox_formats_attr[] = { NULL, }; -static struct attribute_group hswep_uncore_sbox_format_group = { +static const struct attribute_group hswep_uncore_sbox_format_group = { .name = "format", .attrs = hswep_uncore_sbox_formats_attr, }; @@ -3314,7 +3314,7 @@ static struct attribute *skx_uncore_cha_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_chabox_format_group = { +static const struct attribute_group skx_uncore_chabox_format_group = { .name = "format", .attrs = skx_uncore_cha_formats_attr, }; @@ -3427,7 +3427,7 @@ static struct attribute *skx_uncore_iio_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_iio_format_group = { +static const struct attribute_group skx_uncore_iio_format_group = { .name = "format", .attrs = skx_uncore_iio_formats_attr, }; @@ -3484,7 +3484,7 @@ static struct attribute *skx_uncore_formats_attr[] = { NULL, }; -static struct attribute_group skx_uncore_format_group = { +static const struct attribute_group skx_uncore_format_group = { .name = "format", .attrs = skx_uncore_formats_attr, }; @@ -3605,7 +3605,7 @@ static struct attribute *skx_upi_uncore_formats_attr[] = { NULL, }; -static struct attribute_group skx_upi_uncore_format_group = { +static const struct attribute_group skx_upi_uncore_format_group = { .name = "format", .attrs = skx_upi_uncore_formats_attr, }; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d7cc190ae457..f7370abd33c6 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -122,7 +122,7 @@ static struct attribute *thermal_throttle_attrs[] = { NULL }; -static struct attribute_group thermal_attr_group = { +static const struct attribute_group thermal_attr_group = { .attrs = thermal_throttle_attrs, .name = "thermal_throttle" }; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 9cb98ee103db..86e8f0b2537b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -561,7 +561,7 @@ static struct attribute *mc_default_attrs[] = { NULL }; -static struct attribute_group mc_attr_group = { +static const struct attribute_group mc_attr_group = { .attrs = mc_default_attrs, .name = "microcode", }; @@ -707,7 +707,7 @@ static struct attribute *cpu_root_microcode_attrs[] = { NULL }; -static struct attribute_group cpu_root_microcode_group = { +static const struct attribute_group cpu_root_microcode_group = { .name = "microcode", .attrs = cpu_root_microcode_attrs, }; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index 4afc67f5facc..06e1ff5562c0 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -55,7 +55,7 @@ static struct bin_attribute *boot_params_data_attrs[] = { NULL, }; -static struct attribute_group boot_params_attr_group = { +static const struct attribute_group boot_params_attr_group = { .attrs = boot_params_version_attrs, .bin_attrs = boot_params_data_attrs, }; @@ -202,7 +202,7 @@ static struct bin_attribute *setup_data_data_attrs[] = { NULL, }; -static struct attribute_group setup_data_attr_group = { +static const struct attribute_group setup_data_attr_group = { .attrs = setup_data_type_attrs, .bin_attrs = setup_data_data_attrs, }; -- cgit v1.2.3 From 4dd6a9973b8aaffac4bf37c5bb70e8eae5a7afb4 Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Fri, 28 Jul 2017 09:51:34 -0700 Subject: soc: ti: ti_sci_pm_domains: Populate name for genpd Commit b6a1d093f96b ("PM / Domains: Extend generic power domain debugfs") now creates a debugfs directory for each genpd based on the name of the genpd. Currently no name is given to the genpd created by ti_sci_pm_domains driver so because of this we see a NULL pointer dereferences when it is accessed on boot when the debugfs entry creation is attempted. Give the genpd a name before registering it to avoid this. Fixes: 52835d59fc6c ("soc: ti: Add ti_sci_pm_domains driver") Signed-off-by: Dave Gerlach Signed-off-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann --- drivers/soc/ti/ti_sci_pm_domains.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/ti/ti_sci_pm_domains.c b/drivers/soc/ti/ti_sci_pm_domains.c index b0b283810e72..de31b9389e2e 100644 --- a/drivers/soc/ti/ti_sci_pm_domains.c +++ b/drivers/soc/ti/ti_sci_pm_domains.c @@ -176,6 +176,8 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev) ti_sci_pd->dev = dev; + ti_sci_pd->pd.name = "ti_sci_pd"; + ti_sci_pd->pd.attach_dev = ti_sci_pd_attach_dev; ti_sci_pd->pd.detach_dev = ti_sci_pd_detach_dev; -- cgit v1.2.3 From e8f241893dfbbebe2813c01eac54f263e6a5e59c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Aug 2017 10:53:45 +0100 Subject: genirq: Restore trigger settings in irq_modify_status() irq_modify_status starts by clearing the trigger settings from irq_data before applying the new settings, but doesn't restore them, leaving them to IRQ_TYPE_NONE. That's pretty confusing to the potential request_irq() that could follow. Instead, snapshot the settings before clearing them, and restore them if the irq_modify_status() invocation was not changing the trigger. Fixes: 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") Reported-and-tested-by: jeffy Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jon Hunter Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170818095345.12378-1-marc.zyngier@arm.com --- kernel/irq/chip.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index a3cc37c0c85e..3675c6004f2a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1000,7 +1000,7 @@ EXPORT_SYMBOL_GPL(irq_set_chip_and_handler_name); void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) { - unsigned long flags; + unsigned long flags, trigger, tmp; struct irq_desc *desc = irq_get_desc_lock(irq, &flags, 0); if (!desc) @@ -1014,6 +1014,8 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) irq_settings_clr_and_set(desc, clr, set); + trigger = irqd_get_trigger_type(&desc->irq_data); + irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU | IRQD_TRIGGER_MASK | IRQD_LEVEL | IRQD_MOVE_PCNTXT); if (irq_settings_has_no_balance_set(desc)) @@ -1025,7 +1027,11 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set) if (irq_settings_is_level(desc)) irqd_set(&desc->irq_data, IRQD_LEVEL); - irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc)); + tmp = irq_settings_get_trigger_mask(desc); + if (tmp != IRQ_TYPE_NONE) + trigger = tmp; + + irqd_set(&desc->irq_data, trigger); irq_put_desc_unlock(desc, flags); } -- cgit v1.2.3 From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 09:50:13 +0200 Subject: kernel/watchdog: Prevent false positives with turbo modes The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumess a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos --- arch/x86/Kconfig | 1 + include/linux/nmi.h | 8 +++++++ kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 7 ++++++ 5 files changed, 76 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b7cf9e..9101bfc85539 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 8aa01fd859fb..a36abe2da13e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif + +#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ + defined(CONFIG_HARDLOCKUP_DETECTOR) +void watchdog_update_hrtimer_threshold(u64 period); +#else +static inline void watchdog_update_hrtimer_threshold(u64 period) { } +#endif + extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 06d3389bca0d..f5d52024f6b7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -240,6 +240,7 @@ static void set_sample_period(void) * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); + watchdog_update_hrtimer_threshold(sample_period); } /* Commands for resetting the watchdog */ diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 295a0d84934c..3a09ea1b1d3d 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP +static DEFINE_PER_CPU(ktime_t, last_timestamp); +static DEFINE_PER_CPU(unsigned int, nmi_rearmed); +static ktime_t watchdog_hrtimer_sample_threshold __read_mostly; + +void watchdog_update_hrtimer_threshold(u64 period) +{ + /* + * The hrtimer runs with a period of (watchdog_threshold * 2) / 5 + * + * So it runs effectively with 2.5 times the rate of the NMI + * watchdog. That means the hrtimer should fire 2-3 times before + * the NMI watchdog expires. The NMI watchdog on x86 is based on + * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles + * might run way faster than expected and the NMI fires in a + * smaller period than the one deduced from the nominal CPU + * frequency. Depending on the Turbo-Mode factor this might be fast + * enough to get the NMI period smaller than the hrtimer watchdog + * period and trigger false positives. + * + * The sample threshold is used to check in the NMI handler whether + * the minimum time between two NMI samples has elapsed. That + * prevents false positives. + * + * Set this to 4/5 of the actual watchdog threshold period so the + * hrtimer is guaranteed to fire at least once within the real + * watchdog threshold. + */ + watchdog_hrtimer_sample_threshold = period * 2; +} + +static bool watchdog_check_timestamp(void) +{ + ktime_t delta, now = ktime_get_mono_fast_ns(); + + delta = now - __this_cpu_read(last_timestamp); + if (delta < watchdog_hrtimer_sample_threshold) { + /* + * If ktime is jiffies based, a stalled timer would prevent + * jiffies from being incremented and the filter would look + * at a stale timestamp and never trigger. + */ + if (__this_cpu_inc_return(nmi_rearmed) < 10) + return false; + } + __this_cpu_write(nmi_rearmed, 0); + __this_cpu_write(last_timestamp, now); + return true; +} +#else +static inline bool watchdog_check_timestamp(void) +{ + return true; +} +#endif + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } + if (!watchdog_check_timestamp()) + return; + /* check for a hardlockup * This is done by making sure our timer interrupt * is incrementing. The timer interrupt should have diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98fe715522e8..c617b9d1d6cb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR +# +# Enables a timestamp based low pass filter to compensate for perf based +# hard lockup detection which runs too fast due to turbo modes. +# +config HARDLOCKUP_CHECK_TIMESTAMP + bool + # # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard # lockup detector rather than the perf based detector. -- cgit v1.2.3 From c005390374957baacbc38eef96ea360559510aa7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 12:24:47 +0200 Subject: blk-mq-pci: add a fallback when pci_irq_get_affinity returns NULL While pci_irq_get_affinity should never fail for SMP kernel that implement the affinity mapping, it will always return NULL in the UP case, so provide a fallback mapping of all queues to CPU 0 in that case. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq-pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c index 0c3354cf3552..76944e3271bf 100644 --- a/block/blk-mq-pci.c +++ b/block/blk-mq-pci.c @@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev) for (queue = 0; queue < set->nr_hw_queues; queue++) { mask = pci_irq_get_affinity(pdev, queue); if (!mask) - return -EINVAL; + goto fallback; for_each_cpu(cpu, mask) set->mq_map[cpu] = queue; } return 0; + +fallback: + WARN_ON_ONCE(set->nr_hw_queues > 1); + for_each_possible_cpu(cpu) + set->mq_map[cpu] = 0; + return 0; } EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues); -- cgit v1.2.3 From 739f79fc9db1b38f96b5a5109b247a650fbebf6d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Fri, 18 Aug 2017 15:15:48 -0700 Subject: mm: memcontrol: fix NULL pointer crash in test_clear_page_writeback() Jaegeuk and Brad report a NULL pointer crash when writeback ending tries to update the memcg stats: BUG: unable to handle kernel NULL pointer dereference at 00000000000003b0 IP: test_clear_page_writeback+0x12e/0x2c0 [...] RIP: 0010:test_clear_page_writeback+0x12e/0x2c0 Call Trace: end_page_writeback+0x47/0x70 f2fs_write_end_io+0x76/0x180 [f2fs] bio_endio+0x9f/0x120 blk_update_request+0xa8/0x2f0 scsi_end_request+0x39/0x1d0 scsi_io_completion+0x211/0x690 scsi_finish_command+0xd9/0x120 scsi_softirq_done+0x127/0x150 __blk_mq_complete_request_remote+0x13/0x20 flush_smp_call_function_queue+0x56/0x110 generic_smp_call_function_single_interrupt+0x13/0x30 smp_call_function_single_interrupt+0x27/0x40 call_function_single_interrupt+0x89/0x90 RIP: 0010:native_safe_halt+0x6/0x10 (gdb) l *(test_clear_page_writeback+0x12e) 0xffffffff811bae3e is in test_clear_page_writeback (./include/linux/memcontrol.h:619). 614 mod_node_page_state(page_pgdat(page), idx, val); 615 if (mem_cgroup_disabled() || !page->mem_cgroup) 616 return; 617 mod_memcg_state(page->mem_cgroup, idx, val); 618 pn = page->mem_cgroup->nodeinfo[page_to_nid(page)]; 619 this_cpu_add(pn->lruvec_stat->count[idx], val); 620 } 621 622 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, 623 gfp_t gfp_mask, The issue is that writeback doesn't hold a page reference and the page might get freed after PG_writeback is cleared (and the mapping is unlocked) in test_clear_page_writeback(). The stat functions looking up the page's node or zone are safe, as those attributes are static across allocation and free cycles. But page->mem_cgroup is not, and it will get cleared if we race with truncation or migration. It appears this race window has been around for a while, but less likely to trigger when the memcg stats were updated first thing after PG_writeback is cleared. Recent changes reshuffled this code to update the global node stats before the memcg ones, though, stretching the race window out to an extent where people can reproduce the problem. Update test_clear_page_writeback() to look up and pin page->mem_cgroup before clearing PG_writeback, then not use that pointer afterward. It is a partial revert of 62cccb8c8e7a ("mm: simplify lock_page_memcg()") but leaves the pageref-holding callsites that aren't affected alone. Link: http://lkml.kernel.org/r/20170809183825.GA26387@cmpxchg.org Fixes: 62cccb8c8e7a ("mm: simplify lock_page_memcg()") Signed-off-by: Johannes Weiner Reported-by: Jaegeuk Kim Tested-by: Jaegeuk Kim Reported-by: Bradley Bolen Tested-by: Brad Bolen Cc: Vladimir Davydov Cc: Michal Hocko Cc: [4.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 ++++++++-- mm/memcontrol.c | 43 +++++++++++++++++++++++++++++++------------ mm/page-writeback.c | 15 ++++++++++++--- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3914e3dd6168..9b15a4bcfa77 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -484,7 +484,8 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -void lock_page_memcg(struct page *page); +struct mem_cgroup *lock_page_memcg(struct page *page); +void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, @@ -809,7 +810,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void lock_page_memcg(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) +{ + return NULL; +} + +static inline void __unlock_page_memcg(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3df3c04d73ab..e09741af816f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1611,9 +1611,13 @@ cleanup: * @page: the page * * This function protects unlocked LRU pages from being moved to - * another cgroup and stabilizes their page->mem_cgroup binding. + * another cgroup. + * + * It ensures lifetime of the returned memcg. Caller is responsible + * for the lifetime of the page; __unlock_page_memcg() is available + * when @page might get freed inside the locked section. */ -void lock_page_memcg(struct page *page) +struct mem_cgroup *lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1622,18 +1626,24 @@ void lock_page_memcg(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - */ + * + * The RCU lock also protects the memcg from being freed when + * the page state that is going to change is the only thing + * preventing the page itself from being freed. E.g. writeback + * doesn't hold a page reference and relies on PG_writeback to + * keep off truncation, migration and so forth. + */ rcu_read_lock(); if (mem_cgroup_disabled()) - return; + return NULL; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return; + return NULL; if (atomic_read(&memcg->moving_account) <= 0) - return; + return memcg; spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1649,18 +1659,18 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; - return; + return memcg; } EXPORT_SYMBOL(lock_page_memcg); /** - * unlock_page_memcg - unlock a page->mem_cgroup binding - * @page: the page + * __unlock_page_memcg - unlock and unpin a memcg + * @memcg: the memcg + * + * Unlock and unpin a memcg returned by lock_page_memcg(). */ -void unlock_page_memcg(struct page *page) +void __unlock_page_memcg(struct mem_cgroup *memcg) { - struct mem_cgroup *memcg = page->mem_cgroup; - if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -1672,6 +1682,15 @@ void unlock_page_memcg(struct page *page) rcu_read_unlock(); } + +/** + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @page: the page + */ +void unlock_page_memcg(struct page *page) +{ + __unlock_page_memcg(page->mem_cgroup); +} EXPORT_SYMBOL(unlock_page_memcg); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 96e93b214d31..bf050ab025b7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2724,9 +2724,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); + struct mem_cgroup *memcg; + struct lruvec *lruvec; int ret; - lock_page_memcg(page); + memcg = lock_page_memcg(page); + lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page)); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2754,12 +2757,18 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } + /* + * NOTE: Page might be free now! Writeback doesn't hold a page + * reference on its own, it relies on truncation to wait for + * the clearing of PG_writeback. The below can only access + * page state that is static across allocation cycles. + */ if (ret) { - dec_lruvec_page_state(page, NR_WRITEBACK); + dec_lruvec_state(lruvec, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } - unlock_page_memcg(page); + __unlock_page_memcg(memcg); return ret; } -- cgit v1.2.3 From 92e5aae457787d0bc6b255200d2fb116edf69794 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 18 Aug 2017 15:15:51 -0700 Subject: kernel/watchdog: fix Kconfig constraints for perf hardlockup watchdog Commit 05a4a9527931 ("kernel/watchdog: split up config options") lost the perf-based hardlockup detector's dependency on PERF_EVENTS, which can result in broken builds with some powerpc configurations. Restore the dependency. Add it in for x86 too, despite x86 always selecting PERF_EVENTS it seems reasonable to make the dependency explicit. Link: http://lkml.kernel.org/r/20170810114452.6673-1-npiggin@gmail.com Fixes: 05a4a9527931 ("kernel/watchdog: split up config options") Signed-off-by: Nicholas Piggin Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/Kconfig | 2 +- arch/x86/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36f858c37ca7..81b0031f909f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -199,7 +199,7 @@ config PPC select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b7cf9e..29a1bf85e507 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -163,7 +163,7 @@ config X86 select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI - select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API -- cgit v1.2.3 From 8ada92799ec4de00f4bc0f10b1ededa256c1ab22 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:15:55 -0700 Subject: wait: add wait_event_killable_timeout() These are the few pending fixes I have queued up for v4.13-final. One is a a generic regression fix for recursive loops on kmod and the other one is a trivial print out correction. During the v4.13 development we assumed that recursive kmod loops were no longer possible. Clearly that is not true. The regression fix makes use of a new killable wait. We use a killable wait to be paranoid in how signals might be sent to modprobe and only accept a proper SIGKILL. The signal will only be available to userspace to issue *iff* a thread has already entered a wait state, and that happens only if we've already throttled after 50 kmod threads have been hit. Note that although it may seem excessive to trigger a failure afer 5 seconds if all kmod thread remain busy, prior to the series of changes that went into v4.13 we would actually *always* fatally fail any request which came in if the limit was already reached. The new waiting implemented in v4.13 actually gives us *more* breathing room -- the wait for 5 seconds is a wait for *any* kmod thread to finish. We give up and fail *iff* no kmod thread has finished and they're *all* running straight for 5 consecutive seconds. If 50 kmod threads are running consecutively for 5 seconds something else must be really bad. Recursive loops with kmod are bad but they're also hard to implement properly as a selftest without currently fooling current userspace tools like kmod [1]. For instance kmod will complain when you run depmod if it finds a recursive loop with symbol dependency between modules as such this type of recursive loop cannot go upstream as the modules_install target will fail after running depmod. These tests already exist on userspace kmod upstream though (refer to the testsuite/module-playground/mod-loop-*.c files). The same is not true if request_module() is used though, or worst if aliases are used. Likewise the issue with 64-bit kernels booting 32-bit userspace without a binfmt handler built-in is also currently not detected and proactively avoided by userspace kmod tools, or kconfig for all architectures. Although we could complain in the kernel when some of these individual recursive issues creep up, proactively avoiding these situations in userspace at build time is what we should keep striving for. Lastly, since recursive loops could happen with kmod it may mean recursive loops may also be possible with other kernel usermode helpers, this should be investigated and long term if we can come up with a more sensible generic solution even better! [0] https://git.kernel.org/pub/scm/linux/kernel/git/mcgrof/linux.git/log/?h=20170809-kmod-for-v4.13-final [1] https://git.kernel.org/pub/scm/utils/kernel/kmod/kmod.git This patch (of 3): This wait is similar to wait_event_interruptible_timeout() but only accepts SIGKILL interrupt signal. Other signals are ignored. Link: http://lkml.kernel.org/r/20170809234635.13443-2-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Kees Cook Cc: Dmitry Torokhov Cc: Jessica Yu Cc: Rusty Russell Cc: Michal Marek Cc: Petr Mladek Cc: Miroslav Benes Cc: Josh Poimboeuf Cc: "Eric W. Biederman" Cc: Shuah Khan Cc: Matt Redfearn Cc: Dan Carpenter Cc: Colin Ian King Cc: Daniel Mentz Cc: David Binderman Cc: Matt Redfearn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index 5b74e36c0ca8..dc19880c02f5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -757,6 +757,43 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *); __ret; \ }) +#define __wait_event_killable_timeout(wq_head, condition, timeout) \ + ___wait_event(wq_head, ___wait_cond_timeout(condition), \ + TASK_KILLABLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_killable_timeout - sleep until a condition gets true or a timeout elapses + * @wq_head: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_KILLABLE) until the + * @condition evaluates to true or a kill signal is received. + * The @condition is checked each time the waitqueue @wq_head is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a kill signal. + * + * Only kill signals interrupt this process. + */ +#define wait_event_killable_timeout(wq_head, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_killable_timeout(wq_head, \ + condition, timeout); \ + __ret; \ +}) + #define __wait_event_lock_irq(wq_head, condition, lock, cmd) \ (void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ -- cgit v1.2.3 From 2ba293c9e7db150943f06b12d3eb7213e7fae624 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:15:58 -0700 Subject: kmod: fix wait on recursive loop Recursive loops with module loading were previously handled in kmod by restricting the number of modprobe calls to 50 and if that limit was breached request_module() would return an error and a user would see the following on their kernel dmesg: request_module: runaway loop modprobe binfmt-464c Starting init:/sbin/init exists but couldn't execute it (error -8) This issue could happen for instance when a 64-bit kernel boots a 32-bit userspace on some architectures and has no 32-bit binary format hanlders. This is visible, for instance, when a CONFIG_MODULES enabled 64-bit MIPS kernel boots a into o32 root filesystem and the binfmt handler for o32 binaries is not built-in. After commit 6d7964a722af ("kmod: throttle kmod thread limit") we now don't have any visible signs of an error and the kernel just waits for the loop to end somehow. Although this *particular* recursive loop could also be addressed by doing a sanity check on search_binary_handler() and disallowing a modular binfmt to be required for modprobe, a generic solution for any recursive kernel kmod issues is still needed. This should catch these loops. We can investigate each loop and address each one separately as they come in, this however puts a stop gap for them as before. Link: http://lkml.kernel.org/r/20170809234635.13443-3-mcgrof@kernel.org Fixes: 6d7964a722af ("kmod: throttle kmod thread limit") Signed-off-by: Luis R. Rodriguez Reported-by: Matt Redfearn Tested-by: Matt Redfearn Cc: "Eric W. Biederman" Cc: Colin Ian King Cc: Dan Carpenter Cc: Daniel Mentz Cc: David Binderman Cc: Dmitry Torokhov Cc: Ingo Molnar Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Michal Marek Cc: Miroslav Benes Cc: Peter Zijlstra (Intel) Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/kmod.c b/kernel/kmod.c index 6d016c5d97c8..2f37acde640b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -70,6 +70,18 @@ static DECLARE_RWSEM(umhelper_sem); static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT); static DECLARE_WAIT_QUEUE_HEAD(kmod_wq); +/* + * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads + * running at the same time without returning. When this happens we + * believe you've somehow ended up with a recursive module dependency + * creating a loop. + * + * We have no option but to fail. + * + * Userspace should proactively try to detect and prevent these. + */ +#define MAX_KMOD_ALL_BUSY_TIMEOUT 5 + /* modprobe_path is set via /proc/sys. */ @@ -167,8 +179,17 @@ int __request_module(bool wait, const char *fmt, ...) pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...", atomic_read(&kmod_concurrent_max), MAX_KMOD_CONCURRENT, module_name); - wait_event_interruptible(kmod_wq, - atomic_dec_if_positive(&kmod_concurrent_max) >= 0); + ret = wait_event_killable_timeout(kmod_wq, + atomic_dec_if_positive(&kmod_concurrent_max) >= 0, + MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); + if (!ret) { + pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", + module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); + return -ETIME; + } else if (ret == -ERESTARTSYS) { + pr_warn_ratelimited("request_module: sigkill sent for modprobe %s, giving up", module_name); + return ret; + } } trace_module_request(module_name, wait, _RET_IP_); -- cgit v1.2.3 From 768dc4e48420955518974d8486c1b00ec05e7274 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 18 Aug 2017 15:16:02 -0700 Subject: test_kmod: fix description for -s -and -c parameters The descriptions were reversed, correct this. Link: http://lkml.kernel.org/r/20170809234635.13443-4-mcgrof@kernel.org Fixes: 64b671204afd71 ("test_sysctl: add generic script to expand on tests") Signed-off-by: Luis R. Rodriguez Reported-by: Daniel Mentz Cc: "Eric W. Biederman" Cc: Colin Ian King Cc: Dan Carpenter Cc: David Binderman Cc: Dmitry Torokhov Cc: Ingo Molnar Cc: Jessica Yu Cc: Josh Poimboeuf Cc: Kees Cook Cc: Matt Redfearn Cc: Matt Redfearn Cc: Michal Marek Cc: Miroslav Benes Cc: Peter Zijlstra (Intel) Cc: Petr Mladek Cc: Rusty Russell Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/kmod/kmod.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 8cecae9a8bca..7956ea3be667 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -473,8 +473,8 @@ usage() echo " all Runs all tests (default)" echo " -t Run test ID the number amount of times is recommended" echo " -w Watch test ID run until it runs into an error" - echo " -c Run test ID once" - echo " -s Run test ID x test-count number of times" + echo " -s Run test ID once" + echo " -c Run test ID x test-count number of times" echo " -l List all test ID list" echo " -h|--help Help" echo -- cgit v1.2.3 From 3010f876500f9ba921afaeccec30c45ca6584dc8 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 Aug 2017 15:16:05 -0700 Subject: mm: discard memblock data later There is existing use after free bug when deferred struct pages are enabled: The memblock_add() allocates memory for the memory array if more than 128 entries are needed. See comment in e820__memblock_setup(): * The bootstrap memblock region count maximum is 128 entries * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries * than that - so allow memblock resizing. This memblock memory is freed here: free_low_memory_core_early() We access the freed memblock.memory later in boot when deferred pages are initialized in this path: deferred_init_memmap() for_each_mem_pfn_range() __next_mem_pfn_range() type = &memblock.memory; One possible explanation for why this use-after-free hasn't been hit before is that the limit of INIT_MEMBLOCK_REGIONS has never been exceeded at least on systems where deferred struct pages were enabled. Tested by reducing INIT_MEMBLOCK_REGIONS down to 4 from the current 128, and verifying in qemu that this code is getting excuted and that the freed pages are sane. Link: http://lkml.kernel.org/r/1502485554-318703-2-git-send-email-pasha.tatashin@oracle.com Fixes: 7e18adb4f80b ("mm: meminit: initialise remaining struct pages in parallel with kswapd") Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Acked-by: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 6 ++++-- mm/memblock.c | 38 +++++++++++++++++--------------------- mm/nobootmem.c | 16 ---------------- mm/page_alloc.c | 4 ++++ 4 files changed, 25 insertions(+), 39 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 77d427974f57..bae11c7e7bf3 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ extern int memblock_debug; #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK #define __init_memblock __meminit #define __initdata_memblock __meminitdata +void memblock_discard(void); #else #define __init_memblock #define __initdata_memblock @@ -74,8 +75,6 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, int nid, ulong flags); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); -phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); @@ -110,6 +109,9 @@ void __next_mem_range_rev(u64 *idx, int nid, ulong flags, void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, phys_addr_t *out_end); +void __memblock_free_early(phys_addr_t base, phys_addr_t size); +void __memblock_free_late(phys_addr_t base, phys_addr_t size); + /** * for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. diff --git a/mm/memblock.c b/mm/memblock.c index 2cb25fe4452c..bf14aea6ab70 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -285,31 +285,27 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } #ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - -phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( - phys_addr_t *addr) -{ - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; - - *addr = __pa(memblock.reserved.regions); - - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.reserved.max); -} - -phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( - phys_addr_t *addr) +/** + * Discard memory and reserved arrays if they were allocated + */ +void __init memblock_discard(void) { - if (memblock.memory.regions == memblock_memory_init_regions) - return 0; + phys_addr_t addr, size; - *addr = __pa(memblock.memory.regions); + if (memblock.reserved.regions != memblock_reserved_init_regions) { + addr = __pa(memblock.reserved.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); + __memblock_free_late(addr, size); + } - return PAGE_ALIGN(sizeof(struct memblock_region) * - memblock.memory.max); + if (memblock.memory.regions == memblock_memory_init_regions) { + addr = __pa(memblock.memory.regions); + size = PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); + __memblock_free_late(addr, size); + } } - #endif /** diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 36454d0f96ee..3637809a18d0 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -146,22 +146,6 @@ static unsigned long __init free_low_memory_core_early(void) NULL) count += __free_memory_core(start, end); -#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK - { - phys_addr_t size; - - /* Free memblock.reserved array if it was allocated */ - size = get_allocated_memblock_reserved_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - - /* Free memblock.memory array if it was allocated */ - size = get_allocated_memblock_memory_regions_info(&start); - if (size) - count += __free_memory_core(start, start + size); - } -#endif - return count; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6d00f746c2fd..1bad301820c7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1584,6 +1584,10 @@ void __init page_alloc_init_late(void) /* Reinit limits that are based on free pages after the kernel is up */ files_maxfiles_init(); #endif +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + /* Discard memblock private memory */ + memblock_discard(); +#endif for_each_populated_zone(zone) set_zone_contiguous(zone); -- cgit v1.2.3 From f6ba488073fe8159851fe398cc3c5ee383bb4c7a Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Fri, 18 Aug 2017 15:16:08 -0700 Subject: slub: fix per memcg cache leak on css offline To avoid a possible deadlock, sysfs_slab_remove() schedules an asynchronous work to delete sysfs entries corresponding to the kmem cache. To ensure the cache isn't freed before the work function is called, it takes a reference to the cache kobject. The reference is supposed to be released by the work function. However, the work function (sysfs_slab_remove_workfn()) does nothing in case the cache sysfs entry has already been deleted, leaking the kobject and the corresponding cache. This may happen on a per memcg cache destruction, because sysfs entries of a per memcg cache are deleted on memcg offline if the cache is empty (see __kmemcg_cache_deactivate()). The kmemleak report looks like this: unreferenced object 0xffff9f798a79f540 (size 32): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s) hex dump (first 32 bytes): 6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a kmalloc-16(1599: 6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff newroot).#k..... backtrace: kmemleak_alloc+0x4a/0xa0 __kmalloc_track_caller+0x148/0x2c0 kvasprintf+0x66/0xd0 kasprintf+0x49/0x70 memcg_create_kmem_cache+0xe6/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 unreferenced object 0xffff9f79b6136840 (size 416): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s) hex dump (first 32 bytes): 40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00 @...>3.....@.... 00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00 ................ backtrace: kmemleak_alloc+0x4a/0xa0 kmem_cache_alloc+0x128/0x280 create_cache+0x3b/0x1e0 memcg_create_kmem_cache+0x118/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 Fix the leak by adding the missing call to kobject_put() to sysfs_slab_remove_workfn(). Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous") Signed-off-by: Vladimir Davydov Reported-by: Andrei Vagin Tested-by: Andrei Vagin Acked-by: Tejun Heo Acked-by: David Rientjes Cc: Michal Hocko Cc: Johannes Weiner Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: [4.12.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 1d3f9835f4ea..e8b4e31162ca 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5642,13 +5642,14 @@ static void sysfs_slab_remove_workfn(struct work_struct *work) * A cache is never shut down before deactivation is * complete, so no need to worry about synchronization. */ - return; + goto out; #ifdef CONFIG_MEMCG kset_unregister(s->memcg_kset); #endif kobject_uevent(&s->kobj, KOBJ_REMOVE); kobject_del(&s->kobj); +out: kobject_put(&s->kobj); } -- cgit v1.2.3 From 5b53a6ea886700a128b697a6fe8375340dea2c30 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:12 -0700 Subject: mm: fix double mmap_sem unlock on MMF_UNSTABLE enforced SIGBUS Tetsuo Handa has noticed that MMF_UNSTABLE SIGBUS path in handle_mm_fault causes a lockdep splat Out of memory: Kill process 1056 (a.out) score 603 or sacrifice child Killed process 1056 (a.out) total-vm:4268108kB, anon-rss:2246048kB, file-rss:0kB, shmem-rss:0kB a.out (1169) used greatest stack depth: 11664 bytes left DEBUG_LOCKS_WARN_ON(depth <= 0) ------------[ cut here ]------------ WARNING: CPU: 6 PID: 1339 at kernel/locking/lockdep.c:3617 lock_release+0x172/0x1e0 CPU: 6 PID: 1339 Comm: a.out Not tainted 4.13.0-rc3-next-20170803+ #142 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 RIP: 0010:lock_release+0x172/0x1e0 Call Trace: up_read+0x1a/0x40 __do_page_fault+0x28e/0x4c0 do_page_fault+0x30/0x80 page_fault+0x28/0x30 The reason is that the page fault path might have dropped the mmap_sem and returned with VM_FAULT_RETRY. MMF_UNSTABLE check however rewrites the error path to VM_FAULT_SIGBUS and we always expect mmap_sem taken in that path. Fix this by taking mmap_sem when VM_FAULT_RETRY is held in the MMF_UNSTABLE path. We cannot simply add VM_FAULT_SIGBUS to the existing error code because all arch specific page fault handlers and g-u-p would have to learn a new error code combination. Link: http://lkml.kernel.org/r/20170807113839.16695-2-mhocko@kernel.org Fixes: 3f70dc38cec2 ("mm: make sure that kthreads will not refault oom reaped memory") Reported-by: Tetsuo Handa Signed-off-by: Michal Hocko Acked-by: David Rientjes Cc: Andrea Argangeli Cc: "Kirill A. Shutemov" Cc: Oleg Nesterov Cc: Wenwei Tao Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index e158f7ac6730..c717b5bcc80e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3910,8 +3910,18 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, * further. */ if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) + && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) { + + /* + * We are going to enforce SIGBUS but the PF path might have + * dropped the mmap_sem already so take it again so that + * we do not break expectations of all arch specific PF paths + * and g-u-p + */ + if (ret & VM_FAULT_RETRY) + down_read(&vma->vm_mm->mmap_sem); ret = VM_FAULT_SIGBUS; + } return ret; } -- cgit v1.2.3 From 6b31d5955cb29a51c5baffee382f213d75e98fb8 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 18 Aug 2017 15:16:15 -0700 Subject: mm, oom: fix potential data corruption when oom_reaper races with writer Wenwei Tao has noticed that our current assumption that the oom victim is dying and never doing any visible changes after it dies, and so the oom_reaper can tear it down, is not entirely true. __task_will_free_mem consider a task dying when SIGNAL_GROUP_EXIT is set but do_group_exit sends SIGKILL to all threads _after_ the flag is set. So there is a race window when some threads won't have fatal_signal_pending while the oom_reaper could start unmapping the address space. Moreover some paths might not check for fatal signals before each PF/g-u-p/copy_from_user. We already have a protection for oom_reaper vs. PF races by checking MMF_UNSTABLE. This has been, however, checked only for kernel threads (use_mm users) which can outlive the oom victim. A simple fix would be to extend the current check in handle_mm_fault for all tasks but that wouldn't be sufficient because the current check assumes that a kernel thread would bail out after EFAULT from get_user*/copy_from_user and never re-read the same address which would succeed because the PF path has established page tables already. This seems to be the case for the only existing use_mm user currently (virtio driver) but it is rather fragile in general. This is even more fragile in general for more complex paths such as generic_perform_write which can re-read the same address more times (e.g. iov_iter_copy_from_user_atomic to fail and then iov_iter_fault_in_readable on retry). Therefore we have to implement MMF_UNSTABLE protection in a robust way and never make a potentially corrupted content visible. That requires to hook deeper into the PF path and check for the flag _every time_ before a pte for anonymous memory is established (that means all !VM_SHARED mappings). The corruption can be triggered artificially (http://lkml.kernel.org/r/201708040646.v746kkhC024636@www262.sakura.ne.jp) but there doesn't seem to be any real life bug report. The race window should be quite tight to trigger most of the time. Link: http://lkml.kernel.org/r/20170807113839.16695-3-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Wenwei Tao Tested-by: Tetsuo Handa Cc: "Kirill A. Shutemov" Cc: Andrea Argangeli Cc: David Rientjes Cc: Oleg Nesterov Cc: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 22 ++++++++++++++++++++++ mm/huge_memory.c | 30 ++++++++++++++++++++++-------- mm/memory.c | 46 ++++++++++++++++++++-------------------------- 3 files changed, 64 insertions(+), 34 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 8a266e2be5a6..76aac4ce39bc 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -6,6 +6,8 @@ #include #include #include +#include /* MMF_* */ +#include /* VM_FAULT* */ struct zonelist; struct notifier_block; @@ -63,6 +65,26 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) return tsk->signal->oom_mm; } +/* + * Checks whether a page fault on the given mm is still reliable. + * This is no longer true if the oom reaper started to reap the + * address space which is reflected by MMF_UNSTABLE flag set in + * the mm. At that moment any !shared mapping would lose the content + * and could cause a memory corruption (zero pages instead of the + * original content). + * + * User should call this before establishing a page table entry for + * a !shared mapping and under the proper page table lock. + * + * Return 0 when the PF is safe VM_FAULT_SIGBUS otherwise. + */ +static inline int check_stable_address_space(struct mm_struct *mm) +{ + if (unlikely(test_bit(MMF_UNSTABLE, &mm->flags))) + return VM_FAULT_SIGBUS; + return 0; +} + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 216114f6ef0b..90731e3b7e58 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -550,6 +551,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, struct mem_cgroup *memcg; pgtable_t pgtable; unsigned long haddr = vmf->address & HPAGE_PMD_MASK; + int ret = 0; VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -561,9 +563,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, pgtable = pte_alloc_one(vma->vm_mm, haddr); if (unlikely(!pgtable)) { - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - return VM_FAULT_OOM; + ret = VM_FAULT_OOM; + goto release; } clear_huge_page(page, haddr, HPAGE_PMD_NR); @@ -576,13 +577,14 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_none(*vmf->pmd))) { - spin_unlock(vmf->ptl); - mem_cgroup_cancel_charge(page, memcg, true); - put_page(page); - pte_free(vma->vm_mm, pgtable); + goto unlock_release; } else { pmd_t entry; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock_release; + /* Deliver the page fault to userland */ if (userfaultfd_missing(vma)) { int ret; @@ -610,6 +612,15 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, } return 0; +unlock_release: + spin_unlock(vmf->ptl); +release: + if (pgtable) + pte_free(vma->vm_mm, pgtable); + mem_cgroup_cancel_charge(page, memcg, true); + put_page(page); + return ret; + } /* @@ -688,7 +699,10 @@ int do_huge_pmd_anonymous_page(struct vm_fault *vmf) ret = 0; set = false; if (pmd_none(*vmf->pmd)) { - if (userfaultfd_missing(vma)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) { + spin_unlock(vmf->ptl); + } else if (userfaultfd_missing(vma)) { spin_unlock(vmf->ptl); ret = handle_userfault(vmf, VM_UFFD_MISSING); VM_BUG_ON(ret & VM_FAULT_FALLBACK); diff --git a/mm/memory.c b/mm/memory.c index c717b5bcc80e..fe2fba27ded2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -2893,6 +2894,7 @@ static int do_anonymous_page(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct mem_cgroup *memcg; struct page *page; + int ret = 0; pte_t entry; /* File mapping without ->vm_ops ? */ @@ -2925,6 +2927,9 @@ static int do_anonymous_page(struct vm_fault *vmf) vmf->address, &vmf->ptl); if (!pte_none(*vmf->pte)) goto unlock; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto unlock; /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2959,6 +2964,10 @@ static int do_anonymous_page(struct vm_fault *vmf) if (!pte_none(*vmf->pte)) goto release; + ret = check_stable_address_space(vma->vm_mm); + if (ret) + goto release; + /* Deliver the page fault to userland, check inside PT lock */ if (userfaultfd_missing(vma)) { pte_unmap_unlock(vmf->pte, vmf->ptl); @@ -2978,7 +2987,7 @@ setpte: update_mmu_cache(vma, vmf->address, vmf->pte); unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); - return 0; + return ret; release: mem_cgroup_cancel_charge(page, memcg, false); put_page(page); @@ -3252,7 +3261,7 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, int finish_fault(struct vm_fault *vmf) { struct page *page; - int ret; + int ret = 0; /* Did we COW the page? */ if ((vmf->flags & FAULT_FLAG_WRITE) && @@ -3260,7 +3269,15 @@ int finish_fault(struct vm_fault *vmf) page = vmf->cow_page; else page = vmf->page; - ret = alloc_set_pte(vmf, vmf->memcg, page); + + /* + * check even for read faults because we might have lost our CoWed + * page + */ + if (!(vmf->vma->vm_flags & VM_SHARED)) + ret = check_stable_address_space(vmf->vma->vm_mm); + if (!ret) + ret = alloc_set_pte(vmf, vmf->memcg, page); if (vmf->pte) pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; @@ -3900,29 +3917,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, mem_cgroup_oom_synchronize(false); } - /* - * This mm has been already reaped by the oom reaper and so the - * refault cannot be trusted in general. Anonymous refaults would - * lose data and give a zero page instead e.g. This is especially - * problem for use_mm() because regular tasks will just die and - * the corrupted data will not be visible anywhere while kthread - * will outlive the oom victim and potentially propagate the date - * further. - */ - if (unlikely((current->flags & PF_KTHREAD) && !(ret & VM_FAULT_ERROR) - && test_bit(MMF_UNSTABLE, &vma->vm_mm->flags))) { - - /* - * We are going to enforce SIGBUS but the PF path might have - * dropped the mmap_sem already so take it again so that - * we do not break expectations of all arch specific PF paths - * and g-u-p - */ - if (ret & VM_FAULT_RETRY) - down_read(&vma->vm_mm->mmap_sem); - ret = VM_FAULT_SIGBUS; - } - return ret; } EXPORT_SYMBOL_GPL(handle_mm_fault); -- cgit v1.2.3 From eb61b5911bdc923875cde99eb25203a0e2b06d43 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 18 Aug 2017 15:16:18 -0700 Subject: signal: don't remove SIGNAL_UNKILLABLE for traced tasks. When forcing a signal, SIGNAL_UNKILLABLE is removed to prevent recursive faults, but this is undesirable when tracing. For example, debugging an init process (whether global or namespace), hitting a breakpoint and SIGTRAP will force SIGTRAP and then remove SIGNAL_UNKILLABLE. Everything continues fine, but then once debugging has finished, the init process is left killable which is unlikely what the user expects, resulting in either an accidentally killed init or an init that stops reaping zombies. Link: http://lkml.kernel.org/r/20170815112806.10728-1-jamie.iles@oracle.com Signed-off-by: Jamie Iles Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/signal.c b/kernel/signal.c index 7e33f8c583e6..ed804a470dcd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1194,7 +1194,11 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) recalc_sigpending_and_wake(t); } } - if (action->sa.sa_handler == SIG_DFL) + /* + * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect + * debugging to leave init killable. + */ + if (action->sa.sa_handler == SIG_DFL && !t->ptrace) t->signal->flags &= ~SIGNAL_UNKILLABLE; ret = specific_send_sig_info(sig, info, t); spin_unlock_irqrestore(&t->sighand->siglock, flags); -- cgit v1.2.3 From da094e42848e3c36feaa3b5271e53983fd45424f Mon Sep 17 00:00:00 2001 From: Prakash Gupta Date: Fri, 18 Aug 2017 15:16:21 -0700 Subject: mm/cma_debug.c: fix stack corruption due to sprintf usage name[] in cma_debugfs_add_one() can only accommodate 16 chars including NULL to store sprintf output. It's common for cma device name to be larger than 15 chars. This can cause stack corrpution. If the gcc stack protector is turned on, this can cause a panic due to stack corruption. Below is one example trace: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffff8e69a75730 Call trace: dump_backtrace+0x0/0x2c4 show_stack+0x20/0x28 dump_stack+0xb8/0xf4 panic+0x154/0x2b0 print_tainted+0x0/0xc0 cma_debugfs_init+0x274/0x290 do_one_initcall+0x5c/0x168 kernel_init_freeable+0x1c8/0x280 Fix the short sprintf buffer in cma_debugfs_add_one() by using scnprintf() instead of sprintf(). Link: http://lkml.kernel.org/r/1502446217-21840-1-git-send-email-guptap@codeaurora.org Fixes: f318dd083c81 ("cma: Store a name in the cma structure") Signed-off-by: Prakash Gupta Acked-by: Laura Abbott Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma_debug.c b/mm/cma_debug.c index 595b757bef72..c03ccbc405a0 100644 --- a/mm/cma_debug.c +++ b/mm/cma_debug.c @@ -167,7 +167,7 @@ static void cma_debugfs_add_one(struct cma *cma, int idx) char name[16]; int u32s; - sprintf(name, "cma-%s", cma->name); + scnprintf(name, sizeof(name), "cma-%s", cma->name); tmp = debugfs_create_dir(name, cma_debugfs_root); -- cgit v1.2.3 From 73223e4e2e3867ebf033a5a8eb2e5df0158ccc99 Mon Sep 17 00:00:00 2001 From: zhong jiang Date: Fri, 18 Aug 2017 15:16:24 -0700 Subject: mm/mempolicy: fix use after free when calling get_mempolicy I hit a use after free issue when executing trinity and repoduced it with KASAN enabled. The related call trace is as follows. BUG: KASan: use after free in SyS_get_mempolicy+0x3c8/0x960 at addr ffff8801f582d766 Read of size 2 by task syz-executor1/798 INFO: Allocated in mpol_new.part.2+0x74/0x160 age=3 cpu=1 pid=799 __slab_alloc+0x768/0x970 kmem_cache_alloc+0x2e7/0x450 mpol_new.part.2+0x74/0x160 mpol_new+0x66/0x80 SyS_mbind+0x267/0x9f0 system_call_fastpath+0x16/0x1b INFO: Freed in __mpol_put+0x2b/0x40 age=4 cpu=1 pid=799 __slab_free+0x495/0x8e0 kmem_cache_free+0x2f3/0x4c0 __mpol_put+0x2b/0x40 SyS_mbind+0x383/0x9f0 system_call_fastpath+0x16/0x1b INFO: Slab 0xffffea0009cb8dc0 objects=23 used=8 fp=0xffff8801f582de40 flags=0x200000000004080 INFO: Object 0xffff8801f582d760 @offset=5984 fp=0xffff8801f582d600 Bytes b4 ffff8801f582d750: ae 01 ff ff 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ Object ffff8801f582d760: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object ffff8801f582d770: 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkk. Redzone ffff8801f582d778: bb bb bb bb bb bb bb bb ........ Padding ffff8801f582d8b8: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ Memory state around the buggy address: ffff8801f582d600: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801f582d680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8801f582d700: fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb fc !shared memory policy is not protected against parallel removal by other thread which is normally protected by the mmap_sem. do_get_mempolicy, however, drops the lock midway while we can still access it later. Early premature up_read is a historical artifact from times when put_user was called in this path see https://lwn.net/Articles/124754/ but that is gone since 8bccd85ffbaf ("[PATCH] Implement sys_* do_* layering in the memory policy layer."). but when we have the the current mempolicy ref count model. The issue was introduced accordingly. Fix the issue by removing the premature release. Link: http://lkml.kernel.org/r/1502950924-27521-1-git-send-email-zhongjiang@huawei.com Signed-off-by: zhong jiang Acked-by: Michal Hocko Cc: Minchan Kim Cc: Vlastimil Babka Cc: David Rientjes Cc: Mel Gorman Cc: [2.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d911fa5cb2a7..618ab125228b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -861,11 +861,6 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, *policy |= (pol->flags & MPOL_MODE_FLAGS); } - if (vma) { - up_read(¤t->mm->mmap_sem); - vma = NULL; - } - err = 0; if (nmask) { if (mpol_store_user_nodemask(pol)) { -- cgit v1.2.3 From 704b862f9efd6d4c87a8d0a344dda19bda9c6b69 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 18 Aug 2017 15:16:27 -0700 Subject: mm/vmalloc.c: don't unconditonally use __GFP_HIGHMEM Commit 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly") added use of __GFP_HIGHMEM for allocations. vmalloc_32 may use GFP_DMA/GFP_DMA32 which does not play nice with __GFP_HIGHMEM and will trigger a BUG in gfp_zone. Only add __GFP_HIGHMEM if we aren't using GFP_DMA/GFP_DMA32. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1482249 Link: http://lkml.kernel.org/r/20170816220705.31374-1-labbott@redhat.com Fixes: 19809c2da28a ("mm, vmalloc: use __GFP_HIGHMEM implicitly") Signed-off-by: Laura Abbott Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8698c1c86c4d..a47e3894c775 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1671,7 +1671,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, struct page **pages; unsigned int nr_pages, array_size, i; const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO; - const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN; + const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN; + const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ? + 0 : + __GFP_HIGHMEM; nr_pages = get_vm_area_size(area) >> PAGE_SHIFT; array_size = (nr_pages * sizeof(struct page *)); @@ -1679,7 +1682,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, area->nr_pages = nr_pages; /* Please note that the recursion is strictly bounded. */ if (array_size > PAGE_SIZE) { - pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM, + pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask, PAGE_KERNEL, node, area->caller); } else { pages = kmalloc_node(array_size, nested_gfp, node); @@ -1700,9 +1703,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, } if (node == NUMA_NO_NODE) - page = alloc_page(alloc_mask); + page = alloc_page(alloc_mask|highmem_mask); else - page = alloc_pages_node(node, alloc_mask, 0); + page = alloc_pages_node(node, alloc_mask|highmem_mask, 0); if (unlikely(!page)) { /* Successfully allocated i pages, free them in __vunmap() */ @@ -1710,7 +1713,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, goto fail; } area->pages[i] = page; - if (gfpflags_allow_blocking(gfp_mask)) + if (gfpflags_allow_blocking(gfp_mask|highmem_mask)) cond_resched(); } -- cgit v1.2.3 From c715b72c1ba406f133217b509044c38d8e714a37 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 18 Aug 2017 15:16:31 -0700 Subject: mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes Moving the x86_64 and arm64 PIE base from 0x555555554000 to 0x000100000000 broke AddressSanitizer. This is a partial revert of: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") The AddressSanitizer tool has hard-coded expectations about where executable mappings are loaded. The motivation for changing the PIE base in the above commits was to avoid the Stack-Clash CVEs that allowed executable mappings to get too close to heap and stack. This was mainly a problem on 32-bit, but the 64-bit bases were moved too, in an effort to proactively protect those systems (proofs of concept do exist that show 64-bit collisions, but other recent changes to fix stack accounting and setuid behaviors will minimize the impact). The new 32-bit PIE base is fine for ASan (since it matches the ET_EXEC base), so only the 64-bit PIE base needs to be reverted to let x86 and arm64 ASan binaries run again. Future changes to the 64-bit PIE base on these architectures can be made optional once a more dynamic method for dealing with AddressSanitizer is found. (e.g. always loading PIE into the mmap region for marked binaries.) Link: http://lkml.kernel.org/r/20170807201542.GA21271@beast Fixes: eab09532d400 ("binfmt_elf: use ELF_ET_DYN_BASE only for PIE") Fixes: 02445990a96e ("arm64: move ELF_ET_DYN_BASE to 4GB / 4MB") Signed-off-by: Kees Cook Reported-by: Kostya Serebryany Acked-by: Will Deacon Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/elf.h | 4 ++-- arch/x86/include/asm/elf.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index acae781f7359..3288c2b36731 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -114,10 +114,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#define ELF_ET_DYN_BASE 0x100000000UL +#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1c18d83d3f09..9aeb91935ce0 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -247,11 +247,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address + * 64-bit, this is above 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - 0x100000000UL) + (TASK_SIZE / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, -- cgit v1.2.3 From 8fbbe2d7cc478d1544f41f2271787c993c23a4f6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 19 Aug 2017 12:57:51 +0300 Subject: genirq/ipi: Fixup checks against nr_cpu_ids Valid CPU ids are [0, nr_cpu_ids-1] inclusive. Fixes: 3b8e29a82dd1 ("genirq: Implement ipi_send_mask/single()") Fixes: f9bce791ae2a ("genirq: Add a new function to get IPI reverse mapping") Signed-off-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170819095751.GB27864@avx2 --- kernel/irq/ipi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/ipi.c b/kernel/irq/ipi.c index 1a9abc1c8ea0..259a22aa9934 100644 --- a/kernel/irq/ipi.c +++ b/kernel/irq/ipi.c @@ -165,7 +165,7 @@ irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu) struct irq_data *data = irq_get_irq_data(irq); struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL; - if (!data || !ipimask || cpu > nr_cpu_ids) + if (!data || !ipimask || cpu >= nr_cpu_ids) return INVALID_HWIRQ; if (!cpumask_test_cpu(cpu, ipimask)) @@ -195,7 +195,7 @@ static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data, if (!chip->ipi_send_single && !chip->ipi_send_mask) return -EINVAL; - if (cpu > nr_cpu_ids) + if (cpu >= nr_cpu_ids) return -EINVAL; if (dest) { -- cgit v1.2.3 From 197e7e521384a23b9e585178f3f11c9fa08274b9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Aug 2017 13:26:27 -0700 Subject: Sanitize 'move_pages()' permission checks The 'move_paghes()' system call was introduced long long ago with the same permission checks as for sending a signal (except using CAP_SYS_NICE instead of CAP_SYS_KILL for the overriding capability). That turns out to not be a great choice - while the system call really only moves physical page allocations around (and you need other capabilities to do a lot of it), you can check the return value to map out some the virtual address choices and defeat ASLR of a binary that still shares your uid. So change the access checks to the more common 'ptrace_may_access()' model instead. This tightens the access checks for the uid, and also effectively changes the CAP_SYS_NICE check to CAP_SYS_PTRACE, but it's unlikely that anybody really _uses_ this legacy system call any more (we hav ebetter NUMA placement models these days), so I expect nobody to notice. Famous last words. Reported-by: Otto Ebeling Acked-by: Eric W. Biederman Cc: Willy Tarreau Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- mm/migrate.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index d68a41da6abb..e84eeb4e4356 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -1652,7 +1653,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, const int __user *, nodes, int __user *, status, int, flags) { - const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1676,14 +1676,9 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, /* * Check if this process has the right to modify the specified - * process. The right exists if the process has administrative - * capabilities, superuser privileges or the same - * userid as the target process. + * process. Use the regular "ptrace_may_access()" checks. */ - tcred = __task_cred(task); - if (!uid_eq(cred->euid, tcred->suid) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->uid, tcred->suid) && !uid_eq(cred->uid, tcred->uid) && - !capable(CAP_SYS_NICE)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { rcu_read_unlock(); err = -EPERM; goto out; -- cgit v1.2.3 From 14ccee78fc82f5512908f4424f541549a5705b89 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Aug 2017 14:13:52 -0700 Subject: Linux 4.13-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 90da7bdc3f45..235826f95741 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Fearless Coyote # *DOCUMENTATION* -- cgit v1.2.3 From bd0fdb191c8523a9126bb14ac1b22cb47698ebf5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 13 Mar 2017 03:03:49 +1000 Subject: KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9 When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync). Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a8efdf..9c9c983b864f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f +BEGIN_FTR_SECTION + PPC_MSGSYNC +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq 4f -- cgit v1.2.3 From 2c4fb78f78b6e420604ee1b05bdfb5c1d637869f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:52 +1000 Subject: KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss This adds a workaround for a bug in POWER9 DD1 chips where changing the CPPR (Current Processor Priority Register) can cause bits in the IPB (Interrupt Pending Buffer) to get lost. Thankfully it only happens when manually manipulating CPPR which is quite rare. When it does happen it can cause interrupts to be delayed or lost. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4636ca6e7d38..150be86b1018 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,7 +16,16 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; - /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */ + /* + * DD1 bug workaround: If PIPR is less favored than CPPR + * ignore the interrupt or we might incorrectly lose an IPB + * bit. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + if (pipr >= xc->hw_cppr) + return; + } /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); -- cgit v1.2.3 From bb9b52bd51dcb17b965a30167d0812902c1b9927 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:58 +1000 Subject: KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them This adds missing memory barriers to order updates/tests of the virtual CPPR and MFRR, thus fixing a lost IPI problem. While at it also document all barriers in this file. This fixes a bug causing guest IPIs to occasionally get lost. The symptom then is hangs or stalls in the guest. Signed-off-by: Benjamin Herrenschmidt Tested-by: Guilherme G. Piccoli Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 57 +++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 150be86b1018..d1ed2c41b5d2 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,6 +16,12 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; + /* + * Ensure any previous store to CPPR is ordered vs. + * the subsequent loads from PIPR or ACK. + */ + eieio(); + /* * DD1 bug workaround: If PIPR is less favored than CPPR * ignore the interrupt or we might incorrectly lose an IPB @@ -244,6 +250,11 @@ skip_ipi: /* * If we found an interrupt, adjust what the guest CPPR should * be as if we had just fetched that interrupt from HW. + * + * Note: This can only make xc->cppr smaller as the previous + * loop will only exit with hirq != 0 if prio is lower than + * the current xc->cppr. Thus we don't need to re-check xc->mfrr + * for pending IPIs. */ if (hirq) xc->cppr = prio; @@ -389,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) old_cppr = xc->cppr; xc->cppr = cppr; + /* + * Order the above update of xc->cppr with the subsequent + * read of xc->mfrr inside push_pending_to_hw() + */ + smp_mb(); + /* * We are masking less, we need to look for pending things * to deliver and set VP pending bits accordingly to trigger @@ -429,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) * used to signal MFRR changes is EOId when fetched from * the queue. */ - if (irq == XICS_IPI || irq == 0) + if (irq == XICS_IPI || irq == 0) { + /* + * This barrier orders the setting of xc->cppr vs. + * subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); goto bail; + } /* Find interrupt source */ sb = kvmppc_xive_find_source(xive, irq, &src); if (!sb) { pr_devel(" source not found !\n"); rc = H_PARAMETER; + /* Same as above */ + smp_mb(); goto bail; } state = &sb->irq_state[src]; kvmppc_xive_select_irq(state, &hw_num, &xd); state->in_eoi = true; - mb(); + + /* + * This barrier orders both setting of in_eoi above vs, + * subsequent test of guest_priority, and the setting + * of xc->cppr vs. subsquent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); again: if (state->guest_priority == MASKED) { @@ -470,6 +503,14 @@ again: } + /* + * This barrier orders the above guest_priority check + * and spin_lock/unlock with clearing in_eoi below. + * + * It also has to be a full mb() as it must ensure + * the MMIOs done in source_eoi() are completed before + * state->in_eoi is visible. + */ mb(); state->in_eoi = false; bail: @@ -504,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, /* Locklessly write over MFRR */ xc->mfrr = mfrr; + /* + * The load of xc->cppr below and the subsequent MMIO store + * to the IPI must happen after the above mfrr update is + * globally visible so that: + * + * - Synchronize with another CPU doing an H_EOI or a H_CPPR + * updating xc->cppr then reading xc->mfrr. + * + * - The target of the IPI sees the xc->mfrr update + */ + mb(); + /* Shoot the IPI if most favored than target cppr */ if (mfrr < xc->cppr) __x_writeq(0, __x_trig_page(&xc->vp_ipi_data)); -- cgit v1.2.3 From c469268cd523245cc58255f6696e0c295485cb0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Aug 2017 11:59:31 +0200 Subject: KVM: x86: block guest protection keys unless the host has them enabled If the host has protection keys disabled, we cannot read and write the guest PKRU---RDPKRU and WRPKRU fail with #GP(0) if CR4.PKE=0. Block the PKU cpuid bit in that case. This ensures that guest_CR4.PKE=1 implies host_CR4.PKE=1. Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59ca2eea522c..19adbb418443 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled) + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); -- cgit v1.2.3 From b9dd21e104bcd45e124acfe978a79df71259e59b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:14:38 +0200 Subject: KVM: x86: simplify handling of PKRU Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a simple comparison against the host value of the register. The write of PKRU in addition can be skipped if the guest has not enabled the feature. Once we do this, we need not test OSPKE in the host anymore, because guest_CR4.PKE=1 implies host_CR4.PKE=1. The static PKU test is kept to elide the code on older CPUs. Suggested-by: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/kvm_cache_regs.h | 5 ----- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/svm.c | 7 ------- arch/x86/kvm/vmx.c | 25 ++++++++----------------- 5 files changed, 10 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e..f4d120a3e22e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -492,6 +492,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 pkru; u32 hflags; u64 efer; u64 apic_base; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 762cdf2595f9..e1e89ee4af75 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); } -static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu) -{ - return kvm_x86_ops->get_pkru(vcpu); -} - static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d7d248a000dd..4b9a3ae6b725 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, * index of the protection domain, so pte_pkey * 2 is * is the index of the first bit for the domain. */ - pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; + pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ offset = (pfec & ~1) + diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 56ba05312759..af256b786a70 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b21b1223035..c6ef2940119b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -636,8 +636,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - bool guest_pkru_valid; - u32 guest_pkru; u32 host_pkru; /* @@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } -static u32 vmx_get_pkru(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->guest_pkru; -} - static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - if (vmx->guest_pkru_valid) - __write_pkru(vmx->guest_pkru); + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); @@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * back on host, so it is safe to read guest PKRU from current * XSAVE. */ - if (boot_cpu_has(X86_FEATURE_OSPKE)) { - vmx->guest_pkru = __read_pkru(); - if (vmx->guest_pkru != vmx->host_pkru) { - vmx->guest_pkru_valid = true; + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { + vcpu->arch.pkru = __read_pkru(); + if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); - } else - vmx->guest_pkru_valid = false; } /* @@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .get_pkru = vmx_get_pkru, - .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, -- cgit v1.2.3 From 38cfd5e3df9c4f88e76b547eee2087ee5c042ae2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:16:29 +0200 Subject: KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state The host pkru is restored right after vcpu exit (commit 1be0e61), so KVM_GET_XSAVE will return the host PKRU value instead. Fix this by using the guest PKRU explicitly in fill_xsave and load_xsave. This part is based on a patch by Junkang Fu. The host PKRU data may also not match the value in vcpu->arch.guest_fpu.state, because it could have been changed by userspace since the last time it was saved, so skip loading it in kvm_load_guest_fpu. Reported-by: Junkang Fu Cc: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/internal.h | 6 +++--- arch/x86/kvm/x86.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 255645f60ca2..554cdb205d17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); @@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate); + __copy_kernel_to_fpregs(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d734aa8c5b4f..05a5e57c6f39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest + offset, src, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(dest + offset, &vcpu->arch.pkru, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest + offset, src, size); + } valid -= feature; @@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest, src + offset, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(&vcpu->arch.pkru, src + offset, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest, src + offset, size); } valid -= feature; @@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); trace_kvm_fpu(1); } -- cgit v1.2.3 From 47c5310a8dbe7c2cb9f0083daa43ceed76c257fa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 24 Aug 2017 19:14:47 +1000 Subject: KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce() Nixiaoming pointed out that there is a memory leak in kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd() fails; the memory allocated for the kvmppc_spapr_tce_table struct is not freed, and nor are the pages allocated for the iommu tables. In addition, we have already incremented the process's count of locked memory pages, and this doesn't get restored on error. David Hildenbrand pointed out that there is a race in that the function checks early on that there is not already an entry in the stt->iommu_tables list with the same LIOBN, but an entry with the same LIOBN could get added between then and when the new entry is added to the list. This fixes all three problems. To simplify things, we now call anon_inode_getfd() before placing the new entry in the list. The check for an existing entry is done while holding the kvm->lock mutex, immediately before adding the new entry to the list. Finally, on failure we now call kvmppc_account_memlimit to decrement the process's count of locked memory pages. Reported-by: Nixiaoming Reported-by: David Hildenbrand Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 56 ++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14304eb..53766e2bc029 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; + int fd = -1; if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; ret = -ENOMEM; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); + ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + if (ret < 0) + goto fail; mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (!ret) + return fd; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + put_unused_fd(fd); - kfree(stt); - } + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } -- cgit v1.2.3 From edd03602d97236e8fea13cd76886c576186aa307 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 28 Aug 2017 14:31:24 +1000 Subject: KVM: PPC: Book3S HV: Protect updates to spapr_tce_tables list Al Viro pointed out that while one thread of a process is executing in kvm_vm_ioctl_create_spapr_tce(), another thread could guess the file descriptor returned by anon_inode_getfd() and close() it before the first thread has added it to the kvm->arch.spapr_tce_tables list. That highlights a more general problem: there is no mutual exclusion between writers to the spapr_tce_tables list, leading to the possibility of the list becoming corrupted, which could cause a host kernel crash. To fix the mutual exclusion problem, we add a mutex_lock/unlock pair around the list_del_rce in kvm_spapr_tce_release(). Also, this moves the call to anon_inode_getfd() inside the region protected by the kvm->lock mutex, after we have done the check for a duplicate LIOBN. This means that if another thread does guess the file descriptor and closes it, its call to kvm_spapr_tce_release() will not do any harm because it will have to wait until the first thread has released kvm->lock. With this, there are no failure points in kvm_vm_ioctl_create_spapr_tce() after the call to anon_inode_getfd(). The other things that the second thread could do with the guessed file descriptor are to mmap it or to pass it as a parameter to a KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl on a KVM device fd. An mmap call won't cause any harm because kvm_spapr_tce_mmap() and kvm_spapr_tce_fault() don't access the spapr_tce_tables list or the kvmppc_spapr_tce_table.list field, and the fields that they do use have been properly initialized by the time of the anon_inode_getfd() call. The KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE ioctl calls kvm_spapr_tce_attach_iommu_group(), which scans the spapr_tce_tables list looking for the kvmppc_spapr_tce_table struct corresponding to the fd given as the parameter. Either it will find the new entry or it won't; if it doesn't, it just returns an error, and if it does, it will function normally. So, in each case there is no harmful effect. Reviewed-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 53766e2bc029..8f2da8bba737 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -265,8 +265,11 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; struct kvmppc_spapr_tce_iommu_table *stit, *tmp; + struct kvm *kvm = stt->kvm; + mutex_lock(&kvm->lock); list_del_rcu(&stt->list); + mutex_unlock(&kvm->lock); list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { WARN_ON(!kref_read(&stit->kref)); @@ -298,7 +301,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, unsigned long npages, size; int ret = -ENOMEM; int i; - int fd = -1; if (!args->size) return -EINVAL; @@ -328,11 +330,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); - if (ret < 0) - goto fail; - mutex_lock(&kvm->lock); /* Check this LIOBN hasn't been previously allocated */ @@ -344,17 +341,19 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, } } - if (!ret) { + if (!ret) + ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + + if (ret >= 0) { list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); kvm_get_kvm(kvm); } mutex_unlock(&kvm->lock); - if (!ret) - return fd; - - put_unused_fd(fd); + if (ret >= 0) + return ret; fail: for (i = 0; i < npages; i++) -- cgit v1.2.3