From 0764e365dacd0b8f75c1736f9236be280649bd18 Mon Sep 17 00:00:00 2001 From: Clément Bœsch Date: Sun, 5 Sep 2021 02:20:27 +0200 Subject: arm64: dts: allwinner: h5: NanoPI Neo 2: Fix ethernet node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RX and TX delay are provided by ethernet PHY. Reflect that in ethernet node. Fixes: 44a94c7ef989 ("arm64: dts: allwinner: H5: Restore EMAC changes") Signed-off-by: Clément Bœsch Reviewed-by: Jernej Skrabec Reviewed-by: Andrew Lunn Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210905002027.171984-1-u@pkh.me --- arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index 02f8e72f0cad..05486cccee1c 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From 55dd7e059098ce4bd0a55c251cb78e74604abb57 Mon Sep 17 00:00:00 2001 From: Bastien Roucariès Date: Thu, 16 Sep 2021 08:17:21 +0000 Subject: ARM: dts: sun7i: A20-olinuxino-lime2: Fix ethernet phy-mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit bbc4d71d6354 ("net: phy: realtek: fix rtl8211e rx/tx delay config") sets the RX/TX delay according to the phy-mode property in the device tree. For the A20-olinuxino-lime2 board this is "rgmii", which is the wrong setting. Following the example of a900cac3750b ("ARM: dts: sun7i: a20: bananapro: Fix ethernet phy-mode") the phy-mode is changed to "rgmii-id" which gets the Ethernet working again on this board. Signed-off-by: Bastien Roucariès Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20210916081721.237137-1-rouca@debian.org --- arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 8077f1716fbc..ecb91fb899ff 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; -- cgit v1.2.3 From 636707e593120c9fa35f6a908c0d052f6154910d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:20 +0200 Subject: mac80211: mesh: fix HE operation element length check The length check here was bad, if the length doesn't at least include the length of the fixed part, we cannot call ieee80211_he_oper_size() to determine the total. Fix this, and convert to cfg80211_find_ext_elem() while at it. Cc: stable@vger.kernel.org Fixes: 70debba3ab7d ("mac80211: save HE oper info in BSS config for mesh") Link: https://lore.kernel.org/r/20210930131120.b0f940976c56.I954e1be55e9f87cc303165bff5c906afe1e54648@changeid Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 97095b7c9c64..5dcfd53a4ab6 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, u8 *ie, u8 ie_len) { struct ieee80211_supported_band *sband; - const u8 *cap; + const struct element *cap; const struct ieee80211_he_operation *he_oper = NULL; sband = ieee80211_get_sband(sdata); @@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.he_support = true; - cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); - if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3])) - he_oper = (void *)(cap + 3); + cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len); + if (cap && cap->datalen >= 1 + sizeof(*he_oper) && + cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1)) + he_oper = (void *)(cap->data + 1); if (he_oper) sdata->vif.bss_conf.he_oper.params = -- cgit v1.2.3 From a2083eeb119fb9307258baea9b7c243ca9a2e0b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2021 13:11:21 +0200 Subject: cfg80211: scan: fix RCU in cfg80211_add_nontrans_list() The SSID pointer is pointing to RCU protected data, so we need to have it under rcu_read_lock() for the entire use. Fix this. Cc: stable@vger.kernel.org Fixes: 0b8fb8235be8 ("cfg80211: Parsing of Multiple BSSID information in scanning") Link: https://lore.kernel.org/r/20210930131120.6ddfc603aa1d.I2137344c4e2426525b1a8e4ce5fca82f8ecbfe7e@changeid Signed-off-by: Johannes Berg --- net/wireless/scan.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 11c68b159324..adc0d14cfd86 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, } ssid_len = ssid[1]; ssid = ssid + 2; - rcu_read_unlock(); /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + rcu_read_unlock(); return 0; + } } + rcu_read_unlock(); + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; -- cgit v1.2.3 From f33eb7f29c16ba78db3221ee02346fd832274cdd Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Tue, 14 Sep 2021 15:11:21 -0700 Subject: reset: brcmstb-rescal: fix incorrect polarity of status bit The readl_poll_timeout() should complete when the status bit is a 1, not 0. Fixes: 4cf176e52397 ("reset: Add Broadcom STB RESCAL reset controller") Signed-off-by: Jim Quinlan Signed-off-by: Florian Fainelli Link: https://lore.kernel.org/r/20210914221122.62315-1-f.fainelli@gmail.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-brcmstb-rescal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/reset/reset-brcmstb-rescal.c b/drivers/reset/reset-brcmstb-rescal.c index b6f074d6a65f..433fa0c40e47 100644 --- a/drivers/reset/reset-brcmstb-rescal.c +++ b/drivers/reset/reset-brcmstb-rescal.c @@ -38,7 +38,7 @@ static int brcm_rescal_reset_set(struct reset_controller_dev *rcdev, } ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg, - !(reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); + (reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); if (ret) { dev_err(data->dev, "time out on SATA/PCIe rescal\n"); return ret; -- cgit v1.2.3 From 4af160707d7197d671a5b0ad9899383b6cb7c067 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 14 Sep 2021 11:15:59 +0200 Subject: reset: pistachio: Re-enable driver selection After the retirement of MACH_PISTACHIO, the Pistachio Reset Driver is no longer auto-enabled when building a kernel for Pistachio systems. Worse, the driver cannot be enabled by the user at all (unless compile-testing), as the config symbol is invisible. Fix this partially by making the symbol visible again when compiling for MIPS, and dropping the useless default. The user still has to enable the driver manually when building a kernel for Pistachio systems, though. Fixes: 104f942b2832ab13 ("MIPS: Retire MACH_PISTACHIO") Signed-off-by: Geert Uytterhoeven Reviewed-by: Rahul Bedarkar Reviewed-by: Jiaxun Yang Link: https://lore.kernel.org/r/2c399e52540536df9c4006e46ef93fbccdde88db.1631610825.git.geert+renesas@glider.be Signed-off-by: Philipp Zabel --- drivers/reset/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig index be799a5abf8a..b0056ae5d463 100644 --- a/drivers/reset/Kconfig +++ b/drivers/reset/Kconfig @@ -147,8 +147,8 @@ config RESET_OXNAS bool config RESET_PISTACHIO - bool "Pistachio Reset Driver" if COMPILE_TEST - default MACH_PISTACHIO + bool "Pistachio Reset Driver" + depends on MIPS || COMPILE_TEST help This enables the reset driver for ImgTec Pistachio SoCs. -- cgit v1.2.3 From c045ceb5a145d2a9a4bf33cbc55185ddf99f60ab Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Wed, 15 Sep 2021 11:55:14 +0300 Subject: reset: tegra-bpmp: Handle errors in BPMP response The return value from tegra_bpmp_transfer indicates the success or failure of the IPC transaction with BPMP. If the transaction succeeded, we also need to check the actual command's result code. Add code to do this. Signed-off-by: Mikko Perttunen Link: https://lore.kernel.org/r/20210915085517.1669675-2-mperttunen@nvidia.com Signed-off-by: Philipp Zabel --- drivers/reset/tegra/reset-bpmp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/reset/tegra/reset-bpmp.c b/drivers/reset/tegra/reset-bpmp.c index 24d3395964cc..4c5bba52b105 100644 --- a/drivers/reset/tegra/reset-bpmp.c +++ b/drivers/reset/tegra/reset-bpmp.c @@ -20,6 +20,7 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, struct tegra_bpmp *bpmp = to_tegra_bpmp(rstc); struct mrq_reset_request request; struct tegra_bpmp_message msg; + int err; memset(&request, 0, sizeof(request)); request.cmd = command; @@ -30,7 +31,13 @@ static int tegra_bpmp_reset_common(struct reset_controller_dev *rstc, msg.tx.data = &request; msg.tx.size = sizeof(request); - return tegra_bpmp_transfer(bpmp, &msg); + err = tegra_bpmp_transfer(bpmp, &msg); + if (err) + return err; + if (msg.rx.ret) + return -EINVAL; + + return 0; } static int tegra_bpmp_reset_module(struct reset_controller_dev *rstc, -- cgit v1.2.3 From 3ad60b4b3570937f3278509fe6797a5093ce53f8 Mon Sep 17 00:00:00 2001 From: Paweł Anikiel Date: Mon, 20 Sep 2021 14:41:41 +0200 Subject: reset: socfpga: add empty driver allowing consumers to probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The early reset driver doesn't ever probe, which causes consuming devices to be unable to probe. Add an empty driver to set this device as available, allowing consumers to probe. Signed-off-by: Paweł Anikiel Link: https://lore.kernel.org/r/20210920124141.1166544-4-pan@semihalf.com Signed-off-by: Philipp Zabel --- drivers/reset/reset-socfpga.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index 2a72f861f798..8c6492e5693c 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -92,3 +92,29 @@ void __init socfpga_reset_init(void) for_each_matching_node(np, socfpga_early_reset_dt_ids) a10_reset_init(np); } + +/* + * The early driver is problematic, because it doesn't register + * itself as a driver. This causes certain device links to prevent + * consumer devices from probing. The hacky solution is to register + * an empty driver, whose only job is to attach itself to the reset + * manager and call probe. + */ +static const struct of_device_id socfpga_reset_dt_ids[] = { + { .compatible = "altr,rst-mgr", }, + { /* sentinel */ }, +}; + +static int reset_simple_probe(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver reset_socfpga_driver = { + .probe = reset_simple_probe, + .driver = { + .name = "socfpga-reset", + .of_match_table = socfpga_reset_dt_ids, + }, +}; +builtin_platform_driver(reset_socfpga_driver); -- cgit v1.2.3 From e93c7d8e8c4cf80c6afe56e71c83c1cd31b4fce1 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 5 Oct 2021 13:23:02 -0500 Subject: RDMA/irdma: Process extended CQ entries correctly The valid bit for extended CQE's written by HW is retrieved from the incorrect quad-word. This leads to missed completions for any UD traffic particularly after a wrap-around. Get the valid bit for extended CQE's from the correct quad-word in the descriptor. Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Link: https://lore.kernel.org/r/20211005182302.374-1-shiraz.saleem@intel.com Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 5fb92de1f015..9b544a3b1288 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1092,12 +1092,12 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) if (cq->avoid_mem_cflct) { ext_cqe = (__le64 *)((u8 *)cqe + 32); get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); } else { peek_head = (cq->cq_ring.head + 1) % cq->cq_ring.size; ext_cqe = cq->cq_base[peek_head].buf; get_64bit_val(ext_cqe, 24, &qword7); - polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); + polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword7); if (!peek_head) polarity ^= 1; } -- cgit v1.2.3 From 1ab52ac1e9bc9391f592c9fa8340a6e3e9c36286 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 6 Oct 2021 12:31:53 +0300 Subject: RDMA/mlx5: Set user priority for DCT Currently, the driver doesn't set the PCP-based priority for DCT, hence DCT response packets are transmitted without user priority. Fix it by setting user provided priority in the eth_prio field in the DCT context, which in turn sets the value in the transmitted packet. Fixes: 776a3906b692 ("IB/mlx5: Add support for DC target QP") Link: https://lore.kernel.org/r/5fd2d94a13f5742d8803c218927322257d53205c.1633512672.git.leonro@nvidia.com Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/qp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b2fca110346c..e5abbcfc1d57 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4458,6 +4458,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, mtu, attr->path_mtu); MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index); MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, -- cgit v1.2.3 From 136f282028dae7d9dd68469b197aa2b36b410992 Mon Sep 17 00:00:00 2001 From: Miguel Bernal Marin Date: Wed, 6 Oct 2021 00:13:18 -0500 Subject: ACPI: tools: fix compilation error When ACPI tools are compiled, the following error is showed: $ cd tools/power/acpi $ make DESCEND tools/acpidbg MKDIR include CP include CC tools/acpidbg/acpidbg.o In file included from /home/linux/tools/power/acpi/include/acpi/platform/acenv.h:152, from /home/linux/tools/power/acpi/include/acpi/acpi.h:22, from acpidbg.c:9: /home/linux/tools/power/acpi/include/acpi/platform/acgcc.h:25:10: fatal error: linux/stdarg.h: No such file or directory 29 | #include | ^~~~~~~~~~~~~~~~ compilation terminated. Use the ACPICA logic: just identify when it is used inside the kernel or by an ACPI tool. Fixes: c0891ac15f04 ("isystem: ship and use stdarg.h") Signed-off-by: Miguel Bernal Marin [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/acgcc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h index fb172a03a753..20ecb004f5a4 100644 --- a/include/acpi/platform/acgcc.h +++ b/include/acpi/platform/acgcc.h @@ -22,9 +22,14 @@ typedef __builtin_va_list va_list; #define va_arg(v, l) __builtin_va_arg(v, l) #define va_copy(d, s) __builtin_va_copy(d, s) #else +#ifdef __KERNEL__ #include -#endif -#endif +#else +/* Used to build acpi tools */ +#include +#endif /* __KERNEL__ */ +#endif /* ACPI_USE_BUILTIN_STDARG */ +#endif /* ! va_arg */ #define ACPI_INLINE __inline__ -- cgit v1.2.3 From 55e6d8037805b3400096d621091dfbf713f97e83 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 12 Oct 2021 10:37:35 +0800 Subject: regmap: Fix possible double-free in regcache_rbtree_exit() In regcache_rbtree_insert_to_block(), when 'present' realloc failed, the 'blk' which is supposed to assign to 'rbnode->block' will be freed, so 'rbnode->block' points a freed memory, in the error handling path of regcache_rbtree_init(), 'rbnode->block' will be freed again in regcache_rbtree_exit(), KASAN will report double-free as follows: BUG: KASAN: double-free or invalid-free in kfree+0xce/0x390 Call Trace: slab_free_freelist_hook+0x10d/0x240 kfree+0xce/0x390 regcache_rbtree_exit+0x15d/0x1a0 regcache_rbtree_init+0x224/0x2c0 regcache_init+0x88d/0x1310 __regmap_init+0x3151/0x4a80 __devm_regmap_init+0x7d/0x100 madera_spi_probe+0x10f/0x333 [madera_spi] spi_probe+0x183/0x210 really_probe+0x285/0xc30 To fix this, moving up the assignment of rbnode->block to immediately after the reallocation has succeeded so that the data structure stays valid even if the second reallocation fails. Reported-by: Hulk Robot Fixes: 3f4ff561bc88b ("regmap: rbtree: Make cache_present bitmap per node") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20211012023735.1632786-1-yangyingliang@huawei.com Signed-off-by: Mark Brown --- drivers/base/regmap/regcache-rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbf..fabf87058d80 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { present = krealloc(rbnode->cache_present, BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; -- cgit v1.2.3 From 50b6cb3516365cb69753b006be2b61c966b70588 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 7 Oct 2021 21:35:46 -0700 Subject: scsi: core: Fix shost->cmd_per_lun calculation in scsi_add_host_with_dma() After commit ea2f0f77538c ("scsi: core: Cap scsi_host cmd_per_lun at can_queue"), a 416-CPU VM running on Hyper-V hangs during boot because the hv_storvsc driver sets scsi_driver.can_queue to an integer value that exceeds SHRT_MAX, and hence scsi_add_host_with_dma() sets shost->cmd_per_lun to a negative "short" value. Use min_t(int, ...) to work around the issue. Link: https://lore.kernel.org/r/20211008043546.6006-1-decui@microsoft.com Fixes: ea2f0f77538c ("scsi: core: Cap scsi_host cmd_per_lun at can_queue") Cc: stable@vger.kernel.org Reviewed-by: Haiyang Zhang Reviewed-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Dexuan Cui Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 3f6f14f0cafb..24b72ee4246f 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -220,7 +220,8 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, goto fail; } - shost->cmd_per_lun = min_t(short, shost->cmd_per_lun, + /* Use min_t(int, ...) in case shost->can_queue exceeds SHRT_MAX */ + shost->cmd_per_lun = min_t(int, shost->cmd_per_lun, shost->can_queue); error = scsi_init_sense_cache(shost); -- cgit v1.2.3 From 187a580c9e7895978dcd1e627b9c9e7e3d13ca96 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 10 Oct 2021 11:19:04 -0500 Subject: scsi: iscsi: Fix set_param() handling In commit 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") we meant to add a check where before we call ->set_param() we make sure the iscsi_cls_connection is bound. The problem is that between versions 4 and 5 of the patch the deletion of the unchecked set_param() call was dropped so we ended up with 2 calls. As a result we can still hit a crash where we access the unbound connection on the first call. This patch removes that first call. Fixes: 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") Link: https://lore.kernel.org/r/20211010161904.60471-1-michael.christie@oracle.com Reviewed-by: Lee Duncan Reviewed-by: Li Feng Signed-off-by: Mike Christie Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 922e4c7bd88e..78343d3f9385 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2930,8 +2930,6 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - err = transport->set_param(conn, ev->u.set_param.param, - data, ev->u.set_param.len); if ((conn->state == ISCSI_CONN_BOUND) || (conn->state == ISCSI_CONN_UP)) { err = transport->set_param(conn, ev->u.set_param.param, -- cgit v1.2.3 From 6fd13d699d24beaa28310848fe65fd898fbb9043 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Thu, 7 Oct 2021 14:28:28 +0200 Subject: scsi: storvsc: Fix validation for unsolicited incoming packets The validation on the length of incoming packets performed in storvsc_on_channel_callback() does not apply to unsolicited packets with ID of 0 sent by Hyper-V. Adjust the validation for such unsolicited packets. Link: https://lore.kernel.org/r/20211007122828.469289-1-parri.andrea@gmail.com Fixes: 91b1b640b834b2 ("scsi: storvsc: Validate length of incoming packet in storvsc_on_channel_callback()") Reported-by: Dexuan Cui Reviewed-by: Michael Kelley Reviewed-by: Haiyang Zhang Signed-off-by: Andrea Parri (Microsoft) Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index ebbbc1299c62..9eb1b88a29dd 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1285,11 +1285,15 @@ static void storvsc_on_channel_callback(void *context) foreach_vmbus_pkt(desc, channel) { struct vstor_packet *packet = hv_pkt_data(desc); struct storvsc_cmd_request *request = NULL; + u32 pktlen = hv_pkt_datalen(desc); u64 rqst_id = desc->trans_id; + u32 minlen = rqst_id ? sizeof(struct vstor_packet) - + stor_device->vmscsi_size_delta : sizeof(enum vstor_packet_operation); - if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) - - stor_device->vmscsi_size_delta) { - dev_err(&device->device, "Invalid packet len\n"); + if (pktlen < minlen) { + dev_err(&device->device, + "Invalid pkt: id=%llu, len=%u, minlen=%u\n", + rqst_id, pktlen, minlen); continue; } @@ -1302,13 +1306,23 @@ static void storvsc_on_channel_callback(void *context) if (rqst_id == 0) { /* * storvsc_on_receive() looks at the vstor_packet in the message - * from the ring buffer. If the operation in the vstor_packet is - * COMPLETE_IO, then we call storvsc_on_io_completion(), and - * dereference the guest memory address. Make sure we don't call - * storvsc_on_io_completion() with a guest memory address that is - * zero if Hyper-V were to construct and send such a bogus packet. + * from the ring buffer. + * + * - If the operation in the vstor_packet is COMPLETE_IO, then + * we call storvsc_on_io_completion(), and dereference the + * guest memory address. Make sure we don't call + * storvsc_on_io_completion() with a guest memory address + * that is zero if Hyper-V were to construct and send such + * a bogus packet. + * + * - If the operation in the vstor_packet is FCHBA_DATA, then + * we call cache_wwn(), and access the data payload area of + * the packet (wwn_packet); however, there is no guarantee + * that the packet is big enough to contain such area. + * Future-proof the code by rejecting such a bogus packet. */ - if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) { + if (packet->operation == VSTOR_OPERATION_COMPLETE_IO || + packet->operation == VSTOR_OPERATION_FCHBA_DATA) { dev_err(&device->device, "Invalid packet with ID of 0\n"); continue; } -- cgit v1.2.3 From f2b85040acec9a928b4eb1b57a989324e8e38d3f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 8 Oct 2021 13:01:18 +0800 Subject: scsi: core: Put LLD module refcnt after SCSI device is released SCSI host release is triggered when SCSI device is freed. We have to make sure that the low-level device driver module won't be unloaded before SCSI host instance is released because shost->hostt is required in the release handler. Make sure to put LLD module refcnt after SCSI device is released. Fixes a kernel panic of 'BUG: unable to handle page fault for address' reported by Changhui and Yi. Link: https://lore.kernel.org/r/20211008050118.1440686-1-ming.lei@redhat.com Cc: Greg Kroah-Hartman Reported-by: Changhui Zhong Reported-by: Yi Zhang Tested-by: Yi Zhang Signed-off-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi.c | 4 +++- drivers/scsi/scsi_sysfs.c | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index b241f9e3885c..291ecc33b1fe 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -553,8 +553,10 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { - module_put(sdev->host->hostt->module); + struct module *mod = sdev->host->hostt->module; + put_device(&sdev->sdev_gendev); + module_put(mod); } EXPORT_SYMBOL(scsi_device_put); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 86793259e541..a35841b34bfd 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -449,9 +449,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; unsigned long flags; + struct module *mod; sdev = container_of(work, struct scsi_device, ew.work); + mod = sdev->host->hostt->module; + scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; @@ -502,11 +505,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) if (parent) put_device(parent); + module_put(mod); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); + + /* Set module pointer as NULL in case of module unloading */ + if (!try_module_get(sdp->host->hostt->module)) + sdp->host->hostt->module = NULL; + execute_in_process_context(scsi_device_dev_release_usercontext, &sdp->ew); } -- cgit v1.2.3 From d39bf40e55e666b5905fdbd46a0dced030ce87be Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Tue, 12 Oct 2021 13:55:19 -0400 Subject: IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields Overflowing either addrlimit or bytes_togo can allow userspace to trigger a buffer overflow of kernel memory. Check for overflows in all the places doing math on user controlled buffers. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Link: https://lore.kernel.org/r/20211012175519.7298.77738.stgit@awfm-01.cornelisnetworks.com Reported-by: Ilja Van Sprundel Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qib/qib_user_sdma.c | 33 +++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index a67599b5a550..ac11943a5ddb 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -602,7 +602,7 @@ done: /* * How many pages in this iovec element? */ -static int qib_user_sdma_num_pages(const struct iovec *iov) +static size_t qib_user_sdma_num_pages(const struct iovec *iov) { const unsigned long addr = (unsigned long) iov->iov_base; const unsigned long len = iov->iov_len; @@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) + unsigned long addr, int tlen, size_t npages) { struct page *pages[8]; int i, j; @@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, unsigned long idx; for (idx = 0; idx < niov; idx++) { - const int npages = qib_user_sdma_num_pages(iov + idx); + const size_t npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, @@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, unsigned pktnw; unsigned pktnwc; int nfrags = 0; - int npages = 0; - int bytes_togo = 0; + size_t npages = 0; + size_t bytes_togo = 0; int tiddma = 0; int cfur; @@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, npages += qib_user_sdma_num_pages(&iov[idx]); - bytes_togo += slen; + if (check_add_overflow(bytes_togo, slen, &bytes_togo) || + bytes_togo > type_max(typeof(pkt->bytes_togo))) { + ret = -EINVAL; + goto free_pbc; + } pktnwc += slen >> 2; idx++; nfrags++; @@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int tidsmsize, n; - size_t pktsize; + size_t tidsmsize, n, pktsize, sz, addrlimit; n = npages*((2*PAGE_SIZE/frag_size)+1); pktsize = struct_size(pkt, addr, n); @@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, else tidsmsize = 0; - pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (check_add_overflow(pktsize, tidsmsize, &sz)) { + ret = -EINVAL; + goto free_pbc; + } + pkt = kmalloc(sz, GFP_KERNEL); if (!pkt) { ret = -ENOMEM; goto free_pbc; } pkt->largepkt = 1; pkt->frag_size = frag_size; - pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + if (check_add_overflow(n, ARRAY_SIZE(pkt->addr), + &addrlimit) || + addrlimit > type_max(typeof(pkt->addrlimit))) { + ret = -EINVAL; + goto free_pbc; + } + pkt->addrlimit = addrlimit; if (tiddma) { char *tidsm = (char *)pkt + pktsize; -- cgit v1.2.3 From 13bac861952a78664907a0f927d3e874e9a59034 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 13 Oct 2021 10:18:52 -0400 Subject: IB/hfi1: Fix abba locking issue with sc_disable() sc_disable() after having disabled the send context wakes up any waiters by calling hfi1_qp_wakeup() while holding the waitlock for the sc. This is contrary to the model for all other calls to hfi1_qp_wakeup() where the waitlock is dropped and a local is used to drive calls to hfi1_qp_wakeup(). Fix by moving the sc->piowait into a local list and driving the wakeup calls from the list. Fixes: 099a884ba4c0 ("IB/hfi1: Handle wakeup of orphaned QPs for pio") Link: https://lore.kernel.org/r/20211013141852.128104.2682.stgit@awfm-01.cornelisnetworks.com Signed-off-by: Mike Marciniszyn Reported-by: TOTE Robot Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/pio.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 489b436f19bb..3d42bd2b36bd 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -878,6 +878,7 @@ void sc_disable(struct send_context *sc) { u64 reg; struct pio_buf *pbuf; + LIST_HEAD(wake_list); if (!sc) return; @@ -912,19 +913,21 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - while (!list_empty(&sc->piowait)) { + if (!list_empty(&sc->piowait)) + list_move(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { struct iowait *wait; struct rvt_qp *qp; struct hfi1_qp_priv *priv; - wait = list_first_entry(&sc->piowait, struct iowait, list); + wait = list_first_entry(&wake_list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } - write_sequnlock(&sc->waitlock); spin_unlock_irq(&sc->alloc_lock); } -- cgit v1.2.3 From 663991f32857b3b63c94c97de9dbb0ec8600144f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 13 Oct 2021 11:06:45 +0300 Subject: RDMA/rdmavt: Fix error code in rvt_create_qp() Return negative -ENOMEM instead of positive ENOMEM. Returning a postive value will cause an Oops because it becomes an ERR_PTR() in the create_qp() function. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211013080645.GD6010@kili Signed-off-by: Dan Carpenter Acked-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 49bdd78ac664..3305f2744bfa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1223,7 +1223,7 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, spin_lock(&rdi->n_qps_lock); if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { spin_unlock(&rdi->n_qps_lock); - ret = ENOMEM; + ret = -ENOMEM; goto bail_ip; } -- cgit v1.2.3 From 0398adaec3419bdfa93baae70a3b696b4abdd7ad Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:36 +0200 Subject: Revert "dt-bindings: pinctrl: bcm4708-pinmux: rework binding to use syscon" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 2ae80900f239484069569380e1fc4340fd6e0089. My rework was unneeded & wrong. It replaced a clear & correct "reg" property usage with a custom "offset" one. Back then I didn't understand how to properly handle CRU block binding. I heard / read about syscon and tried to use it in a totally invalid way. That change also missed Rob's review (obviously). Northstar's pin controller is a simple consistent hardware block that can be cleanly mapped using a 0x24 long reg space. Since the rework commit there wasn't any follow up modifying in-kernel DTS files to use the new binding. Broadcom also isn't known to use that bugged binding. There is close to zero chance this revert may actually cause problems / regressions. This commit is a simple revert. Example binding may (should) be updated / cleaned up but that can be handled separately. Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211008205938.29925-1-zajec5@gmail.com Signed-off-by: Linus Walleij --- .../devicetree/bindings/mfd/brcm,cru.yaml | 11 ++++++----- .../bindings/pinctrl/brcm,ns-pinmux.yaml | 23 ++++++++++++---------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml index fc1317ab3226..28ac60acf4ac 100644 --- a/Documentation/devicetree/bindings/mfd/brcm,cru.yaml +++ b/Documentation/devicetree/bindings/mfd/brcm,cru.yaml @@ -32,13 +32,13 @@ properties: "#size-cells": const: 1 - pinctrl: - $ref: ../pinctrl/brcm,ns-pinmux.yaml - patternProperties: '^clock-controller@[a-f0-9]+$': $ref: ../clock/brcm,iproc-clocks.yaml + '^pin-controller@[a-f0-9]+$': + $ref: ../pinctrl/brcm,ns-pinmux.yaml + '^thermal@[a-f0-9]+$': $ref: ../thermal/brcm,ns-thermal.yaml @@ -73,9 +73,10 @@ examples: "iprocfast", "sata1", "sata2"; }; - pinctrl { + pin-controller@1c0 { compatible = "brcm,bcm4708-pinmux"; - offset = <0x1c0>; + reg = <0x1c0 0x24>; + reg-names = "cru_gpio_control"; }; thermal@2c0 { diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml index 470aff599c27..78600a8fe403 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml @@ -17,9 +17,6 @@ description: A list of pins varies across chipsets so few bindings are available. - Node of the pinmux must be nested in the CRU (Central Resource Unit) "syscon" - node. - properties: compatible: enum: @@ -27,10 +24,11 @@ properties: - brcm,bcm4709-pinmux - brcm,bcm53012-pinmux - offset: - description: offset of pin registers in the CRU block + reg: maxItems: 1 - $ref: /schemas/types.yaml#/definitions/uint32-array + + reg-names: + const: cru_gpio_control patternProperties: '-pins$': @@ -72,19 +70,24 @@ allOf: uart1_grp ] required: - - offset + - reg + - reg-names additionalProperties: false examples: - | cru@1800c100 { - compatible = "syscon", "simple-mfd"; + compatible = "simple-bus"; reg = <0x1800c100 0x1a4>; + ranges; + #address-cells = <1>; + #size-cells = <1>; - pinctrl { + pin-controller@1c0 { compatible = "brcm,bcm4708-pinmux"; - offset = <0xc0>; + reg = <0x1c0 0x24>; + reg-names = "cru_gpio_control"; spi-pins { function = "spi"; -- cgit v1.2.3 From 1d0a779892e8aae484e11bf5f28fb41d49e8b0f6 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:37 +0200 Subject: dt-bindings: pinctrl: brcm,ns-pinmux: drop unneeded CRU from example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to include CRU in example of this binding. It wasn't complete / correct anyway. The proper binding can be find in the mfd/brcm,cru.yaml . Signed-off-by: Rafał Miłecki Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20211008205938.29925-2-zajec5@gmail.com Signed-off-by: Linus Walleij --- .../bindings/pinctrl/brcm,ns-pinmux.yaml | 24 ++++++++-------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml index 78600a8fe403..fc39e3e9f71c 100644 --- a/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml +++ b/Documentation/devicetree/bindings/pinctrl/brcm,ns-pinmux.yaml @@ -77,21 +77,13 @@ additionalProperties: false examples: - | - cru@1800c100 { - compatible = "simple-bus"; - reg = <0x1800c100 0x1a4>; - ranges; - #address-cells = <1>; - #size-cells = <1>; - - pin-controller@1c0 { - compatible = "brcm,bcm4708-pinmux"; - reg = <0x1c0 0x24>; - reg-names = "cru_gpio_control"; - - spi-pins { - function = "spi"; - groups = "spi_grp"; - }; + pin-controller@1800c1c0 { + compatible = "brcm,bcm4708-pinmux"; + reg = <0x1800c1c0 0x24>; + reg-names = "cru_gpio_control"; + + spi-pins { + function = "spi"; + groups = "spi_grp"; }; }; -- cgit v1.2.3 From 6dba4bdfd7a30e77b848a45404b224588bf989e5 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 8 Oct 2021 22:59:38 +0200 Subject: Revert "pinctrl: bcm: ns: support updated DT binding as syscon subnode" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a49d784d5a8272d0f63c448fe8dc69e589db006e. The updated binding was wrong / invalid and has been reverted. There isn't any upstream kernel DTS using it and Broadcom isn't known to use it neither. There is close to zero chance this will cause regression for anyone. Actually in-kernel bcm5301x.dtsi still uses the old good binding and so it's broken since the driver update. This revert fixes it. Signed-off-by: Rafał Miłecki Link: https://lore.kernel.org/r/20211008205938.29925-3-zajec5@gmail.com Signed-off-by: Linus Walleij --- drivers/pinctrl/bcm/pinctrl-ns.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index e79690bd8b85..d7f8175d2c1c 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include -#include #include #define FLAG_BCM4708 BIT(1) @@ -24,8 +22,7 @@ struct ns_pinctrl { struct device *dev; unsigned int chipset_flag; struct pinctrl_dev *pctldev; - struct regmap *regmap; - u32 offset; + void __iomem *base; struct pinctrl_desc pctldesc; struct ns_pinctrl_group *groups; @@ -232,9 +229,9 @@ static int ns_pinctrl_set_mux(struct pinctrl_dev *pctrl_dev, unset |= BIT(pin_number); } - regmap_read(ns_pinctrl->regmap, ns_pinctrl->offset, &tmp); + tmp = readl(ns_pinctrl->base); tmp &= ~unset; - regmap_write(ns_pinctrl->regmap, ns_pinctrl->offset, tmp); + writel(tmp, ns_pinctrl->base); return 0; } @@ -266,13 +263,13 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = { static int ns_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; const struct of_device_id *of_id; struct ns_pinctrl *ns_pinctrl; struct pinctrl_desc *pctldesc; struct pinctrl_pin_desc *pin; struct ns_pinctrl_group *group; struct ns_pinctrl_function *function; + struct resource *res; int i; ns_pinctrl = devm_kzalloc(dev, sizeof(*ns_pinctrl), GFP_KERNEL); @@ -290,18 +287,12 @@ static int ns_pinctrl_probe(struct platform_device *pdev) return -EINVAL; ns_pinctrl->chipset_flag = (uintptr_t)of_id->data; - ns_pinctrl->regmap = syscon_node_to_regmap(of_get_parent(np)); - if (IS_ERR(ns_pinctrl->regmap)) { - int err = PTR_ERR(ns_pinctrl->regmap); - - dev_err(dev, "Failed to map pinctrl regs: %d\n", err); - - return err; - } - - if (of_property_read_u32(np, "offset", &ns_pinctrl->offset)) { - dev_err(dev, "Failed to get register offset\n"); - return -ENOENT; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "cru_gpio_control"); + ns_pinctrl->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ns_pinctrl->base)) { + dev_err(dev, "Failed to map pinctrl regs\n"); + return PTR_ERR(ns_pinctrl->base); } memcpy(pctldesc, &ns_pinctrl_desc, sizeof(*pctldesc)); -- cgit v1.2.3 From c370bb474016ab9edfdabd7c08a88dd13a71ddbd Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Fri, 8 Oct 2021 14:25:17 +0200 Subject: pinctrl: stm32: use valid pin identifier in stm32_pinctrl_resume() When resuming from low power, the driver attempts to restore the configuration of some pins. This is done by a call to: stm32_pinctrl_restore_gpio_regs(struct stm32_pinctrl *pctl, u32 pin) where 'pin' must be a valid pin value (i.e. matching some 'groups->pin'). Fix the current implementation which uses some wrong 'pin' value. Fixes: e2f3cf18c3e2 ("pinctrl: stm32: add suspend/resume management") Signed-off-by: Fabien Dessenne Link: https://lore.kernel.org/r/20211008122517.617633-1-fabien.dessenne@foss.st.com Signed-off-by: Linus Walleij --- drivers/pinctrl/stm32/pinctrl-stm32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 68b3886f9f0f..dfd8888a222a 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1644,8 +1644,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev) struct stm32_pinctrl_group *g = pctl->groups; int i; - for (i = g->pin; i < g->pin + pctl->ngroups; i++) - stm32_pinctrl_restore_gpio_regs(pctl, i); + for (i = 0; i < pctl->ngroups; i++, g++) + stm32_pinctrl_restore_gpio_regs(pctl, g->pin); return 0; } -- cgit v1.2.3 From f7db8fd03a4bc5baf70ccf8978fe17cb54368b97 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 8 Oct 2021 07:31:03 +0900 Subject: ksmbd: add validation in smb2_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add validation for request/response buffer size check in smb2_ioctl and fsctl_copychunk() take copychunk_ioctl_req pointer and the other arguments instead of smb2_ioctl_req structure and remove an unused smb2_ioctl_req argument of fsctl_validate_negotiate_info. Cc: Tom Talpey Cc: Ronnie Sahlberg Cc: Ralph Böhme Cc: Steve French Cc: Sergey Senozhatsky Acked-by: Hyunchul Lee Signed-off-by: Colin Ian King Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 113 +++++++++++++++++++++++++++++++++++++++-------------- fs/ksmbd/vfs.c | 2 +- fs/ksmbd/vfs.h | 2 +- 3 files changed, 86 insertions(+), 31 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 005aa93a49d6..1c7b837c8106 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -7023,24 +7023,26 @@ out2: return err; } -static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, +static int fsctl_copychunk(struct ksmbd_work *work, + struct copychunk_ioctl_req *ci_req, + unsigned int cnt_code, + unsigned int input_count, + unsigned long long volatile_id, + unsigned long long persistent_id, struct smb2_ioctl_rsp *rsp) { - struct copychunk_ioctl_req *ci_req; struct copychunk_ioctl_rsp *ci_rsp; struct ksmbd_file *src_fp = NULL, *dst_fp = NULL; struct srv_copychunk *chunks; unsigned int i, chunk_count, chunk_count_written = 0; unsigned int chunk_size_written = 0; loff_t total_size_written = 0; - int ret, cnt_code; + int ret = 0; - cnt_code = le32_to_cpu(req->CntCode); - ci_req = (struct copychunk_ioctl_req *)&req->Buffer[0]; ci_rsp = (struct copychunk_ioctl_rsp *)&rsp->Buffer[0]; - rsp->VolatileFileId = req->VolatileFileId; - rsp->PersistentFileId = req->PersistentFileId; + rsp->VolatileFileId = cpu_to_le64(volatile_id); + rsp->PersistentFileId = cpu_to_le64(persistent_id); ci_rsp->ChunksWritten = cpu_to_le32(ksmbd_server_side_copy_max_chunk_count()); ci_rsp->ChunkBytesWritten = @@ -7050,12 +7052,13 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, chunks = (struct srv_copychunk *)&ci_req->Chunks[0]; chunk_count = le32_to_cpu(ci_req->ChunkCount); + if (chunk_count == 0) + goto out; total_size_written = 0; /* verify the SRV_COPYCHUNK_COPY packet */ if (chunk_count > ksmbd_server_side_copy_max_chunk_count() || - le32_to_cpu(req->InputCount) < - offsetof(struct copychunk_ioctl_req, Chunks) + + input_count < offsetof(struct copychunk_ioctl_req, Chunks) + chunk_count * sizeof(struct srv_copychunk)) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; return -EINVAL; @@ -7076,9 +7079,7 @@ static int fsctl_copychunk(struct ksmbd_work *work, struct smb2_ioctl_req *req, src_fp = ksmbd_lookup_foreign_fd(work, le64_to_cpu(ci_req->ResumeKey[0])); - dst_fp = ksmbd_lookup_fd_slow(work, - le64_to_cpu(req->VolatileFileId), - le64_to_cpu(req->PersistentFileId)); + dst_fp = ksmbd_lookup_fd_slow(work, volatile_id, persistent_id); ret = -EINVAL; if (!src_fp || src_fp->persistent_id != le64_to_cpu(ci_req->ResumeKey[1])) { @@ -7153,8 +7154,8 @@ static __be32 idev_ipv4_address(struct in_device *idev) } static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, - struct smb2_ioctl_req *req, - struct smb2_ioctl_rsp *rsp) + struct smb2_ioctl_rsp *rsp, + unsigned int out_buf_len) { struct network_interface_info_ioctl_rsp *nii_rsp = NULL; int nbytes = 0; @@ -7166,6 +7167,12 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, rtnl_lock(); for_each_netdev(&init_net, netdev) { + if (out_buf_len < + nbytes + sizeof(struct network_interface_info_ioctl_rsp)) { + rtnl_unlock(); + return -ENOSPC; + } + if (netdev->type == ARPHRD_LOOPBACK) continue; @@ -7245,11 +7252,6 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, if (nii_rsp) nii_rsp->Next = 0; - if (!nbytes) { - rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL; - return -EINVAL; - } - rsp->PersistentFileId = cpu_to_le64(SMB2_NO_FID); rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID); return nbytes; @@ -7257,11 +7259,16 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn, static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn, struct validate_negotiate_info_req *neg_req, - struct validate_negotiate_info_rsp *neg_rsp) + struct validate_negotiate_info_rsp *neg_rsp, + unsigned int in_buf_len) { int ret = 0; int dialect; + if (in_buf_len < sizeof(struct validate_negotiate_info_req) + + le16_to_cpu(neg_req->DialectCount) * sizeof(__le16)) + return -EINVAL; + dialect = ksmbd_lookup_dialect_by_id(neg_req->Dialects, neg_req->DialectCount); if (dialect == BAD_PROT_ID || dialect != conn->dialect) { @@ -7295,7 +7302,7 @@ err_out: static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, struct file_allocated_range_buffer *qar_req, struct file_allocated_range_buffer *qar_rsp, - int in_count, int *out_count) + unsigned int in_count, unsigned int *out_count) { struct ksmbd_file *fp; loff_t start, length; @@ -7322,7 +7329,8 @@ static int fsctl_query_allocated_ranges(struct ksmbd_work *work, u64 id, } static int fsctl_pipe_transceive(struct ksmbd_work *work, u64 id, - int out_buf_len, struct smb2_ioctl_req *req, + unsigned int out_buf_len, + struct smb2_ioctl_req *req, struct smb2_ioctl_rsp *rsp) { struct ksmbd_rpc_command *rpc_resp; @@ -7436,8 +7444,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct smb2_ioctl_req *req; struct smb2_ioctl_rsp *rsp, *rsp_org; - int cnt_code, nbytes = 0; - int out_buf_len; + unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len; u64 id = KSMBD_NO_FID; struct ksmbd_conn *conn = work->conn; int ret = 0; @@ -7466,7 +7473,11 @@ int smb2_ioctl(struct ksmbd_work *work) cnt_code = le32_to_cpu(req->CntCode); out_buf_len = le32_to_cpu(req->MaxOutputResponse); - out_buf_len = min(KSMBD_IPC_MAX_PAYLOAD, out_buf_len); + out_buf_len = + min_t(u32, work->response_sz - work->next_smb2_rsp_hdr_off - + (offsetof(struct smb2_ioctl_rsp, Buffer) - 4), + out_buf_len); + in_buf_len = le32_to_cpu(req->InputCount); switch (cnt_code) { case FSCTL_DFS_GET_REFERRALS: @@ -7494,6 +7505,7 @@ int smb2_ioctl(struct ksmbd_work *work) break; } case FSCTL_PIPE_TRANSCEIVE: + out_buf_len = min_t(u32, KSMBD_IPC_MAX_PAYLOAD, out_buf_len); nbytes = fsctl_pipe_transceive(work, id, out_buf_len, req, rsp); break; case FSCTL_VALIDATE_NEGOTIATE_INFO: @@ -7502,9 +7514,16 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct validate_negotiate_info_req)) + return -EINVAL; + + if (out_buf_len < sizeof(struct validate_negotiate_info_rsp)) + return -EINVAL; + ret = fsctl_validate_negotiate_info(conn, (struct validate_negotiate_info_req *)&req->Buffer[0], - (struct validate_negotiate_info_rsp *)&rsp->Buffer[0]); + (struct validate_negotiate_info_rsp *)&rsp->Buffer[0], + in_buf_len); if (ret < 0) goto out; @@ -7513,9 +7532,10 @@ int smb2_ioctl(struct ksmbd_work *work) rsp->VolatileFileId = cpu_to_le64(SMB2_NO_FID); break; case FSCTL_QUERY_NETWORK_INTERFACE_INFO: - nbytes = fsctl_query_iface_info_ioctl(conn, req, rsp); - if (nbytes < 0) + ret = fsctl_query_iface_info_ioctl(conn, rsp, out_buf_len); + if (ret < 0) goto out; + nbytes = ret; break; case FSCTL_REQUEST_RESUME_KEY: if (out_buf_len < sizeof(struct resume_key_ioctl_rsp)) { @@ -7540,15 +7560,33 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct copychunk_ioctl_req)) { + ret = -EINVAL; + goto out; + } + if (out_buf_len < sizeof(struct copychunk_ioctl_rsp)) { ret = -EINVAL; goto out; } nbytes = sizeof(struct copychunk_ioctl_rsp); - fsctl_copychunk(work, req, rsp); + rsp->VolatileFileId = req->VolatileFileId; + rsp->PersistentFileId = req->PersistentFileId; + fsctl_copychunk(work, + (struct copychunk_ioctl_req *)&req->Buffer[0], + le32_to_cpu(req->CntCode), + le32_to_cpu(req->InputCount), + le64_to_cpu(req->VolatileFileId), + le64_to_cpu(req->PersistentFileId), + rsp); break; case FSCTL_SET_SPARSE: + if (in_buf_len < sizeof(struct file_sparse)) { + ret = -EINVAL; + goto out; + } + ret = fsctl_set_sparse(work, id, (struct file_sparse *)&req->Buffer[0]); if (ret < 0) @@ -7567,6 +7605,11 @@ int smb2_ioctl(struct ksmbd_work *work) goto out; } + if (in_buf_len < sizeof(struct file_zero_data_information)) { + ret = -EINVAL; + goto out; + } + zero_data = (struct file_zero_data_information *)&req->Buffer[0]; @@ -7586,6 +7629,11 @@ int smb2_ioctl(struct ksmbd_work *work) break; } case FSCTL_QUERY_ALLOCATED_RANGES: + if (in_buf_len < sizeof(struct file_allocated_range_buffer)) { + ret = -EINVAL; + goto out; + } + ret = fsctl_query_allocated_ranges(work, id, (struct file_allocated_range_buffer *)&req->Buffer[0], (struct file_allocated_range_buffer *)&rsp->Buffer[0], @@ -7626,6 +7674,11 @@ int smb2_ioctl(struct ksmbd_work *work) struct duplicate_extents_to_file *dup_ext; loff_t src_off, dst_off, length, cloned; + if (in_buf_len < sizeof(struct duplicate_extents_to_file)) { + ret = -EINVAL; + goto out; + } + dup_ext = (struct duplicate_extents_to_file *)&req->Buffer[0]; fp_in = ksmbd_lookup_fd_slow(work, dup_ext->VolatileFileHandle, @@ -7696,6 +7749,8 @@ out: rsp->hdr.Status = STATUS_OBJECT_NAME_NOT_FOUND; else if (ret == -EOPNOTSUPP) rsp->hdr.Status = STATUS_NOT_SUPPORTED; + else if (ret == -ENOSPC) + rsp->hdr.Status = STATUS_BUFFER_TOO_SMALL; else if (ret < 0 || rsp->hdr.Status == 0) rsp->hdr.Status = STATUS_INVALID_PARAMETER; smb2_set_err_rsp(work); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index b41954294d38..835b384b0895 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1023,7 +1023,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, struct file_allocated_range_buffer *ranges, - int in_count, int *out_count) + unsigned int in_count, unsigned int *out_count) { struct file *f = fp->filp; struct inode *inode = file_inode(fp->filp); diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index 7b1dcaa3fbdc..b0d5b8feb4a3 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -166,7 +166,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, struct file_allocated_range_buffer; int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, struct file_allocated_range_buffer *ranges, - int in_count, int *out_count); + unsigned int in_count, unsigned int *out_count); int ksmbd_vfs_unlink(struct user_namespace *user_ns, struct dentry *dir, struct dentry *dentry); void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat); -- cgit v1.2.3 From bf8acc9e10e21c28452dfa067a7d31e6067104b1 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Thu, 7 Oct 2021 16:26:58 +0900 Subject: ksmbd: improve credits management * Requests except READ, WRITE, IOCTL, INFO, QUERY DIRECOTRY, CANCEL must consume one credit. * If client's granted credits are insufficient, refuse to handle requests. * Windows server 2016 or later grant up to 8192 credits to clients at once. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/connection.c | 2 ++ fs/ksmbd/smb2misc.c | 38 +++++++++++++++++------- fs/ksmbd/smb2pdu.c | 81 ++++++++++++++++++--------------------------------- 3 files changed, 59 insertions(+), 62 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 48b18b4ec117..b57a0d8a392f 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -61,6 +61,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void) conn->local_nls = load_nls_default(); atomic_set(&conn->req_running, 0); atomic_set(&conn->r_count, 0); + conn->total_credits = 1; + init_waitqueue_head(&conn->req_running_q); INIT_LIST_HEAD(&conn->conns_list); INIT_LIST_HEAD(&conn->sessions); diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index 9edd9c161b27..e7e441c8f050 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -284,11 +284,13 @@ static inline int smb2_ioctl_resp_len(struct smb2_ioctl_req *h) le32_to_cpu(h->MaxOutputResponse); } -static int smb2_validate_credit_charge(struct smb2_hdr *hdr) +static int smb2_validate_credit_charge(struct ksmbd_conn *conn, + struct smb2_hdr *hdr) { - int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; - int credit_charge = le16_to_cpu(hdr->CreditCharge); + unsigned int req_len = 0, expect_resp_len = 0, calc_credit_num, max_len; + unsigned short credit_charge = le16_to_cpu(hdr->CreditCharge); void *__hdr = hdr; + int ret; switch (hdr->Command) { case SMB2_QUERY_INFO: @@ -310,21 +312,37 @@ static int smb2_validate_credit_charge(struct smb2_hdr *hdr) req_len = smb2_ioctl_req_len(__hdr); expect_resp_len = smb2_ioctl_resp_len(__hdr); break; - default: + case SMB2_CANCEL: return 0; + default: + req_len = 1; + break; } - credit_charge = max(1, credit_charge); - max_len = max(req_len, expect_resp_len); + credit_charge = max_t(unsigned short, credit_charge, 1); + max_len = max_t(unsigned int, req_len, expect_resp_len); calc_credit_num = DIV_ROUND_UP(max_len, SMB2_MAX_BUFFER_SIZE); if (credit_charge < calc_credit_num) { - pr_err("Insufficient credit charge, given: %d, needed: %d\n", - credit_charge, calc_credit_num); + ksmbd_debug(SMB, "Insufficient credit charge, given: %d, needed: %d\n", + credit_charge, calc_credit_num); + return 1; + } else if (credit_charge > conn->max_credits) { + ksmbd_debug(SMB, "Too large credit charge: %d\n", credit_charge); return 1; } - return 0; + spin_lock(&conn->credits_lock); + if (credit_charge <= conn->total_credits) { + conn->total_credits -= credit_charge; + ret = 0; + } else { + ksmbd_debug(SMB, "Insufficient credits granted, given: %u, granted: %u\n", + credit_charge, conn->total_credits); + ret = 1; + } + spin_unlock(&conn->credits_lock); + return ret; } int ksmbd_smb2_check_message(struct ksmbd_work *work) @@ -383,7 +401,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && - smb2_validate_credit_charge(hdr)) { + smb2_validate_credit_charge(work->conn, hdr)) { work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); return 1; } diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 1c7b837c8106..79b296abe04e 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -292,22 +292,6 @@ int init_smb2_neg_rsp(struct ksmbd_work *work) return 0; } -static int smb2_consume_credit_charge(struct ksmbd_work *work, - unsigned short credit_charge) -{ - struct ksmbd_conn *conn = work->conn; - unsigned int rsp_credits = 1; - - if (!conn->total_credits) - return 0; - - if (credit_charge > 0) - rsp_credits = credit_charge; - - conn->total_credits -= rsp_credits; - return rsp_credits; -} - /** * smb2_set_rsp_credits() - set number of credits in response buffer * @work: smb work containing smb response buffer @@ -317,49 +301,43 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work); struct smb2_hdr *hdr = ksmbd_resp_buf_next(work); struct ksmbd_conn *conn = work->conn; - unsigned short credits_requested = le16_to_cpu(req_hdr->CreditRequest); - unsigned short credit_charge = 1, credits_granted = 0; - unsigned short aux_max, aux_credits, min_credits; - int rsp_credit_charge; + unsigned short credits_requested; + unsigned short credit_charge, credits_granted = 0; + unsigned short aux_max, aux_credits; - if (hdr->Command == SMB2_CANCEL) - goto out; + if (work->send_no_response) + return 0; - /* get default minimum credits by shifting maximum credits by 4 */ - min_credits = conn->max_credits >> 4; + hdr->CreditCharge = req_hdr->CreditCharge; - if (conn->total_credits >= conn->max_credits) { + if (conn->total_credits > conn->max_credits) { + hdr->CreditRequest = 0; pr_err("Total credits overflow: %d\n", conn->total_credits); - conn->total_credits = min_credits; - } - - rsp_credit_charge = - smb2_consume_credit_charge(work, le16_to_cpu(req_hdr->CreditCharge)); - if (rsp_credit_charge < 0) return -EINVAL; + } - hdr->CreditCharge = cpu_to_le16(rsp_credit_charge); + credit_charge = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditCharge), 1); + credits_requested = max_t(unsigned short, + le16_to_cpu(req_hdr->CreditRequest), 1); - if (credits_requested > 0) { - aux_credits = credits_requested - 1; - aux_max = 32; - if (hdr->Command == SMB2_NEGOTIATE) - aux_max = 0; - aux_credits = (aux_credits < aux_max) ? aux_credits : aux_max; - credits_granted = aux_credits + credit_charge; + /* according to smb2.credits smbtorture, Windows server + * 2016 or later grant up to 8192 credits at once. + * + * TODO: Need to adjuct CreditRequest value according to + * current cpu load + */ + aux_credits = credits_requested - 1; + if (hdr->Command == SMB2_NEGOTIATE) + aux_max = 0; + else + aux_max = conn->max_credits - credit_charge; + aux_credits = min_t(unsigned short, aux_credits, aux_max); + credits_granted = credit_charge + aux_credits; - /* if credits granted per client is getting bigger than default - * minimum credits then we should wrap it up within the limits. - */ - if ((conn->total_credits + credits_granted) > min_credits) - credits_granted = min_credits - conn->total_credits; - /* - * TODO: Need to adjuct CreditRequest value according to - * current cpu load - */ - } else if (conn->total_credits == 0) { - credits_granted = 1; - } + if (conn->max_credits - conn->total_credits < credits_granted) + credits_granted = conn->max_credits - + conn->total_credits; conn->total_credits += credits_granted; work->credits_granted += credits_granted; @@ -368,7 +346,6 @@ int smb2_set_rsp_credits(struct ksmbd_work *work) /* Update CreditRequest in last request */ hdr->CreditRequest = cpu_to_le16(work->credits_granted); } -out: ksmbd_debug(SMB, "credits: requested[%d] granted[%d] total_granted[%d]\n", credits_requested, credits_granted, -- cgit v1.2.3 From 9a63b999ae5435d82a5c353c6b1467100f857742 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 11 Oct 2021 12:48:31 +0900 Subject: ksmbd: fix potencial 32bit overflow from data area check in smb2_write DataOffset and Length validation can be potencial 32bit overflow. This patch fix it. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 79b296abe04e..7b4689f2df49 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6197,8 +6197,7 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work) (offsetof(struct smb2_write_req, Buffer) - 4)) { data_buf = (char *)&req->Buffer[0]; } else { - if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) || - (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) { + if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { pr_err("invalid write data offset %u, smb_len %u\n", le16_to_cpu(req->DataOffset), get_rfc1002_len(req)); @@ -6356,8 +6355,7 @@ int smb2_write(struct ksmbd_work *work) (offsetof(struct smb2_write_req, Buffer) - 4)) { data_buf = (char *)&req->Buffer[0]; } else { - if ((le16_to_cpu(req->DataOffset) > get_rfc1002_len(req)) || - (le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req))) { + if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) { pr_err("invalid write data offset %u, smb_len %u\n", le16_to_cpu(req->DataOffset), get_rfc1002_len(req)); -- cgit v1.2.3 From dbad63001eac3abeeb2b66ddf71504e8ab128c5c Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 11 Oct 2021 19:15:25 +0900 Subject: ksmbd: validate compound response buffer Add the check to validate compound response buffer. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 7b4689f2df49..89c187aa8db2 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -449,6 +449,12 @@ bool is_chained_smb2_message(struct ksmbd_work *work) return false; } + if ((u64)get_rfc1002_len(work->response_buf) + MAX_CIFS_SMALL_BUFFER_SIZE > + work->response_sz) { + pr_err("next response offset exceeds response buffer size\n"); + return false; + } + ksmbd_debug(SMB, "got SMB2 chained command\n"); init_chained_smb2_rsp(work); return true; -- cgit v1.2.3 From 4bc59477c3298b191c72b5d99feb54a1dc8c254d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 15 Oct 2021 17:14:02 +0900 Subject: ksmbd: limit read/write/trans buffer size not to exceed 8MB ksmbd limit read/write/trans buffer size not to exceed maximum 8MB. And set the minimum value of max response buffer size to 64KB. Windows client doesn't send session setup request if ksmbd set max trans/read/write size lower than 64KB in smb2 negotiate. It means windows allow at least 64 KB or more about this value. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/smb2ops.c | 3 +++ fs/ksmbd/smb2pdu.c | 2 +- fs/ksmbd/smb2pdu.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c index b06456eb587b..fb6a65d23139 100644 --- a/fs/ksmbd/smb2ops.c +++ b/fs/ksmbd/smb2ops.c @@ -284,6 +284,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn) void init_smb2_max_read_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_read_size = sz; smb30_server_values.max_read_size = sz; smb302_server_values.max_read_size = sz; @@ -292,6 +293,7 @@ void init_smb2_max_read_size(unsigned int sz) void init_smb2_max_write_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_write_size = sz; smb30_server_values.max_write_size = sz; smb302_server_values.max_write_size = sz; @@ -300,6 +302,7 @@ void init_smb2_max_write_size(unsigned int sz) void init_smb2_max_trans_size(unsigned int sz) { + sz = clamp_val(sz, SMB3_MIN_IOSIZE, SMB3_MAX_IOSIZE); smb21_server_values.max_trans_size = sz; smb30_server_values.max_trans_size = sz; smb302_server_values.max_trans_size = sz; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 89c187aa8db2..7999d8bc6892 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -524,7 +524,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) { struct smb2_hdr *hdr = work->request_buf; size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE; - size_t large_sz = work->conn->vals->max_trans_size + MAX_SMB2_HDR_SIZE; + size_t large_sz = small_sz + work->conn->vals->max_trans_size; size_t sz = small_sz; int cmd = le16_to_cpu(hdr->Command); diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h index a6dec5ec6a54..ff5a2f01d34a 100644 --- a/fs/ksmbd/smb2pdu.h +++ b/fs/ksmbd/smb2pdu.h @@ -113,6 +113,8 @@ #define SMB21_DEFAULT_IOSIZE (1024 * 1024) #define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024) #define SMB3_DEFAULT_TRANS_SIZE (1024 * 1024) +#define SMB3_MIN_IOSIZE (64 * 1024) +#define SMB3_MAX_IOSIZE (8 * 1024 * 1024) /* * SMB2 Header Definition -- cgit v1.2.3 From 2ea086e35c3d726a3bacd0a971c1f02a50e98206 Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Fri, 15 Oct 2021 06:02:50 +0900 Subject: ksmbd: add buffer validation for smb direct Add buffer validation for smb direct. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/transport_rdma.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 3a7fa23ba850..a2fd5a4d4cd5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -549,6 +549,10 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (recvmsg->type) { case SMB_DIRECT_MSG_NEGOTIATE_REQ: + if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) { + put_empty_recvmsg(t, recvmsg); + return; + } t->negotiation_requested = true; t->full_packet_received = true; wake_up_interruptible(&t->wait_status); @@ -556,10 +560,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) case SMB_DIRECT_MSG_DATA_TRANSFER: { struct smb_direct_data_transfer *data_transfer = (struct smb_direct_data_transfer *)recvmsg->packet; - int data_length = le32_to_cpu(data_transfer->data_length); + unsigned int data_length; int avail_recvmsg_count, receive_credits; + if (wc->byte_len < + offsetof(struct smb_direct_data_transfer, padding)) { + put_empty_recvmsg(t, recvmsg); + return; + } + + data_length = le32_to_cpu(data_transfer->data_length); if (data_length) { + if (wc->byte_len < sizeof(struct smb_direct_data_transfer) + + (u64)data_length) { + put_empty_recvmsg(t, recvmsg); + return; + } + if (t->full_packet_received) recvmsg->first_segment = true; @@ -568,7 +585,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) else t->full_packet_received = true; - enqueue_reassembly(t, recvmsg, data_length); + enqueue_reassembly(t, recvmsg, (int)data_length); wake_up_interruptible(&t->wait_reassembly_queue); spin_lock(&t->receive_credit_lock); -- cgit v1.2.3 From 7a33488705008b5bb5f8d95d05326dcc64fc55f4 Mon Sep 17 00:00:00 2001 From: Ralph Boehme Date: Fri, 15 Oct 2021 12:52:58 +0900 Subject: ksmbd: validate credit charge after validating SMB2 PDU body size smb2_validate_credit_charge() accesses fields in the SMB2 PDU body, but until smb2_calc_size() is called the PDU has not yet been verified to be large enough to access the PDU dynamic part length field. Acked-by: Namjae Jeon Signed-off-by: Ralph Boehme Signed-off-by: Steve French --- fs/ksmbd/smb2misc.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index e7e441c8f050..030ca57c3784 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -400,26 +400,20 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) } } - if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && - smb2_validate_credit_charge(work->conn, hdr)) { - work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); - return 1; - } - if (smb2_calc_size(hdr, &clc_len)) return 1; if (len != clc_len) { /* client can return one byte more due to implied bcc[0] */ if (clc_len == len + 1) - return 0; + goto validate_credit; /* * Some windows servers (win2016) will pad also the final * PDU in a compound to 8 bytes. */ if (ALIGN(clc_len, 8) == len) - return 0; + goto validate_credit; /* * windows client also pad up to 8 bytes when compounding. @@ -432,7 +426,7 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) "cli req padded more than expected. Length %d not %d for cmd:%d mid:%llu\n", len, clc_len, command, le64_to_cpu(hdr->MessageId)); - return 0; + goto validate_credit; } ksmbd_debug(SMB, @@ -443,6 +437,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work) return 1; } +validate_credit: + if ((work->conn->vals->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU) && + smb2_validate_credit_charge(work->conn, hdr)) { + work->conn->ops->set_rsp_status(work, STATUS_INVALID_PARAMETER); + return 1; + } + return 0; } -- cgit v1.2.3 From 82a4f329b133ad0de66bee12c0be5c67bb8aa188 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:36 +0200 Subject: arm64: dts: imx8mm-kontron: Make sure SOC and DRAM supply voltages are correct It looks like the voltages for the SOC and DRAM supply weren't properly validated before. The datasheet and uboot-imx code tells us that VDD_SOC should be 800 mV in suspend and 850 mV in run mode. VDD_DRAM should be 950 mV for DDR clock frequencies of up to 1.5 GHz. Let's fix these values to make sure the voltages are within the required range. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 9db9b90bf2bc..7cf60c17c29b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -91,10 +91,12 @@ reg_vdd_soc: BUCK1 { regulator-name = "buck1"; regulator-min-microvolt = <800000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <850000>; regulator-boot-on; regulator-always-on; regulator-ramp-delay = <3125>; + nxp,dvs-run-voltage = <850000>; + nxp,dvs-standby-voltage = <800000>; }; reg_vdd_arm: BUCK2 { @@ -111,7 +113,7 @@ reg_vdd_dram: BUCK3 { regulator-name = "buck3"; regulator-min-microvolt = <850000>; - regulator-max-microvolt = <900000>; + regulator-max-microvolt = <950000>; regulator-boot-on; regulator-always-on; }; -- cgit v1.2.3 From 256a24eba7f897c817fb0103dac73467d3789202 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:37 +0200 Subject: arm64: dts: imx8mm-kontron: Set lower limit of VDD_SNVS to 800 mV According to the datasheet the typical value for VDD_SNVS should be 800 mV, so let's make sure that this is within the range of the regulator. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi index 7cf60c17c29b..42bbbb3f532b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-som.dtsi @@ -152,7 +152,7 @@ reg_vdd_snvs: LDO2 { regulator-name = "ldo2"; - regulator-min-microvolt = <850000>; + regulator-min-microvolt = <800000>; regulator-max-microvolt = <900000>; regulator-boot-on; regulator-always-on; -- cgit v1.2.3 From 6562d6e350284307e33ea10c7f46a6661ff22770 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:38 +0200 Subject: arm64: dts: imx8mm-kontron: Fix polarity of reg_rst_eth2 The regulator reg_rst_eth2 should keep the reset signal of the USB ethernet adapter deasserted anytime. Fix the polarity and mark it as always-on. Anyway, using the regulator is only a workaround for the missing support of specifying a reset GPIO for USB devices in a generic way. As we don't have a solution for this at the moment, at least fix the current workaround. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index d17abb515835..3912ac2446d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -70,7 +70,9 @@ regulator-name = "rst-usb-eth2"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb_eth2>; - gpio = <&gpio3 2 GPIO_ACTIVE_LOW>; + gpio = <&gpio3 2 GPIO_ACTIVE_HIGH>; + enable-active-high; + regulator-always-on; }; reg_vdd_5v: regulator-5v { -- cgit v1.2.3 From ca6f9d85d5944046a241b325700c1ca395651c28 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:39 +0200 Subject: arm64: dts: imx8mm-kontron: Fix CAN SPI clock frequency The MCP2515 can be used with an SPI clock of up to 10 MHz. Set the limit accordingly to prevent any performance issues caused by the really low clock speed of 100 kHz. This removes the arbitrarily low limit on the SPI frequency, that was caused by a typo in the original dts. Without this change, receiving CAN messages on the board beyond a certain bitrate will cause overrun errors (see 'ip -det -stat link show can0'). With this fix, receiving messages on the bus works without any overrun errors for bitrates up to 1 MBit. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index 3912ac2446d7..a52cdfb0920e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -97,7 +97,7 @@ clocks = <&osc_can>; interrupt-parent = <&gpio4>; interrupts = <28 IRQ_TYPE_EDGE_FALLING>; - spi-max-frequency = <100000>; + spi-max-frequency = <10000000>; vdd-supply = <®_vdd_3v3>; xceiver-supply = <®_vdd_5v>; }; -- cgit v1.2.3 From 0b28c41e3c951ea3d4f012cfa9da5ebd6512cf6e Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Fri, 15 Oct 2021 14:48:40 +0200 Subject: arm64: dts: imx8mm-kontron: Fix connection type for VSC8531 RGMII PHY Previously we falsely relied on the PHY driver to unconditionally enable the internal RX delay. Since the following fix for the PHY driver this is not the case anymore: commit 7b005a1742be ("net: phy: mscc: configure both RX and TX internal delays for RGMII") In order to enable the delay we need to set the connection type to "rgmii-rxid". Without the RX delay the ethernet is not functional at all. Fixes: 8668d8b2e67f ("arm64: dts: Add the Kontron i.MX8M Mini SoMs and baseboards") Cc: stable@vger.kernel.org Signed-off-by: Frieder Schrempf Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts index a52cdfb0920e..e99e7644ff39 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-kontron-n801x-s.dts @@ -113,7 +113,7 @@ &fec1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-rxid"; phy-handle = <ðphy>; status = "okay"; -- cgit v1.2.3 From 4e5a04be88fe335ad5331f4f8c17f4ebd357e065 Mon Sep 17 00:00:00 2001 From: Sachi King Date: Sat, 9 Oct 2021 14:32:40 +1100 Subject: pinctrl: amd: disable and mask interrupts on probe Some systems such as the Microsoft Surface Laptop 4 leave interrupts enabled and configured for use in sleep states on boot, which cause unexpected behaviour such as spurious wakes and failed resumes in s2idle states. As interrupts should not be enabled until they are claimed and explicitly enabled, disabling any interrupts mistakenly left enabled by firmware should be safe. Signed-off-by: Sachi King Link: https://lore.kernel.org/r/20211009033240.21543-1-nakato@nakato.io Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 8d0f88e9ca88..bae9d429b813 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -840,6 +840,34 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + unsigned long flags; + u32 pin_reg, mask; + int i; + + mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | + BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | + BIT(WAKE_CNTRL_OFF_S4); + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + + if (!pd) + continue; + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + pin_reg = readl(gpio_dev->base + i * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + i * 4); + + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + } +} + #ifdef CONFIG_PM_SLEEP static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) { @@ -976,6 +1004,9 @@ static int amd_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio_dev->pctrl); } + /* Disable and mask interrupts */ + amd_gpio_irq_init(gpio_dev); + girq = &gpio_dev->gc.irq; girq->chip = &amd_gpio_irqchip; /* This will let us handle the parent IRQ in the driver */ -- cgit v1.2.3 From 97e6ea6d78064e7f1e9e19c45dc690aabbb71297 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Thu, 14 Oct 2021 11:24:25 +0530 Subject: scsi: mpi3mr: Fix duplicate device entries when scanning through sysfs When scanning devices through the 'scan' attribute in sysfs, the user will observe duplicate device entries in lsscsi command output. Set the shost's max_channel to zero to avoid this. Link: https://lore.kernel.org/r/20211014055425.30719-1-sreekanth.reddy@broadcom.com Fixes: 824a156633df ("scsi: mpi3mr: Base driver code") Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 2197988333fe..3cae8803383b 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3736,7 +3736,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) shost->max_lun = -1; shost->unique_id = mrioc->id; - shost->max_channel = 1; + shost->max_channel = 0; shost->max_id = 0xFFFFFFFF; if (prot_mask >= 0) -- cgit v1.2.3 From 85374b6392293d103c2b3406ceb9a1253f81d328 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 15 Oct 2021 15:46:54 +0800 Subject: scsi: sd: Fix crashes in sd_resume_runtime() After commit ed4246d37f3b ("scsi: sd: REQUEST SENSE for BLIST_IGN_MEDIA_CHANGE devices in runtime_resume()"), the following crash was observed. static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); struct scsi_device *sdp = sdkp->device; // sdkp == NULL and crash if (sdp->ignore_media_change) { ... } It is possible for sdkp to be NULL in sd_resume_runtime(). To fix this crash, follow sd_resume() to test if sdkp is NULL before dereferencing it. Crash: [ 4.695171][ T151] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 [ 4.696591][ T151] Mem abort info: [ 4.697919][ T151] ESR = 0x96000005 [ 4.699692][ T151] EC = 0x25: DABT (current EL), IL = 32 bits [ 4.701990][ T151] SET = 0, FnV = 0 [ 4.702513][ T151] EA = 0, S1PTW = 0 [ 4.704431][ T151] FSC = 0x05: level 1 translation fault [ 4.705254][ T151] Data abort info: [ 4.705806][ T151] ISV = 0, ISS = 0x00000005 [ 4.706484][ T151] CM = 0, WnR = 0 [ 4.707048][ T151] [0000000000000008] user address but active_mm is swapper [ 4.710577][ T151] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 4.832361][ T151] Kernel Offset: 0x12acc80000 from 0xffffffc010000000 [ 4.833254][ T151] PHYS_OFFSET: 0x40000000 [ 4.833814][ T151] pstate: 80400005 (Nzcv daif +PAN -UAO) [ 4.834546][ T151] pc : sd_resume_runtime+0x20/0x14c [ 4.835227][ T151] lr : scsi_runtime_resume+0x84/0xe4 [ 4.835916][ T151] sp : ffffffc0110db8d0 [ 4.836450][ T151] x29: ffffffc0110db8d0 x28: 0000000000000001 [ 4.837258][ T151] x27: ffffff80c0bd1ac0 x26: ffffff80c0bd1ad0 [ 4.838063][ T151] x25: ffffff80cea7e448 x24: ffffffd2bf961000 [ 4.838867][ T151] x23: ffffffd2be69f838 x22: ffffffd2bd9dfb4c [ 4.839670][ T151] x21: 0000000000000000 x20: ffffff80cea7e000 [ 4.840474][ T151] x19: ffffff80cea7e260 x18: ffffffc0110dd078 [ 4.841277][ T151] x17: 00000000658783d9 x16: 0000000051469dac [ 4.842081][ T151] x15: 00000000b87f6327 x14: 0000000068fd680d [ 4.842885][ T151] x13: ffffff80c0bd2470 x12: ffffffd2bfa7f5f0 [ 4.843688][ T151] x11: 0000000000000078 x10: 0000000000000001 [ 4.844492][ T151] x9 : 00000000000000b1 x8 : ffffffd2be69f88c [ 4.845295][ T151] x7 : ffffffd2bd9e0e5c x6 : 0000000000000000 [ 4.846099][ T151] x5 : 0000000000000080 x4 : 0000000000000001 [ 4.846902][ T151] x3 : 68fd680dfe4ebe5e x2 : 0000000000000003 [ 4.847706][ T151] x1 : ffffffd2bf7f9380 x0 : ffffff80cea7e260 [ 4.856708][ T151] die+0x16c/0x59c [ 4.857191][ T151] __do_kernel_fault+0x1e8/0x210 [ 4.857833][ T151] do_page_fault+0xa4/0x654 [ 4.858418][ T151] do_translation_fault+0x6c/0x1b0 [ 4.859083][ T151] do_mem_abort+0x68/0x10c [ 4.859655][ T151] el1_abort+0x40/0x64 [ 4.860182][ T151] el1h_64_sync_handler+0x54/0x88 [ 4.860834][ T151] el1h_64_sync+0x7c/0x80 [ 4.861395][ T151] sd_resume_runtime+0x20/0x14c [ 4.862025][ T151] scsi_runtime_resume+0x84/0xe4 [ 4.862667][ T151] __rpm_callback+0x1f4/0x8cc [ 4.863275][ T151] rpm_resume+0x7e8/0xaa4 [ 4.863836][ T151] __pm_runtime_resume+0xa0/0x110 [ 4.864489][ T151] sd_probe+0x30/0x428 [ 4.865016][ T151] really_probe+0x14c/0x500 [ 4.865602][ T151] __driver_probe_device+0xb4/0x18c [ 4.866278][ T151] driver_probe_device+0x60/0x2c4 [ 4.866931][ T151] __device_attach_driver+0x228/0x2bc [ 4.867630][ T151] __device_attach_async_helper+0x154/0x21c [ 4.868398][ T151] async_run_entry_fn+0x5c/0x1c4 [ 4.869038][ T151] process_one_work+0x3ac/0x590 [ 4.869670][ T151] worker_thread+0x320/0x758 [ 4.870265][ T151] kthread+0x2e8/0x35c [ 4.870792][ T151] ret_from_fork+0x10/0x20 Link: https://lore.kernel.org/r/20211015074654.19615-1-miles.chen@mediatek.com Fixes: ed4246d37f3b ("scsi: sd: REQUEST SENSE for BLIST_IGN_MEDIA_CHANGE devices in runtime_resume()") Cc: Stanley Chu Reviewed-by: Martin Kepplinger Reviewed-by: Stanley Chu Signed-off-by: Miles Chen Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 523bf2fdc253..fce63335084e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3683,7 +3683,12 @@ static int sd_resume(struct device *dev) static int sd_resume_runtime(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); - struct scsi_device *sdp = sdkp->device; + struct scsi_device *sdp; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + + sdp = sdkp->device; if (sdp->ignore_media_change) { /* clear the device's sense data */ -- cgit v1.2.3 From 5370b0f49078203acf3c064b634a09707167a864 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 14 Oct 2021 13:20:22 -1000 Subject: blk-cgroup: blk_cgroup_bio_start() should use irq-safe operations on blkg->iostat_cpu c3df5fb57fe8 ("cgroup: rstat: fix A-A deadlock on 32bit around u64_stats_sync") made u64_stats updates irq-safe to avoid A-A deadlocks. Unfortunately, the conversion missed one in blk_cgroup_bio_start(). Fix it. Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") Cc: stable@vger.kernel.org # v5.13+ Reported-by: syzbot+9738c8815b375ce482a1@syzkaller.appspotmail.com Signed-off-by: Tejun Heo Link: https://lore.kernel.org/r/YWi7NrQdVlxD6J9W@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 38b9f7684952..9a1c5839dd46 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1897,10 +1897,11 @@ void blk_cgroup_bio_start(struct bio *bio) { int rwd = blk_cgroup_io_type(bio), cpu; struct blkg_iostat_set *bis; + unsigned long flags; cpu = get_cpu(); bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu); - u64_stats_update_begin(&bis->sync); + flags = u64_stats_update_begin_irqsave(&bis->sync); /* * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split @@ -1912,7 +1913,7 @@ void blk_cgroup_bio_start(struct bio *bio) } bis->cur.ios[rwd]++; - u64_stats_update_end(&bis->sync); + u64_stats_update_end_irqrestore(&bis->sync, flags); if (cgroup_subsys_on_dfl(io_cgrp_subsys)) cgroup_rstat_updated(bio->bi_blkg->blkcg->css.cgroup, cpu); put_cpu(); -- cgit v1.2.3 From 9fbfabfda25d8774c5a08634fdd2da000a924890 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 18 Oct 2021 18:34:22 +0800 Subject: block: fix incorrect references to disk objects When adding partitions to the disk, the reference count of the disk object is increased. then alloc partition device and called device_add(), if the device_add() return error, the reference count of the disk object will be reduced twice, at put_device(pdev) and put_disk(disk). this leads to the end of the object's life cycle prematurely, and trigger following calltrace. __init_work+0x2d/0x50 kernel/workqueue.c:519 synchronize_rcu_expedited+0x3af/0x650 kernel/rcu/tree_exp.h:847 bdi_remove_from_list mm/backing-dev.c:938 [inline] bdi_unregister+0x17f/0x5c0 mm/backing-dev.c:946 release_bdi+0xa1/0xc0 mm/backing-dev.c:968 kref_put include/linux/kref.h:65 [inline] bdi_put+0x72/0xa0 mm/backing-dev.c:976 bdev_free_inode+0x11e/0x220 block/bdev.c:408 i_callback+0x3f/0x70 fs/inode.c:226 rcu_do_batch kernel/rcu/tree.c:2508 [inline] rcu_core+0x76d/0x16c0 kernel/rcu/tree.c:2743 __do_softirq+0x1d7/0x93b kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu kernel/softirq.c:636 [inline] irq_exit_rcu+0xf2/0x130 kernel/softirq.c:648 sysvec_apic_timer_interrupt+0x93/0xc0 making disk is NULL when calling put_disk(). Reported-by: Hao Sun Signed-off-by: Zqiang Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20211018103422.2043-1-qiang.zhang1211@gmail.com Signed-off-by: Jens Axboe --- block/partitions/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/partitions/core.c b/block/partitions/core.c index 58c4c362c94f..7bea19dd9458 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -423,6 +423,7 @@ out_del: device_del(pdev); out_put: put_device(pdev); + return ERR_PTR(err); out_put_disk: put_disk(disk); return ERR_PTR(err); -- cgit v1.2.3 From 06634d5b6e923ed0d4772aba8def5a618f44c7fe Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Mon, 18 Oct 2021 01:56:21 +0000 Subject: scsi: qla2xxx: Return -ENOMEM if kzalloc() fails The driver probing function should return < 0 for failure, otherwise kernel will treat value > 0 as success. Link: https://lore.kernel.org/r/1634522181-31166-1-git-send-email-zheyuma97@gmail.com Reviewed-by: Himanshu Madhani Signed-off-by: Zheyu Ma Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index d2e40aaba734..836fedcea241 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4157,7 +4157,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ql_dbg_pci(ql_dbg_init, ha->pdev, 0xe0ee, "%s: failed alloc dsd\n", __func__); - return 1; + return -ENOMEM; } ha->dif_bundle_kallocs++; -- cgit v1.2.3 From 7fb223d0ad801f633c78cbe42b1d1b55f5d163ad Mon Sep 17 00:00:00 2001 From: Joy Gu Date: Tue, 12 Oct 2021 12:18:33 -0700 Subject: scsi: qla2xxx: Fix a memory leak in an error path of qla2x00_process_els() Commit 8c0eb596baa5 ("[SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()"), intended to change: bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN to: bsg_job->request->msgcode != FC_BSG_RPT_ELS but changed it to: bsg_job->request->msgcode == FC_BSG_RPT_ELS instead. Change the == to a != to avoid leaking the fcport structure or freeing unallocated memory. Link: https://lore.kernel.org/r/20211012191834.90306-2-jgu@purestorage.com Fixes: 8c0eb596baa5 ("[SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()") Reviewed-by: Bart Van Assche Signed-off-by: Joy Gu Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_bsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 4b5d28d89d69..655cf5de604b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -431,7 +431,7 @@ done_unmap_sg: goto done_free_fcport; done_free_fcport: - if (bsg_request->msgcode == FC_BSG_RPT_ELS) + if (bsg_request->msgcode != FC_BSG_RPT_ELS) qla2x00_free_fcport(fcport); done: return rval; -- cgit v1.2.3 From 4a8f71014b4d56c4fb287607e844c0a9f68f46d9 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Mon, 18 Oct 2021 15:26:50 +0300 Subject: scsi: qla2xxx: Fix unmap of already freed sgl The sgl is freed in the target stack in target_release_cmd_kref() before calling qlt_free_cmd() but there is an unmap of sgl in qlt_free_cmd() that causes a panic if sgl is not yet DMA unmapped: NIP dma_direct_unmap_sg+0xdc/0x180 LR dma_direct_unmap_sg+0xc8/0x180 Call Trace: ql_dbg_prefix+0x68/0xc0 [qla2xxx] (unreliable) dma_unmap_sg_attrs+0x54/0xf0 qlt_unmap_sg.part.19+0x54/0x1c0 [qla2xxx] qlt_free_cmd+0x124/0x1d0 [qla2xxx] tcm_qla2xxx_release_cmd+0x4c/0xa0 [tcm_qla2xxx] target_put_sess_cmd+0x198/0x370 [target_core_mod] transport_generic_free_cmd+0x6c/0x1b0 [target_core_mod] tcm_qla2xxx_complete_free+0x6c/0x90 [tcm_qla2xxx] The sgl may be left unmapped in error cases of response sending. For instance, qlt_rdy_to_xfer() maps sgl and exits when session is being deleted keeping the sgl mapped. This patch removes use-after-free of the sgl and ensures that the sgl is unmapped for any command that was not sent to firmware. Link: https://lore.kernel.org/r/20211018122650.11846-1-d.bogdanov@yadro.com Reviewed-by: Himanshu Madhani Signed-off-by: Dmitry Bogdanov Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index b3478ed9b12e..7d8242c120fc 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3319,8 +3319,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - return 0; + goto out_unmap_unlock; } /* Does F/W have an IOCBs for this request */ @@ -3445,10 +3444,6 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) prm.sg = NULL; prm.req_cnt = 1; - /* Calculate number of entries and segments required */ - if (qlt_pci_map_calc_cnt(&prm) != 0) - return -EAGAIN; - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* @@ -3466,6 +3461,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) return 0; } + /* Calculate number of entries and segments required */ + if (qlt_pci_map_calc_cnt(&prm) != 0) + return -EAGAIN; + spin_lock_irqsave(qpair->qp_lock_ptr, flags); /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, prm.req_cnt); @@ -3870,9 +3869,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->cmd_in_wq); - if (cmd->sg_mapped) - qlt_unmap_sg(cmd->vha, cmd); - if (!cmd->q_full) qlt_decr_num_pend_cmds(cmd->vha); -- cgit v1.2.3 From 4e5483b8440d01f6851a1388801088a6e0da0b56 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 18 Oct 2021 18:10:04 +0300 Subject: scsi: ufs: ufs-pci: Force a full restore after suspend-to-disk Implement the ->restore() PM operation and set the link to off, which will force a full reset and restore. This ensures that Host Performance Booster is reset after suspend-to-disk. The Host Performance Booster feature caches logical-to-physical mapping information in the host memory. After suspend-to-disk, such information is not valid, so a full reset and restore is needed. A full reset and restore is done if the SPM level is 5 or 6, but not for other SPM levels, so this change fixes those cases. A full reset and restore also restores base address registers, so that code is removed. Link: https://lore.kernel.org/r/20211018151004.284200-2-adrian.hunter@intel.com Reviewed-by: Avri Altman Signed-off-by: Adrian Hunter Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd-pci.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index 149c1aa09103..51424557810d 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -370,20 +370,6 @@ static void ufs_intel_common_exit(struct ufs_hba *hba) static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) { - /* - * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base - * address registers must be restored because the restore kernel can - * have used different addresses. - */ - ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_H); - ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_H); - if (ufshcd_is_link_hibern8(hba)) { int ret = ufshcd_uic_hibern8_exit(hba); @@ -463,6 +449,18 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = { .device_reset = ufs_intel_device_reset, }; +#ifdef CONFIG_PM_SLEEP +static int ufshcd_pci_restore(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + /* Force a full reset and restore */ + ufshcd_set_link_off(hba); + + return ufshcd_system_resume(dev); +} +#endif + /** * ufshcd_pci_shutdown - main function to put the controller in reset state * @pdev: pointer to PCI device handle @@ -546,9 +544,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) #ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_system_suspend, + .resume = ufshcd_system_resume, + .freeze = ufshcd_system_suspend, + .thaw = ufshcd_system_resume, + .poweroff = ufshcd_system_suspend, + .restore = ufshcd_pci_restore, .prepare = ufshcd_suspend_prepare, .complete = ufshcd_resume_complete, #endif -- cgit v1.2.3 From 9d417cbe36eee7afdd85c2e871685f8dab7c2dba Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 8 Sep 2021 19:25:59 +0100 Subject: ARM: 9122/1: select HAVE_FUTEX_CMPXCHG tglx notes: This function [futex_detect_cmpxchg] is only needed when an architecture has to runtime discover whether the CPU supports it or not. ARM has unconditional support for this, so the obvious thing to do is the below. Fixes linkage failure from Clang randconfigs: kernel/futex.o:(.text.fixup+0x5c): relocation truncated to fit: R_ARM_JUMP24 against `.init.text' and boot failures for CONFIG_THUMB2_KERNEL. Link: https://github.com/ClangBuiltLinux/linux/issues/325 Comments from Nick Desaulniers: See-also: 03b8c7b623c8 ("futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test") Reported-by: Arnd Bergmann Reported-by: Nathan Chancellor Suggested-by: Thomas Gleixner Signed-off-by: Nick Desaulniers Reviewed-by: Thomas Gleixner Tested-by: Nathan Chancellor Reviewed-by: Linus Walleij Cc: stable@vger.kernel.org # v3.14+ Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fc196421b2ce..b760dd45b734 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -92,6 +92,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IRQ_TIME_ACCOUNTING -- cgit v1.2.3 From 00d43d13da6c3424101aebabefab9e97e6318bee Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 21 Sep 2021 15:31:23 +0100 Subject: ARM: 9125/1: fix incorrect use of get_kernel_nofault() Commit 344179fc7ef4 ("ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs()") replaced an occurrence of __get_user() with get_kernel_nofault(), but inverted the sense of the conditional in the process, resulting in no values to be printed at all. I.e., every exception stack now looks like this: Exception stack(0xc18d1fb0 to 0xc18d1ff8) 1fa0: ???????? ???????? ???????? ???????? 1fc0: ???????? ???????? ???????? ???????? ???????? ???????? ???????? ???????? 1fe0: ???????? ???????? ???????? ???????? ???????? ???????? which is rather unhelpful. Fixes: 344179fc7ef4 ("ARM: 9106/1: traps: use get_kernel_nofault instead of set_fs()") Signed-off-by: Ard Biesheuvel Reviewed-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4a7edc6e848f..195dff58bafc 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -136,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; - if (get_kernel_nofault(val, (unsigned long *)p)) + if (!get_kernel_nofault(val, (unsigned long *)p)) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); -- cgit v1.2.3 From df909df0770779f1a5560c2bb641a2809655ef28 Mon Sep 17 00:00:00 2001 From: Lexi Shao Date: Thu, 23 Sep 2021 03:41:25 +0100 Subject: ARM: 9132/1: Fix __get_user_check failure with ARM KASAN images ARM: kasan: Fix __get_user_check failure with kasan In macro __get_user_check defined in arch/arm/include/asm/uaccess.h, error code is store in register int __e(r0). When kasan is enabled, assigning value to kernel address might trigger kasan check, which unexpectedly overwrites r0 and causes undefined behavior on arm kasan images. One example is failure in do_futex and results in process soft lockup. Log: watchdog: BUG: soft lockup - CPU#0 stuck for 62946ms! [rs:main Q:Reg:1151] ... (__asan_store4) from (futex_wait_setup+0xf8/0x2b4) (futex_wait_setup) from (futex_wait+0x138/0x394) (futex_wait) from (do_futex+0x164/0xe40) (do_futex) from (sys_futex_time32+0x178/0x230) (sys_futex_time32) from (ret_fast_syscall+0x0/0x50) The soft lockup happens in function futex_wait_setup. The reason is function get_futex_value_locked always return EINVAL, thus pc jump back to retry label and causes looping. This line in function get_futex_value_locked ret = __get_user(*dest, from); is expanded to *dest = (typeof(*(p))) __r2; , in macro __get_user_check. Writing to pointer dest triggers kasan check and overwrites the return value of __get_user_x function. The assembly code of get_futex_value_locked in kernel/futex.c: ... c01f6dc8: eb0b020e bl c04b7608 <__get_user_4> // "x = (typeof(*(p))) __r2;" triggers kasan check and r0 is overwritten c01f6dCc: e1a00007 mov r0, r7 c01f6dd0: e1a05002 mov r5, r2 c01f6dd4: eb04f1e6 bl c0333574 <__asan_store4> c01f6dd8: e5875000 str r5, [r7] // save ret value of __get_user(*dest, from), which is dest address now c01f6ddc: e1a05000 mov r5, r0 ... // checking return value of __get_user failed c01f6e00: e3550000 cmp r5, #0 ... c01f6e0c: 01a00005 moveq r0, r5 // assign return value to EINVAL c01f6e10: 13e0000d mvnne r0, #13 Return value is the destination address of get_user thus certainly non-zero, so get_futex_value_locked always return EINVAL. Fix it by using a tmp vairable to store the error code before the assignment. This fix has no effects to non-kasan images thanks to compiler optimization. It only affects cases that overwrite r0 due to kasan check. This should fix bug discussed in Link: [1] https://lore.kernel.org/linux-arm-kernel/0ef7c2a5-5d8b-c5e0-63fa-31693fd4495c@gmail.com/ Fixes: 421015713b30 ("ARM: 9017/2: Enable KASan for ARM") Signed-off-by: Lexi Shao Signed-off-by: Russell King (Oracle) --- arch/arm/include/asm/uaccess.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 084d1c07c2d0..36fbc3329252 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -176,6 +176,7 @@ extern int __get_user_64t_4(void *); register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ + int __tmp_e; \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -203,9 +204,10 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + __tmp_e = __e; \ uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ - __e; \ + __tmp_e; \ }) #define get_user(x, p) \ -- cgit v1.2.3 From e6a0c958bdf9b2e1b57501fc9433a461f0a6aadd Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 4 Oct 2021 18:03:28 +0100 Subject: ARM: 9133/1: mm: proc-macros: ensure *_tlb_fns are 4B aligned A kernel built with CONFIG_THUMB2_KERNEL=y and using clang as the assembler could generate non-naturally-aligned v7wbi_tlb_fns which results in a boot failure. The original commit adding the macro missed the .align directive on this data. Link: https://github.com/ClangBuiltLinux/linux/issues/1447 Link: https://lore.kernel.org/all/0699da7b-354f-aecc-a62f-e25693209af4@linaro.org/ Debugged-by: Ard Biesheuvel Debugged-by: Nathan Chancellor Debugged-by: Richard Henderson Fixes: 66a625a88174 ("ARM: mm: proc-macros: Add generic proc/cache/tlb struct definition macros") Suggested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) --- arch/arm/mm/proc-macros.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e2c743aa2eb2..d9f7dfe2a7ed 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range -- cgit v1.2.3 From eaf6cc7165c9c5aa3c2f9faa03a98598123d0afb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:04 +0100 Subject: ARM: 9134/1: remove duplicate memcpy() definition Both the decompressor code and the kasan logic try to override the memcpy() and memmove() definitions, which leading to a clash in a KASAN-enabled kernel with XZ decompression: arch/arm/boot/compressed/decompress.c:50:9: error: 'memmove' macro redefined [-Werror,-Wmacro-redefined] #define memmove memmove ^ arch/arm/include/asm/string.h:59:9: note: previous definition is here #define memmove(dst, src, len) __memmove(dst, src, len) ^ arch/arm/boot/compressed/decompress.c:51:9: error: 'memcpy' macro redefined [-Werror,-Wmacro-redefined] #define memcpy memcpy ^ arch/arm/include/asm/string.h:58:9: note: previous definition is here #define memcpy(dst, src, len) __memcpy(dst, src, len) ^ Here we want the set of functions from the decompressor, so undefine the other macros before the override. Link: https://lore.kernel.org/linux-arm-kernel/CACRpkdZYJogU_SN3H9oeVq=zJkRgRT1gDz3xp59gdqWXxw-B=w@mail.gmail.com/ Link: https://lore.kernel.org/lkml/202105091112.F5rmd4By-lkp@intel.com/ Fixes: d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") Fixes: a7f464f3db93 ("ARM: 7001/2: Wire up support for the XZ decompressor") Reported-by: kernel test robot Reviewed-by: Linus Walleij Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/boot/compressed/decompress.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea..74255e819831 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif -- cgit v1.2.3 From 44cc6412e66b2b84544eaf2e14cf1764301e2a80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:08 +0100 Subject: ARM: 9138/1: fix link warning with XIP + frame-pointer When frame pointers are used instead of the ARM unwinder, and the kernel is built using clang with an external assembler and CONFIG_XIP_KERNEL, every file produces two warnings like: arm-linux-gnueabi-ld: warning: orphan section `.ARM.extab' from `net/mac802154/util.o' being placed in section `.ARM.extab' arm-linux-gnueabi-ld: warning: orphan section `.ARM.exidx' from `net/mac802154/util.o' being placed in section `.ARM.exidx' The same fix was already merged for the normal (non-XIP) linker script, with a longer description. Fixes: c39866f268f8 ("arm/build: Always handle .ARM.exidx and .ARM.extab sections") Reviewed-by: Kees Cook Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/vmlinux-xip.lds.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 50136828f5b5..e0c00986487f 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -40,6 +40,10 @@ SECTIONS ARM_DISCARD *(.alt.smp.init) *(.pv_table) +#ifndef CONFIG_ARM_UNWIND + *(.ARM.exidx) *(.ARM.exidx.*) + *(.ARM.extab) *(.ARM.extab.*) +#endif } . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); -- cgit v1.2.3 From 1f323127cab086e4fd618981b1e5edc396eaf0f4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:09 +0100 Subject: ARM: 9139/1: kprobes: fix arch_init_kprobes() prototype With extra warnings enabled, gcc complains about this function definition: arch/arm/probes/kprobes/core.c: In function 'arch_init_kprobes': arch/arm/probes/kprobes/core.c:465:12: warning: old-style function definition [-Wold-style-definition] 465 | int __init arch_init_kprobes() Link: https://lore.kernel.org/all/20201027093057.c685a14b386acacb3c449e3d@kernel.org/ Fixes: 24ba613c9d6c ("ARM kprobes: core code") Acked-by: Masami Hiramatsu Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/probes/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 27e0af78e88b..9d8634e2f12f 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -439,7 +439,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL -- cgit v1.2.3 From 48ccc8edf5b90622cdc4f8878e0042ab5883e2ca Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:30:37 +0100 Subject: ARM: 9141/1: only warn about XIP address when not compile testing In randconfig builds, we sometimes come across this warning: arm-linux-gnueabi-ld: XIP start address may cause MPU programming issues While this is helpful for actual systems to figure out why it fails, the warning does not provide any benefit for build testing, so guard it in a check for CONFIG_COMPILE_TEST, which is usually set on randconfig builds. Fixes: 216218308cfb ("ARM: 8713/1: NOMMU: Support MPU in XIP configuration") Signed-off-by: Arnd Bergmann Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/vmlinux-xip.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index e0c00986487f..f14c2360ea0b 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -176,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker -- cgit v1.2.3 From 63acd42c0d4942f74710b11c38602fb14dea7320 Mon Sep 17 00:00:00 2001 From: Woody Lin Date: Tue, 12 Oct 2021 16:35:21 +0800 Subject: sched/scs: Reset the shadow stack when idle_task_exit Commit f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") removed the init_idle() call from idle_thread_get(). This was the sole call-path on hotplug that resets the Shadow Call Stack (scs) Stack Pointer (sp). Not resetting the scs-sp leads to scs overflow after enough hotplug cycles. Therefore add an explicit scs_task_reset() to the hotplug code to make sure the scs-sp does get reset on hotplug. Fixes: f1a0a376ca0c ("sched/core: Initialize the idle task with preemption disabled") Signed-off-by: Woody Lin [peterz: Changelog] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20211012083521.973587-1-woodylin@google.com --- kernel/sched/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bba4128a3e6..f21714ea3db8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8795,6 +8795,7 @@ void idle_task_exit(void) finish_arch_post_lock_switch(); } + scs_task_reset(current); /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ } -- cgit v1.2.3 From bc28368596436e6e81ffc48c815b8225d96121c0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 15 Oct 2021 19:12:21 +0200 Subject: ACPI: PM: Do not turn off power resources in unknown state Commit 6381195ad7d0 ("ACPI: power: Rework turning off unused power resources") caused power resources in unknown state with reference counters equal to zero to be turned off too, but that caused issues to appear in the field, so modify the code to only turn off power resources that are known to be "on". Link: https://lore.kernel.org/linux-acpi/6faf4b92-78d5-47a4-63df-cc2bab7769d0@molgen.mpg.de/ Fixes: 6381195ad7d0 ("ACPI: power: Rework turning off unused power resources") Reported-by: Andreas K. Huettel Tested-by: Andreas K. Huettel Signed-off-by: Rafael J. Wysocki Cc: 5.14+ # 5.14+ --- drivers/acpi/power.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index b9863e22b952..f0ed4414edb1 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -1035,13 +1035,8 @@ void acpi_turn_off_unused_power_resources(void) list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) { mutex_lock(&resource->resource_lock); - /* - * Turn off power resources in an unknown state too, because the - * platform firmware on some system expects the OS to turn off - * power resources without any users unconditionally. - */ if (!resource->ref_count && - resource->state != ACPI_POWER_RESOURCE_STATE_OFF) { + resource->state == ACPI_POWER_RESOURCE_STATE_ON) { acpi_handle_debug(resource->device.handle, "Turning OFF\n"); __acpi_power_off(resource); } -- cgit v1.2.3 From bc369921d6708542eb93da33478762f1162a5805 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 19 Oct 2021 20:31:26 +0100 Subject: io-wq: max_worker fixes First, fix nr_workers checks against max_workers, with max_worker registration, it may pretty easily happen that nr_workers > max_workers. Also, synchronise writing to acct->max_worker with wqe->lock. It's not an actual problem, but as we don't care about io_wqe_create_worker(), it's better than WRITE_ONCE()/READ_ONCE(). Fixes: 2e480058ddc2 ("io-wq: provide a way to limit max number of workers") Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/11f90e6b49410b7d1a88f5d04fb8d95bb86b8cf3.1634671835.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 5bf8aa81715e..422a7ed6a9bd 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -253,7 +253,7 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) pr_warn_once("io-wq is not configured for unbound workers"); raw_spin_lock(&wqe->lock); - if (acct->nr_workers == acct->max_workers) { + if (acct->nr_workers >= acct->max_workers) { raw_spin_unlock(&wqe->lock); return true; } @@ -1291,15 +1291,18 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count) rcu_read_lock(); for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; struct io_wqe_acct *acct; + raw_spin_lock(&wqe->lock); for (i = 0; i < IO_WQ_ACCT_NR; i++) { - acct = &wq->wqes[node]->acct[i]; + acct = &wqe->acct[i]; prev = max_t(int, acct->max_workers, prev); if (new_count[i]) acct->max_workers = new_count[i]; new_count[i] = prev; } + raw_spin_unlock(&wqe->lock); } rcu_read_unlock(); return 0; -- cgit v1.2.3 From 60fab1076636493cfa2686e712446595fe43bc16 Mon Sep 17 00:00:00 2001 From: Prabhakar Kushwaha Date: Tue, 19 Oct 2021 11:22:12 +0300 Subject: rdma/qedr: Fix crash due to redundant release of device's qp memory Device's QP memory should only be allocated and released by IB layer. This patch removes the redundant release of the device's qp memory and uses completion APIs to make sure that .destroy_qp() only return, when qp reference becomes 0. Fixes: 514aee660df4 ("RDMA: Globally allocate and release QP memory") Link: https://lore.kernel.org/r/20211019082212.7052-1-pkushwaha@marvell.com Acked-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: Alok Prasad Signed-off-by: Prabhakar Kushwaha Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/qedr.h | 1 + drivers/infiniband/hw/qedr/qedr_iw_cm.c | 2 +- drivers/infiniband/hw/qedr/verbs.c | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 3cb4febaad0f..8def88cfa300 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -455,6 +455,7 @@ struct qedr_qp { /* synchronization objects used with iwarp ep */ struct kref refcnt; struct completion iwarp_cm_comp; + struct completion qp_rel_comp; unsigned long iwarp_cm_flags; /* enum iwarp_cm_flags */ }; diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 1715fbe0719d..a51fc6854984 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -83,7 +83,7 @@ static void qedr_iw_free_qp(struct kref *ref) { struct qedr_qp *qp = container_of(ref, struct qedr_qp, refcnt); - kfree(qp); + complete(&qp->qp_rel_comp); } static void diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 3fbf172dbbef..dcb3653db72d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1357,6 +1357,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, if (rdma_protocol_iwarp(&dev->ibdev, 1)) { kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); + init_completion(&qp->qp_rel_comp); } qp->pd = pd; @@ -2857,8 +2858,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) qedr_free_qp_resources(dev, qp, udata); - if (rdma_protocol_iwarp(&dev->ibdev, 1)) + if (rdma_protocol_iwarp(&dev->ibdev, 1)) { qedr_iw_qp_rem_ref(&qp->ibqp); + wait_for_completion(&qp->qp_rel_comp); + } return 0; } -- cgit v1.2.3 From 5508546631a0f555d7088203dec2614e41b5106e Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 19 Oct 2021 14:30:10 +0300 Subject: RDMA/mlx5: Initialize the ODP xarray when creating an ODP MR Normally the zero fill would hide the missing initialization, but an errant set to desc_size in reg_create() causes a crash: BUG: unable to handle page fault for address: 0000000800000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 5 PID: 890 Comm: ib_write_bw Not tainted 5.15.0-rc4+ #47 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5_ib_dereg_mr+0x14/0x3b0 [mlx5_ib] Code: 48 63 cd 4c 89 f7 48 89 0c 24 e8 37 30 03 e1 48 8b 0c 24 eb a0 90 0f 1f 44 00 00 41 56 41 55 41 54 55 53 48 89 fb 48 83 ec 30 <48> 8b 2f 65 48 8b 04 25 28 00 00 00 48 89 44 24 28 31 c0 8b 87 c8 RSP: 0018:ffff88811afa3a60 EFLAGS: 00010286 RAX: 000000000000001c RBX: 0000000800000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000800000000 RBP: 0000000800000000 R08: 0000000000000000 R09: c0000000fffff7ff R10: ffff88811afa38f8 R11: ffff88811afa38f0 R12: ffffffffa02c7ac0 R13: 0000000000000000 R14: ffff88811afa3cd8 R15: ffff88810772fa00 FS: 00007f47b9080740(0000) GS:ffff88852cd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000800000000 CR3: 000000010761e003 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mlx5_ib_free_odp_mr+0x95/0xc0 [mlx5_ib] mlx5_ib_dereg_mr+0x128/0x3b0 [mlx5_ib] ib_dereg_mr_user+0x45/0xb0 [ib_core] ? xas_load+0x8/0x80 destroy_hw_idr_uobject+0x1a/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2f/0x150 [ib_uverbs] uobj_destroy+0x3c/0x70 [ib_uverbs] ib_uverbs_cmd_verbs+0x467/0xb00 [ib_uverbs] ? uverbs_finalize_object+0x60/0x60 [ib_uverbs] ? ttwu_queue_wakelist+0xa9/0xe0 ? pty_write+0x85/0x90 ? file_tty_write.isra.33+0x214/0x330 ? process_echoes+0x60/0x60 ib_uverbs_ioctl+0xa7/0x110 [ib_uverbs] __x64_sys_ioctl+0x10d/0x8e0 ? vfs_write+0x17f/0x260 do_syscall_64+0x3c/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Add the missing xarray initialization and remove the desc_size set. Fixes: a639e66703ee ("RDMA/mlx5: Zero out ODP related items in the mlx5_ib_mr") Link: https://lore.kernel.org/r/a4846a11c9de834663e521770da895007f9f0d30.1634642730.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3be36ebbf67a..22e2f4d79743 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1339,7 +1339,6 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, goto err_2; } mr->mmkey.type = MLX5_MKEY_MR; - mr->desc_size = sizeof(struct mlx5_mtt); mr->umem = umem; set_mr_fields(dev, mr, umem->length, access_flags); kvfree(in); @@ -1533,6 +1532,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, ib_umem_release(&odp->umem); return ERR_CAST(mr); } + xa_init(&mr->implicit_children); odp->private = mr; err = mlx5r_store_odp_mkey(dev, &mr->mmkey); -- cgit v1.2.3 From cc07b73ef11d11d4359fb104d0199b22451dd3d8 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:53 -0500 Subject: RDMA/irdma: Set VLAN in UD work completion correctly Currently VLAN is reported in UD work completion when VLAN id is zero, i.e. no VLAN case. Report VLAN in UD work completion only when VLAN id is non-zero. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Link: https://lore.kernel.org/r/20211019151654.1943-1-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 7110ebf834f9..102dc9342f2a 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3399,9 +3399,13 @@ static void irdma_process_cqe(struct ib_wc *entry, } if (cq_poll_info->ud_vlan_valid) { - entry->vlan_id = cq_poll_info->ud_vlan & VLAN_VID_MASK; - entry->wc_flags |= IB_WC_WITH_VLAN; + u16 vlan = cq_poll_info->ud_vlan & VLAN_VID_MASK; + entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT; + if (vlan) { + entry->vlan_id = vlan; + entry->wc_flags |= IB_WC_WITH_VLAN; + } } else { entry->sl = 0; } -- cgit v1.2.3 From 2dace185caa580720c7cd67fec9efc5ee26108ac Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Oct 2021 10:16:54 -0500 Subject: RDMA/irdma: Do not hold qos mutex twice on QP resume When irdma_ws_add fails, irdma_ws_remove is used to cleanup the leaf node. This lead to holding the qos mutex twice in the QP resume path. Fix this by avoiding the call to irdma_ws_remove and unwinding the error in irdma_ws_add. This skips the call to irdma_tc_in_use function which is not needed in the error unwind cases. Fixes: 3ae331c75128 ("RDMA/irdma: Add QoS definitions") Link: https://lore.kernel.org/r/20211019151654.1943-2-shiraz.saleem@intel.com Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ws.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c index b68c575eb78e..b0d6ee0739f5 100644 --- a/drivers/infiniband/hw/irdma/ws.c +++ b/drivers/infiniband/hw/irdma/ws.c @@ -330,8 +330,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) tc_node->enable = true; ret = irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_MODIFY_NODE); - if (ret) + if (ret) { + vsi->unregister_qset(vsi, tc_node); goto reg_err; + } } ibdev_dbg(to_ibdev(vsi->dev), "WS: Using node %d which represents VSI %d TC %d\n", @@ -350,6 +352,10 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) } goto exit; +reg_err: + irdma_ws_cqp_cmd(vsi, tc_node, IRDMA_OP_WS_DELETE_NODE); + list_del(&tc_node->siblings); + irdma_free_node(vsi, tc_node); leaf_add_err: if (list_empty(&vsi_node->child_list_head)) { if (irdma_ws_cqp_cmd(vsi, vsi_node, IRDMA_OP_WS_DELETE_NODE)) @@ -369,11 +375,6 @@ vsi_add_err: exit: mutex_unlock(&vsi->dev->ws_mutex); return ret; - -reg_err: - mutex_unlock(&vsi->dev->ws_mutex); - irdma_ws_remove(vsi, user_pri); - return ret; } /** -- cgit v1.2.3 From f09f6dfef8ce7b70a240cf83811e2b1909c3e47b Mon Sep 17 00:00:00 2001 From: Russ Weight Date: Mon, 18 Oct 2021 17:24:01 -0700 Subject: spi: altera: Change to dynamic allocation of spi id The spi-altera driver has two flavors: platform and dfl. I'm seeing a case where I have both device types in the same machine, and they are conflicting on the SPI ID: ... kernel: couldn't get idr ... kernel: WARNING: CPU: 28 PID: 912 at drivers/spi/spi.c:2920 spi_register_controller.cold+0x84/0xc0a Both the platform and dfl drivers use the parent's driver ID as the SPI ID. In the error case, the parent devices are dfl_dev.4 and subdev_spi_altera.4.auto. When the second spi-master is created, the failure occurs because the SPI ID of 4 has already been allocated. Change the ID allocation to dynamic (by initializing bus_num to -1) to avoid duplicate SPI IDs. Signed-off-by: Russ Weight Link: https://lore.kernel.org/r/20211019002401.24041-1-russell.h.weight@intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera-dfl.c | 2 +- drivers/spi/spi-altera-platform.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c index 44fc9ee13fc7..ca40923258af 100644 --- a/drivers/spi/spi-altera-dfl.c +++ b/drivers/spi/spi-altera-dfl.c @@ -134,7 +134,7 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) if (!master) return -ENOMEM; - master->bus_num = dfl_dev->id; + master->bus_num = -1; hw = spi_master_get_devdata(master); diff --git a/drivers/spi/spi-altera-platform.c b/drivers/spi/spi-altera-platform.c index f7a7c14e3679..65147aae82a1 100644 --- a/drivers/spi/spi-altera-platform.c +++ b/drivers/spi/spi-altera-platform.c @@ -48,7 +48,7 @@ static int altera_spi_probe(struct platform_device *pdev) return err; /* setup the master state. */ - master->bus_num = pdev->id; + master->bus_num = -1; if (pdata) { if (pdata->num_chipselect > ALTERA_SPI_MAX_CS) { -- cgit v1.2.3 From 34061d6b76a41b1e43c19e1e50d98e5d77f77d4e Mon Sep 17 00:00:00 2001 From: Hyunchul Lee Date: Sat, 16 Oct 2021 08:39:54 +0900 Subject: ksmbd: validate OutputBufferLength of QUERY_DIR, QUERY_INFO, IOCTL requests Validate OutputBufferLength of QUERY_DIR, QUERY_INFO, IOCTL requests and check the free size of response buffer for these requests. Acked-by: Namjae Jeon Signed-off-by: Hyunchul Lee Signed-off-by: Steve French --- fs/ksmbd/smb2pdu.c | 68 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 7999d8bc6892..e0f3a44e1599 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -3762,6 +3762,24 @@ static int verify_info_level(int info_level) return 0; } +static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, + unsigned short hdr2_len, + unsigned int out_buf_len) +{ + int free_len; + + if (out_buf_len > work->conn->vals->max_trans_size) + return -EINVAL; + + free_len = (int)(work->response_sz - + (get_rfc1002_len(work->response_buf) + 4)) - + hdr2_len; + if (free_len < 0) + return -EINVAL; + + return min_t(int, out_buf_len, free_len); +} + int smb2_query_dir(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; @@ -3838,9 +3856,13 @@ int smb2_query_dir(struct ksmbd_work *work) memset(&d_info, 0, sizeof(struct ksmbd_dir_info)); d_info.wptr = (char *)rsp->Buffer; d_info.rptr = (char *)rsp->Buffer; - d_info.out_buf_len = (work->response_sz - (get_rfc1002_len(rsp_org) + 4)); - d_info.out_buf_len = min_t(int, d_info.out_buf_len, le32_to_cpu(req->OutputBufferLength)) - - sizeof(struct smb2_query_directory_rsp); + d_info.out_buf_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (d_info.out_buf_len < 0) { + rc = -EINVAL; + goto err_out; + } d_info.flags = srch_flag; /* @@ -4074,12 +4096,11 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp, le32_to_cpu(req->Flags)); } - buf_free_len = work->response_sz - - (get_rfc1002_len(rsp_org) + 4) - - sizeof(struct smb2_query_info_rsp); - - if (le32_to_cpu(req->OutputBufferLength) < buf_free_len) - buf_free_len = le32_to_cpu(req->OutputBufferLength); + buf_free_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < 0) + return -EINVAL; rc = ksmbd_vfs_listxattr(path->dentry, &xattr_list); if (rc < 0) { @@ -4390,6 +4411,8 @@ static void get_file_stream_info(struct ksmbd_work *work, struct path *path = &fp->filp->f_path; ssize_t xattr_list_len; int nbytes = 0, streamlen, stream_name_len, next, idx = 0; + int buf_free_len; + struct smb2_query_info_req *req = ksmbd_req_buf_next(work); generic_fillattr(file_mnt_user_ns(fp->filp), file_inode(fp->filp), &stat); @@ -4403,6 +4426,12 @@ static void get_file_stream_info(struct ksmbd_work *work, goto out; } + buf_free_len = + smb2_calc_max_out_buf_len(work, 8, + le32_to_cpu(req->OutputBufferLength)); + if (buf_free_len < 0) + goto out; + while (idx < xattr_list_len) { stream_name = xattr_list + idx; streamlen = strlen(stream_name); @@ -4427,6 +4456,10 @@ static void get_file_stream_info(struct ksmbd_work *work, streamlen = snprintf(stream_buf, streamlen + 1, ":%s", &stream_name[XATTR_NAME_STREAM_LEN]); + next = sizeof(struct smb2_file_stream_info) + streamlen * 2; + if (next > buf_free_len) + break; + file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes]; streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, stream_buf, streamlen, @@ -4437,12 +4470,13 @@ static void get_file_stream_info(struct ksmbd_work *work, file_info->StreamSize = cpu_to_le64(stream_name_len); file_info->StreamAllocationSize = cpu_to_le64(stream_name_len); - next = sizeof(struct smb2_file_stream_info) + streamlen; nbytes += next; + buf_free_len -= next; file_info->NextEntryOffset = cpu_to_le32(next); } - if (!S_ISDIR(stat.mode)) { + if (!S_ISDIR(stat.mode) && + buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) { file_info = (struct smb2_file_stream_info *) &rsp->Buffer[nbytes]; streamlen = smbConvertToUTF16((__le16 *)file_info->StreamName, @@ -7453,11 +7487,13 @@ int smb2_ioctl(struct ksmbd_work *work) } cnt_code = le32_to_cpu(req->CntCode); - out_buf_len = le32_to_cpu(req->MaxOutputResponse); - out_buf_len = - min_t(u32, work->response_sz - work->next_smb2_rsp_hdr_off - - (offsetof(struct smb2_ioctl_rsp, Buffer) - 4), - out_buf_len); + ret = smb2_calc_max_out_buf_len(work, 48, + le32_to_cpu(req->MaxOutputResponse)); + if (ret < 0) { + rsp->hdr.Status = STATUS_INVALID_PARAMETER; + goto out; + } + out_buf_len = (unsigned int)ret; in_buf_len = le32_to_cpu(req->InputCount); switch (cnt_code) { -- cgit v1.2.3 From 621be84a9d1fbf0097fd058e249ec5cc4f35f3c5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 13 Oct 2021 17:28:31 +0900 Subject: ksmbd: throttle session setup failures to avoid dictionary attacks To avoid dictionary attacks (repeated session setups rapidly sent) to connect to server, ksmbd make a delay of a 5 seconds on session setup failure to make it harder to send enough random connection requests to break into a server if a user insert the wrong password 10 times in a row. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/ksmbd/ksmbd_netlink.h | 2 ++ fs/ksmbd/mgmt/user_config.c | 2 +- fs/ksmbd/mgmt/user_config.h | 1 + fs/ksmbd/smb2pdu.c | 27 ++++++++++++++++++++++++--- fs/ksmbd/transport_ipc.c | 3 ++- fs/ksmbd/transport_ipc.h | 2 +- 6 files changed, 31 insertions(+), 6 deletions(-) diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h index 2fbe2bc1e093..c6718a05d347 100644 --- a/fs/ksmbd/ksmbd_netlink.h +++ b/fs/ksmbd/ksmbd_netlink.h @@ -211,6 +211,7 @@ struct ksmbd_tree_disconnect_request { */ struct ksmbd_logout_request { __s8 account[KSMBD_REQ_MAX_ACCOUNT_NAME_SZ]; /* user account name */ + __u32 account_flags; }; /* @@ -317,6 +318,7 @@ enum KSMBD_TREE_CONN_STATUS { #define KSMBD_USER_FLAG_BAD_UID BIT(2) #define KSMBD_USER_FLAG_BAD_USER BIT(3) #define KSMBD_USER_FLAG_GUEST_ACCOUNT BIT(4) +#define KSMBD_USER_FLAG_DELAY_SESSION BIT(5) /* * Share config flags. diff --git a/fs/ksmbd/mgmt/user_config.c b/fs/ksmbd/mgmt/user_config.c index d21629ae5c89..1019d3677d55 100644 --- a/fs/ksmbd/mgmt/user_config.c +++ b/fs/ksmbd/mgmt/user_config.c @@ -55,7 +55,7 @@ struct ksmbd_user *ksmbd_alloc_user(struct ksmbd_login_response *resp) void ksmbd_free_user(struct ksmbd_user *user) { - ksmbd_ipc_logout_request(user->name); + ksmbd_ipc_logout_request(user->name, user->flags); kfree(user->name); kfree(user->passkey); kfree(user); diff --git a/fs/ksmbd/mgmt/user_config.h b/fs/ksmbd/mgmt/user_config.h index b2bb074a0150..aff80b029579 100644 --- a/fs/ksmbd/mgmt/user_config.h +++ b/fs/ksmbd/mgmt/user_config.h @@ -18,6 +18,7 @@ struct ksmbd_user { size_t passkey_sz; char *passkey; + unsigned int failed_login_count; }; static inline bool user_guest(struct ksmbd_user *user) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e0f3a44e1599..cf7db5f71f9b 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1779,9 +1779,30 @@ out_err: conn->mechToken = NULL; } - if (rc < 0 && sess) { - ksmbd_session_destroy(sess); - work->sess = NULL; + if (rc < 0) { + /* + * SecurityBufferOffset should be set to zero + * in session setup error response. + */ + rsp->SecurityBufferOffset = 0; + + if (sess) { + bool try_delay = false; + + /* + * To avoid dictionary attacks (repeated session setups rapidly sent) to + * connect to server, ksmbd make a delay of a 5 seconds on session setup + * failure to make it harder to send enough random connection requests + * to break into a server. + */ + if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION) + try_delay = true; + + ksmbd_session_destroy(sess); + work->sess = NULL; + if (try_delay) + ssleep(5); + } } return rc; diff --git a/fs/ksmbd/transport_ipc.c b/fs/ksmbd/transport_ipc.c index 44aea33a67fa..1acf1892a466 100644 --- a/fs/ksmbd/transport_ipc.c +++ b/fs/ksmbd/transport_ipc.c @@ -601,7 +601,7 @@ int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id, return ret; } -int ksmbd_ipc_logout_request(const char *account) +int ksmbd_ipc_logout_request(const char *account, int flags) { struct ksmbd_ipc_msg *msg; struct ksmbd_logout_request *req; @@ -616,6 +616,7 @@ int ksmbd_ipc_logout_request(const char *account) msg->type = KSMBD_EVENT_LOGOUT_REQUEST; req = (struct ksmbd_logout_request *)msg->payload; + req->account_flags = flags; strscpy(req->account, account, KSMBD_REQ_MAX_ACCOUNT_NAME_SZ); ret = ipc_msg_send(msg); diff --git a/fs/ksmbd/transport_ipc.h b/fs/ksmbd/transport_ipc.h index 9eacc895ffdb..5e5b90a0c187 100644 --- a/fs/ksmbd/transport_ipc.h +++ b/fs/ksmbd/transport_ipc.h @@ -25,7 +25,7 @@ ksmbd_ipc_tree_connect_request(struct ksmbd_session *sess, struct sockaddr *peer_addr); int ksmbd_ipc_tree_disconnect_request(unsigned long long session_id, unsigned long long connect_id); -int ksmbd_ipc_logout_request(const char *account); +int ksmbd_ipc_logout_request(const char *account, int flags); struct ksmbd_share_config_response * ksmbd_ipc_share_config_request(const char *name); struct ksmbd_spnego_authen_response * -- cgit v1.2.3 From 0d994cd482ee4e8e851388a70869beee51be1c54 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Tue, 19 Oct 2021 17:39:38 +0200 Subject: ksmbd: add buffer validation in session setup Make sure the security buffer's length/offset are valid with regards to the packet length. Acked-by: Namjae Jeon Signed-off-by: Marios Makassikis Signed-off-by: Steve French --- fs/ksmbd/auth.c | 16 +++++++++------- fs/ksmbd/smb2pdu.c | 51 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 71c989f1568d..30a92ddc1817 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -298,8 +298,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, int blob_len, struct ksmbd_session *sess) { char *domain_name; - unsigned int lm_off, nt_off; - unsigned short nt_len; + unsigned int nt_off, dn_off; + unsigned short nt_len, dn_len; int ret; if (blob_len < sizeof(struct authenticate_message)) { @@ -314,15 +314,17 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, return -EINVAL; } - lm_off = le32_to_cpu(authblob->LmChallengeResponse.BufferOffset); nt_off = le32_to_cpu(authblob->NtChallengeResponse.BufferOffset); nt_len = le16_to_cpu(authblob->NtChallengeResponse.Length); + dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); + dn_len = le16_to_cpu(authblob->DomainName.Length); + + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + return -EINVAL; /* TODO : use domain name that imported from configuration file */ - domain_name = smb_strndup_from_utf16((const char *)authblob + - le32_to_cpu(authblob->DomainName.BufferOffset), - le16_to_cpu(authblob->DomainName.Length), true, - sess->conn->local_nls); + domain_name = smb_strndup_from_utf16((const char *)authblob + dn_off, + dn_len, true, sess->conn->local_nls); if (IS_ERR(domain_name)) return PTR_ERR(domain_name); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index cf7db5f71f9b..7e448df3f847 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1257,19 +1257,13 @@ static int generate_preauth_hash(struct ksmbd_work *work) return 0; } -static int decode_negotiation_token(struct ksmbd_work *work, - struct negotiate_message *negblob) +static int decode_negotiation_token(struct ksmbd_conn *conn, + struct negotiate_message *negblob, + size_t sz) { - struct ksmbd_conn *conn = work->conn; - struct smb2_sess_setup_req *req; - int sz; - if (!conn->use_spnego) return -EINVAL; - req = work->request_buf; - sz = le16_to_cpu(req->SecurityBufferLength); - if (ksmbd_decode_negTokenInit((char *)negblob, sz, conn)) { if (ksmbd_decode_negTokenTarg((char *)negblob, sz, conn)) { conn->auth_mechs |= KSMBD_AUTH_NTLMSSP; @@ -1281,9 +1275,9 @@ static int decode_negotiation_token(struct ksmbd_work *work, } static int ntlm_negotiate(struct ksmbd_work *work, - struct negotiate_message *negblob) + struct negotiate_message *negblob, + size_t negblob_len) { - struct smb2_sess_setup_req *req = work->request_buf; struct smb2_sess_setup_rsp *rsp = work->response_buf; struct challenge_message *chgblob; unsigned char *spnego_blob = NULL; @@ -1292,8 +1286,7 @@ static int ntlm_negotiate(struct ksmbd_work *work, int sz, rc; ksmbd_debug(SMB, "negotiate phase\n"); - sz = le16_to_cpu(req->SecurityBufferLength); - rc = ksmbd_decode_ntlmssp_neg_blob(negblob, sz, work->sess); + rc = ksmbd_decode_ntlmssp_neg_blob(negblob, negblob_len, work->sess); if (rc) return rc; @@ -1361,12 +1354,23 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn, struct authenticate_message *authblob; struct ksmbd_user *user; char *name; - int sz; + unsigned int auth_msg_len, name_off, name_len, secbuf_len; + secbuf_len = le16_to_cpu(req->SecurityBufferLength); + if (secbuf_len < sizeof(struct authenticate_message)) { + ksmbd_debug(SMB, "blob len %d too small\n", secbuf_len); + return NULL; + } authblob = user_authblob(conn, req); - sz = le32_to_cpu(authblob->UserName.BufferOffset); - name = smb_strndup_from_utf16((const char *)authblob + sz, - le16_to_cpu(authblob->UserName.Length), + name_off = le32_to_cpu(authblob->UserName.BufferOffset); + name_len = le16_to_cpu(authblob->UserName.Length); + auth_msg_len = le16_to_cpu(req->SecurityBufferOffset) + secbuf_len; + + if (auth_msg_len < (u64)name_off + name_len) + return NULL; + + name = smb_strndup_from_utf16((const char *)authblob + name_off, + name_len, true, conn->local_nls); if (IS_ERR(name)) { @@ -1612,6 +1616,7 @@ int smb2_sess_setup(struct ksmbd_work *work) struct smb2_sess_setup_rsp *rsp = work->response_buf; struct ksmbd_session *sess; struct negotiate_message *negblob; + unsigned int negblob_len, negblob_off; int rc = 0; ksmbd_debug(SMB, "Received request for session setup\n"); @@ -1692,10 +1697,16 @@ int smb2_sess_setup(struct ksmbd_work *work) if (sess->state == SMB2_SESSION_EXPIRED) sess->state = SMB2_SESSION_IN_PROGRESS; + negblob_off = le16_to_cpu(req->SecurityBufferOffset); + negblob_len = le16_to_cpu(req->SecurityBufferLength); + if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) || + negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) + return -EINVAL; + negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId + - le16_to_cpu(req->SecurityBufferOffset)); + negblob_off); - if (decode_negotiation_token(work, negblob) == 0) { + if (decode_negotiation_token(conn, negblob, negblob_len) == 0) { if (conn->mechToken) negblob = (struct negotiate_message *)conn->mechToken; } @@ -1719,7 +1730,7 @@ int smb2_sess_setup(struct ksmbd_work *work) sess->Preauth_HashValue = NULL; } else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) { if (negblob->MessageType == NtLmNegotiate) { - rc = ntlm_negotiate(work, negblob); + rc = ntlm_negotiate(work, negblob, negblob_len); if (rc) goto out_err; rsp->hdr.Status = -- cgit v1.2.3 From e139a1ec92f8dbaaa7380d7e7ea17e148d473d06 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 19 Oct 2021 23:43:46 +0100 Subject: io_uring: apply max_workers limit to all future users Currently, IORING_REGISTER_IOWQ_MAX_WORKERS applies only to the task that issued it, it's unexpected for users. If one task creates a ring, limits workers and then passes it to another task the limit won't be applied to the other task. Another pitfall is that a task should either create a ring or submit at least one request for IORING_REGISTER_IOWQ_MAX_WORKERS to work at all, furher complicating the picture. Change the API, save the limits and apply to all future users. Note, it should be done first before giving away the ring or submitting new requests otherwise the result is not guaranteed. Fixes: 2e480058ddc2 ("io-wq: provide a way to limit max number of workers") Link: https://github.com/axboe/liburing/issues/460 Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/51d0bae97180e08ab722c0d5c93e7439cfb6f697.1634683237.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e68d27829bb2..e8b71f14ac8b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -456,6 +456,8 @@ struct io_ring_ctx { struct work_struct exit_work; struct list_head tctx_list; struct completion ref_comp; + u32 iowq_limits[2]; + bool iowq_limits_set; }; }; @@ -9638,7 +9640,16 @@ static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) ret = io_uring_alloc_task_context(current, ctx); if (unlikely(ret)) return ret; + tctx = current->io_uring; + if (ctx->iowq_limits_set) { + unsigned int limits[2] = { ctx->iowq_limits[0], + ctx->iowq_limits[1], }; + + ret = io_wq_max_workers(tctx->io_wq, limits); + if (ret) + return ret; + } } if (!xa_load(&tctx->xa, (unsigned long)ctx)) { node = kmalloc(sizeof(*node), GFP_KERNEL); @@ -10674,13 +10685,19 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, tctx = current->io_uring; } - ret = -EINVAL; - if (!tctx || !tctx->io_wq) - goto err; + BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - ret = io_wq_max_workers(tctx->io_wq, new_count); - if (ret) - goto err; + memcpy(ctx->iowq_limits, new_count, sizeof(new_count)); + ctx->iowq_limits_set = true; + + ret = -EINVAL; + if (tctx && tctx->io_wq) { + ret = io_wq_max_workers(tctx->io_wq, new_count); + if (ret) + goto err; + } else { + memset(new_count, 0, sizeof(new_count)); + } if (sqd) { mutex_unlock(&sqd->lock); -- cgit v1.2.3 From 4ea672ab694c23886b52e97cee10dea056e43e62 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 20 Oct 2021 09:53:02 +0100 Subject: io_uring: fix ltimeout unprep io_unprep_linked_timeout() is broken, first it needs to return back REQ_F_ARM_LTIMEOUT, so the linked timeout is enqueued and disarmed. But now we refcounted it, and linked timeouts may get not executed at all, leaking a request. Just kill the unprep optimisation. Fixes: 906c6caaf586 ("io_uring: optimise io_prep_linked_timeout()") Reported-by: Beld Zhang Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/51b8e2bfc4bea8ee625cf2ba62b2a350cc9be031.1634719585.git.asml.silence@gmail.com Link: https://github.com/axboe/liburing/issues/460 Reported-by: Beld Zhang Signed-off-by: Jens Axboe --- fs/io_uring.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index e8b71f14ac8b..d5cc103224f1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1370,11 +1370,6 @@ static void io_req_track_inflight(struct io_kiocb *req) } } -static inline void io_unprep_linked_timeout(struct io_kiocb *req) -{ - req->flags &= ~REQ_F_LINK_TIMEOUT; -} - static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) { if (WARN_ON_ONCE(!req->link)) @@ -6985,7 +6980,7 @@ issue_sqe: switch (io_arm_poll_handler(req)) { case IO_APOLL_READY: if (linked_timeout) - io_unprep_linked_timeout(req); + io_queue_linked_timeout(linked_timeout); goto issue_sqe; case IO_APOLL_ABORTED: /* -- cgit v1.2.3 From 25f54d08f12feb593e62cc2193fedefaf7825301 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Thu, 23 Sep 2021 15:13:39 +0800 Subject: autofs: fix wait name hash calculation in autofs_wait() There's a mistake in commit 2be7828c9fefc ("get rid of autofs_getpath()") that affects kernels from v5.13.0, basically missed because of me not fully testing the change for Al. The problem is that the hash calculation for the wait name qstr hasn't been updated to account for the change to use dentry_path_raw(). This prevents the correct matching an existing wait resulting in multiple notifications being sent to the daemon for the same mount which must not occur. The problem wasn't discovered earlier because it only occurs when multiple processes trigger a request for the same mount concurrently so it only shows up in more aggressive testing. Fixes: 2be7828c9fefc ("get rid of autofs_getpath()") Cc: stable@vger.kernel.org Signed-off-by: Ian Kent Signed-off-by: Al Viro --- fs/autofs/waitq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 16b5fca0626e..54c1f8b8b075 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -358,7 +358,7 @@ int autofs_wait(struct autofs_sb_info *sbi, qstr.len = strlen(p); offset = p - name; } - qstr.hash = full_name_hash(dentry, name, qstr.len); + qstr.hash = full_name_hash(dentry, qstr.name, qstr.len); if (mutex_lock_interruptible(&sbi->wq_mutex)) { kfree(name); -- cgit v1.2.3 From de7cd3f6761f49bef044ec49493d88737a70f1a6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Oct 2021 06:27:36 -0400 Subject: KVM: x86: check for interrupts before deciding whether to exit the fast path The kvm_x86_sync_pir_to_irr callback can sometimes set KVM_REQ_EVENT. If that happens exactly at the time that an exit is handled as EXIT_FASTPATH_REENTER_GUEST, vcpu_enter_guest will go incorrectly through the loop that calls kvm_x86_run, instead of processing the request promptly. Fixes: 379a3c8ee444 ("KVM: VMX: Optimize posted-interrupt delivery for timer fastpath") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c8b5129effd..381384a54790 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9643,14 +9643,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST)) break; - if (unlikely(kvm_vcpu_exit_request(vcpu))) { + if (vcpu->arch.apicv_active) + static_call(kvm_x86_sync_pir_to_irr)(vcpu); + + if (unlikely(kvm_vcpu_exit_request(vcpu))) { exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; break; } - - if (vcpu->arch.apicv_active) - static_call(kvm_x86_sync_pir_to_irr)(vcpu); - } + } /* * Do this here before restoring debug registers on the host. And -- cgit v1.2.3 From 3a25dfa67fe40f3a2690af2c562e0947a78bd6a0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Oct 2021 06:22:59 -0400 Subject: KVM: nVMX: promptly process interrupts delivered while in guest mode Since commit c300ab9f08df ("KVM: x86: Replace late check_nested_events() hack with more precise fix") there is no longer the certainty that check_nested_events() tries to inject an external interrupt vmexit to L1 on every call to vcpu_enter_guest. Therefore, even in that case we need to set KVM_REQ_EVENT. This ensures that inject_pending_event() is called, and from there kvm_check_nested_events(). Fixes: c300ab9f08df ("KVM: x86: Replace late check_nested_events() hack with more precise fix") Cc: stable@vger.kernel.org Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 7fb2a3a1ca46..7d595effb66f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6305,18 +6305,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) /* * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. + * which can be injected, this may cause a vmexit or it may + * be injected into L2. Either way, this interrupt will be + * processed via KVM_REQ_EVENT, not RVI, because we do not use + * virtual interrupt delivery to inject L1 interrupts into L2. */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } + if (is_guest_mode(vcpu) && max_irr_updated) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); } -- cgit v1.2.3 From d534d31d6a45d71de61db22090b4820afb68fddc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:38 +0200 Subject: fuse: check s_root when destroying sb Checking "fm" works because currently sb->s_fs_info is cleared on error paths; however, sb->s_root is what generic_shutdown_super() checks to determine whether the sb was fully initialized or not. This change will allow cleanup of sb setup error paths. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- fs/fuse/virtio_fs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36cd03114b6d..60ceaf332840 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1747,7 +1747,7 @@ static void fuse_sb_destroy(struct super_block *sb) struct fuse_mount *fm = get_fuse_mount_super(sb); bool last; - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) fuse_conn_destroy(fm); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 0ad89c6629d7..32fd138c621e 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1394,7 +1394,7 @@ static void virtio_kill_sb(struct super_block *sb) bool last; /* If mount failed, we can still be called without any fc */ - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) virtio_fs_conn_destroy(fm); -- cgit v1.2.3 From a27c061a49afd7ad2d935e6ac734e2a9f62861b8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:38 +0200 Subject: fuse: get rid of fuse_put_super() The ->put_super callback is called from generic_shutdown_super() in case of a fully initialized sb. This is called from kill_***_super(), which is called from ->kill_sb instances. Fuse uses ->put_super to destroy the fs specific fuse_mount and drop the reference to the fuse_conn, while it does the same on each error case during sb setup. This patch moves the destruction from fuse_put_super() to fuse_mount_destroy(), called at the end of all ->kill_sb instances. A follup patch will clean up the error paths. Signed-off-by: Miklos Szeredi --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 20 +++++++++++--------- fs/fuse/virtio_fs.c | 1 + 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 319596df5dc6..f55f9f94b1a4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1121,6 +1121,9 @@ int fuse_init_fs_context_submount(struct fs_context *fsc); */ void fuse_conn_destroy(struct fuse_mount *fm); +/* Drop the connection and free the fuse mount */ +void fuse_mount_destroy(struct fuse_mount *fm); + /** * Add connection to control filesystem */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 60ceaf332840..30ca1f71d777 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -457,14 +457,6 @@ static void fuse_send_destroy(struct fuse_mount *fm) } } -static void fuse_put_super(struct super_block *sb) -{ - struct fuse_mount *fm = get_fuse_mount_super(sb); - - fuse_conn_put(fm->fc); - kfree(fm); -} - static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) { stbuf->f_type = FUSE_SUPER_MAGIC; @@ -1003,7 +995,6 @@ static const struct super_operations fuse_super_operations = { .evict_inode = fuse_evict_inode, .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, - .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .sync_fs = fuse_sync_fs, @@ -1754,10 +1745,20 @@ static void fuse_sb_destroy(struct super_block *sb) } } +void fuse_mount_destroy(struct fuse_mount *fm) +{ + if (fm) { + fuse_conn_put(fm->fc); + kfree(fm); + } +} +EXPORT_SYMBOL(fuse_mount_destroy); + static void fuse_kill_sb_anon(struct super_block *sb) { fuse_sb_destroy(sb); kill_anon_super(sb); + fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuse_fs_type = { @@ -1775,6 +1776,7 @@ static void fuse_kill_sb_blk(struct super_block *sb) { fuse_sb_destroy(sb); kill_block_super(sb); + fuse_mount_destroy(get_fuse_mount_super(sb)); } static struct file_system_type fuseblk_fs_type = { diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 32fd138c621e..fa80d250c802 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1400,6 +1400,7 @@ static void virtio_kill_sb(struct super_block *sb) virtio_fs_conn_destroy(fm); } kill_anon_super(sb); + fuse_mount_destroy(fm); } static int virtio_fs_test_super(struct super_block *sb, -- cgit v1.2.3 From c191cd07ee948c93081d8e4cba43d23b18b2f3da Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: clean up fuse_mount destruction 1. call fuse_mount_destroy() for open coded variants 2. before deactivate_locked_super() don't need fuse_mount destruction since that will now be done (if ->s_fs_info is not cleared) 3. rearrange fuse_mount setup in fuse_get_tree_submount() so that the regular pattern can be used Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 17 +++++------------ fs/fuse/virtio_fs.c | 9 ++------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 30ca1f71d777..308daee54c12 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1415,20 +1415,17 @@ static int fuse_get_tree_submount(struct fs_context *fsc) if (!fm) return -ENOMEM; + fm->fc = fuse_conn_get(fc); fsc->s_fs_info = fm; sb = sget_fc(fsc, NULL, set_anon_super_fc); - if (IS_ERR(sb)) { - kfree(fm); + if (fsc->s_fs_info) + fuse_mount_destroy(fm); + if (IS_ERR(sb)) return PTR_ERR(sb); - } - fm->fc = fuse_conn_get(fc); /* Initialize superblock, making @mp_fi its root */ err = fuse_fill_super_submount(sb, mp_fi); if (err) { - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; deactivate_locked_super(sb); return err; } @@ -1595,16 +1592,12 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) err = fuse_fill_super_common(sb, ctx); if (err) - goto err_put_conn; + goto err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); fuse_send_init(get_fuse_mount_super(sb)); return 0; - err_put_conn: - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; err: return err; } diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index fa80d250c802..94fc874f5de7 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1456,19 +1456,14 @@ static int virtio_fs_get_tree(struct fs_context *fsc) fsc->s_fs_info = fm; sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); - if (fsc->s_fs_info) { - fuse_conn_put(fc); - kfree(fm); - } + if (fsc->s_fs_info) + fuse_mount_destroy(fm); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { err = virtio_fs_fill_super(sb, fsc); if (err) { - fuse_conn_put(fc); - kfree(fm); - sb->s_fs_info = NULL; deactivate_locked_super(sb); return err; } -- cgit v1.2.3 From 80019f1138324b6f35ae728b4f25eeb08899b452 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: always initialize sb->s_fs_info Syzkaller reports a null pointer dereference in fuse_test_super() that is caused by sb->s_fs_info being NULL. This is due to the fact that fuse_fill_super() is initializing s_fs_info, which is too late, it's already on the fs_supers list. The initialization needs to be done in sget_fc() with the sb_lock held. Move allocation of fuse_mount and fuse_conn from fuse_fill_super() into fuse_get_tree(). After this ->kill_sb() will always be called with non-NULL ->s_fs_info, hence fuse_mount_destroy() can drop the test for non-NULL "fm". Reported-by: syzbot+74a15f02ccb51f398601@syzkaller.appspotmail.com Fixes: 5d5b74aa9c76 ("fuse: allow sharing existing sb") Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 308daee54c12..b468f55634f2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1557,8 +1557,6 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; int err; - struct fuse_conn *fc; - struct fuse_mount *fm; if (!ctx->file || !ctx->rootmode_present || !ctx->user_id_present || !ctx->group_id_present) @@ -1574,22 +1572,6 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) goto err; ctx->fudptr = &ctx->file->private_data; - fc = kmalloc(sizeof(*fc), GFP_KERNEL); - err = -ENOMEM; - if (!fc) - goto err; - - fm = kzalloc(sizeof(*fm), GFP_KERNEL); - if (!fm) { - kfree(fc); - goto err; - } - - fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); - fc->release = fuse_free_conn; - - sb->s_fs_info = fm; - err = fuse_fill_super_common(sb, ctx); if (err) goto err; @@ -1621,22 +1603,40 @@ static int fuse_get_tree(struct fs_context *fsc) { struct fuse_fs_context *ctx = fsc->fs_private; struct fuse_dev *fud; + struct fuse_conn *fc; + struct fuse_mount *fm; struct super_block *sb; int err; + fc = kmalloc(sizeof(*fc), GFP_KERNEL); + if (!fc) + return -ENOMEM; + + fm = kzalloc(sizeof(*fm), GFP_KERNEL); + if (!fm) { + kfree(fc); + return -ENOMEM; + } + + fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); + fc->release = fuse_free_conn; + + fsc->s_fs_info = fm; + if (ctx->fd_present) ctx->file = fget(ctx->fd); if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) { err = get_tree_bdev(fsc, fuse_fill_super); - goto out_fput; + goto out; } /* * While block dev mount can be initialized with a dummy device fd * (found by device name), normal fuse mounts can't */ + err = -EINVAL; if (!ctx->file) - return -EINVAL; + goto out; /* * Allow creating a fuse mount with an already initialized fuse @@ -1652,7 +1652,9 @@ static int fuse_get_tree(struct fs_context *fsc) } else { err = get_tree_nodev(fsc, fuse_fill_super); } -out_fput: +out: + if (fsc->s_fs_info) + fuse_mount_destroy(fm); if (ctx->file) fput(ctx->file); return err; @@ -1740,10 +1742,8 @@ static void fuse_sb_destroy(struct super_block *sb) void fuse_mount_destroy(struct fuse_mount *fm) { - if (fm) { - fuse_conn_put(fm->fc); - kfree(fm); - } + fuse_conn_put(fm->fc); + kfree(fm); } EXPORT_SYMBOL(fuse_mount_destroy); -- cgit v1.2.3 From 964d32e512670c7b87870e30cfed2303da86d614 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:39 +0200 Subject: fuse: clean up error exits in fuse_fill_super() Instead of "goto err", return error directly, since there's no error cleanup to do now. Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b468f55634f2..12d49a1914e8 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1566,22 +1566,18 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) * Require mount to happen from the same user namespace which * opened /dev/fuse to prevent potential attacks. */ - err = -EINVAL; if ((ctx->file->f_op != &fuse_dev_operations) || (ctx->file->f_cred->user_ns != sb->s_user_ns)) - goto err; + return -EINVAL; ctx->fudptr = &ctx->file->private_data; err = fuse_fill_super_common(sb, ctx); if (err) - goto err; + return err; /* file->private_data shall be visible on all CPUs after this */ smp_mb(); fuse_send_init(get_fuse_mount_super(sb)); return 0; - - err: - return err; } /* -- cgit v1.2.3 From 0a30896fc5025e71c350449760b240fba5581b42 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 20 Oct 2021 23:08:16 +0200 Subject: MAINTAINERS: Add Dave Hansen to the x86 maintainer team Dave is already listed as x86/mm maintainer, has a profund knowledge of the x86 architecture in general and a good taste in terms of kernel programming in general. Add him as a full x86 maintainer with all rights and duties. Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Acked-by: Borislav Petkov Acked-by: Andy Lutomirski Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/87zgr3flq7.ffs@tglx --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..f26920f0fa65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20336,6 +20336,7 @@ X86 ARCHITECTURE (32-BIT AND 64-BIT) M: Thomas Gleixner M: Ingo Molnar M: Borislav Petkov +M: Dave Hansen M: x86@kernel.org R: "H. Peter Anvin" L: linux-kernel@vger.kernel.org -- cgit v1.2.3 From a3ca5281bb771d8103ea16f0a6a8a5df9a7fb4f3 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Thu, 21 Oct 2021 15:10:22 +0800 Subject: KVM: MMU: Reset mmu->pkru_mask to avoid stale data When updating mmu->pkru_mask, the value can only be added but it isn't reset in advance. This will make mmu->pkru_mask keep the stale data. Fix this issue. Fixes: 2d344105f57c ("KVM, pkeys: introduce pkru_mask to cache conditions") Signed-off-by: Chenyi Qiang Message-Id: <20211021071022.1140-1-chenyi.qiang@intel.com> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1a64ba5b9437..0cc58901bf7a 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4596,10 +4596,10 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu) unsigned bit; bool wp; - if (!is_cr4_pke(mmu)) { - mmu->pkru_mask = 0; + mmu->pkru_mask = 0; + + if (!is_cr4_pke(mmu)) return; - } wp = is_cr0_wp(mmu); -- cgit v1.2.3 From c8c340a9b4149fe5caa433f3b62463a1c8e07a46 Mon Sep 17 00:00:00 2001 From: Masahiro Kozuka Date: Tue, 14 Sep 2021 14:09:51 -0700 Subject: KVM: SEV: Flush cache on non-coherent systems before RECEIVE_UPDATE_DATA Flush the destination page before invoking RECEIVE_UPDATE_DATA, as the PSP encrypts the data with the guest's key when writing to guest memory. If the target memory was not previously encrypted, the cache may contain dirty, unecrypted data that will persist on non-coherent systems. Fixes: 15fb7de1a7f5 ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command") Cc: stable@vger.kernel.org Cc: Peter Gonda Cc: Marc Orr Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Masahiro Kozuka [sean: converted bug report to changelog] Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Message-Id: <20210914210951.2994260-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0d21d59936e5..2e4916be290e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1484,6 +1484,13 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free_trans; } + /* + * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP + * encrypts the written data with the guest's key, and the cache may + * contain dirty, unencrypted data. + */ + sev_clflush_pages(guest_page, n); + /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */ data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset; data.guest_address |= sev_me_mask; -- cgit v1.2.3 From b22fa62a35d7f2029d757a518d78041822b7c7c1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 21 Oct 2021 13:20:29 +0100 Subject: io_uring: apply worker limits to previous users Another change to the API io-wq worker limitation API added in 5.15, apply the limit to all prior users that already registered a tctx. It may be confusing as it's now, in particular the change covers the following 2 cases: TASK1 | TASK2 _________________________________________________ ring = create() | | limit_iowq_workers() *not limited* | TASK1 | TASK2 _________________________________________________ ring = create() | | issue_requests() limit_iowq_workers() | | *not limited* A note on locking, it's safe to traverse ->tctx_list as we hold ->uring_lock, but do that after dropping sqd->lock to avoid possible problems. It's also safe to access tctx->io_wq there because tasks kill it only after removing themselves from tctx_list, see io_uring_cancel_generic() -> io_uring_clean_tctx() Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/d6e09ecc3545e4dc56e43c906ee3d71b7ae21bed.1634818641.git.asml.silence@gmail.com Reviewed-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d5cc103224f1..bc18af5e0a93 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -10649,7 +10649,9 @@ static int io_unregister_iowq_aff(struct io_ring_ctx *ctx) static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, void __user *arg) + __must_hold(&ctx->uring_lock) { + struct io_tctx_node *node; struct io_uring_task *tctx = NULL; struct io_sq_data *sqd = NULL; __u32 new_count[2]; @@ -10702,6 +10704,22 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, if (copy_to_user(arg, new_count, sizeof(new_count))) return -EFAULT; + /* that's it for SQPOLL, only the SQPOLL task creates requests */ + if (sqd) + return 0; + + /* now propagate the restriction to all registered users */ + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + if (WARN_ON_ONCE(!tctx->io_wq)) + continue; + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + new_count[i] = ctx->iowq_limits[i]; + /* ignore errors, it always returns zero anyway */ + (void)io_wq_max_workers(tctx->io_wq, new_count); + } return 0; err: if (sqd) { -- cgit v1.2.3 From 1394103fd72ce9c67d20f882a93d59403c8da057 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 23 Sep 2021 15:57:22 +0800 Subject: vduse: Disallow injecting interrupt before DRIVER_OK is set The interrupt callback should not be triggered before DRIVER_OK is set. Otherwise, it might break the virtio device driver. So let's add a check to avoid the unexpected behavior. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210923075722.98-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 26e3d90d1e7c..cefb301b2ee4 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -966,6 +966,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = 0; queue_work(vduse_irq_wq, &dev->inject); break; @@ -1045,6 +1049,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; + ret = -EINVAL; + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + break; + ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; -- cgit v1.2.3 From 0943aacf5ae10471b68a702c022b42c89e91ba9e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 29 Sep 2021 16:30:50 +0800 Subject: vduse: Fix race condition between resetting and irq injecting The interrupt might be triggered after a reset since there is no synchronization between resetting and irq injecting. And it might break something if the interrupt is delayed until a new round of device initialization. Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace") Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210929083050.88-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index cefb301b2ee4..841667a896dd 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -80,6 +80,7 @@ struct vduse_dev { struct vdpa_callback config_cb; struct work_struct inject; spinlock_t irq_lock; + struct rw_semaphore rwsem; int minor; bool broken; bool connected; @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) if (domain->bounce_map) vduse_domain_reset_bounce_map(domain); + down_write(&dev->rwsem); + dev->status = 0; dev->driver_features = 0; dev->generation++; @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev) flush_work(&vq->inject); flush_work(&vq->kick); } + + up_write(&dev->rwsem); } static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work) spin_unlock_irq(&vq->irq_lock); } +static int vduse_dev_queue_irq_work(struct vduse_dev *dev, + struct work_struct *irq_work) +{ + int ret = -EINVAL; + + down_read(&dev->rwsem); + if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto unlock; + + ret = 0; + queue_work(vduse_irq_wq, irq_work); +unlock: + up_read(&dev->rwsem); + + return ret; +} + static long vduse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -966,12 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, break; } case VDUSE_DEV_INJECT_CONFIG_IRQ: - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - - ret = 0; - queue_work(vduse_irq_wq, &dev->inject); + ret = vduse_dev_queue_irq_work(dev, &dev->inject); break; case VDUSE_VQ_SETUP: { struct vduse_vq_config config; @@ -1049,10 +1066,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, case VDUSE_VQ_INJECT_IRQ: { u32 index; - ret = -EINVAL; - if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK)) - break; - ret = -EFAULT; if (get_user(index, (u32 __user *)argp)) break; @@ -1061,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, if (index >= dev->vq_num) break; - ret = 0; index = array_index_nospec(index, dev->vq_num); - queue_work(vduse_irq_wq, &dev->vqs[index].inject); + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); break; } default: @@ -1144,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void) INIT_LIST_HEAD(&dev->send_list); INIT_LIST_HEAD(&dev->recv_list); spin_lock_init(&dev->irq_lock); + init_rwsem(&dev->rwsem); INIT_WORK(&dev->inject, vduse_dev_irq_inject); init_waitqueue_head(&dev->waitq); -- cgit v1.2.3 From b5998402e3de429b5e5f9bdea08ddf77c5fd661e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 10:22:34 -0400 Subject: KVM: SEV-ES: rename guest_ins_data to sev_pio_data We will be using this field for OUTS emulation as well, in case the data that is pushed via OUTS spans more than one page. In that case, there will be a need to save the data pointer across exits to userspace. So, change the name to something that refers to any kind of PIO. Also spell out what it is used for, namely SEV-ES. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/x86.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f8f48a7ec577..6bed6c416c6c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -702,7 +702,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; - void *guest_ins_data; + void *sev_pio_data; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 381384a54790..379175b725a1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12370,7 +12370,7 @@ EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) { - memcpy(vcpu->arch.guest_ins_data, vcpu->arch.pio_data, + memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, vcpu->arch.pio.count * vcpu->arch.pio.size); vcpu->arch.pio.count = 0; @@ -12402,7 +12402,7 @@ static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, if (ret) { vcpu->arch.pio.count = 0; } else { - vcpu->arch.guest_ins_data = data; + vcpu->arch.sev_pio_data = data; vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; } -- cgit v1.2.3 From 0d33b1baeb6ca7165d5ed4fdd1a8f969985e35b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Oct 2021 12:29:42 -0400 Subject: KVM: x86: leave vcpu->arch.pio.count alone in emulator_pio_in_out Currently emulator_pio_in clears vcpu->arch.pio.count twice if emulator_pio_in_out performs kernel PIO. Move the clear into emulator_pio_out where it is actually necessary. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 379175b725a1..dff28a4fbb21 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6914,10 +6914,8 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, vcpu->arch.pio.count = count; vcpu->arch.pio.size = size; - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - vcpu->arch.pio.count = 0; + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) return 1; - } vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; @@ -6963,9 +6961,16 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port, const void *val, unsigned int count) { + int ret; + memcpy(vcpu->arch.pio_data, val, size * count); trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); - return emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + ret = emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + if (ret) + vcpu->arch.pio.count = 0; + + return ret; + } static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From ea724ea420aac58b41bc822d1aed6940b136b78d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 10:51:55 -0400 Subject: KVM: SEV-ES: clean up kvm_sev_es_ins/outs A few very small cleanups to the functions, smushed together because the patch is already very small like this: - inline emulator_pio_in_emulated and emulator_pio_out_emulated, since we already have the vCPU - remove the data argument and pull setting vcpu->arch.sev_pio_data into the caller - remove unnecessary clearing of vcpu->arch.pio.count when emulation is done by the kernel (and therefore vcpu->arch.pio.count is already clear on exit from emulator_pio_in and emulator_pio_out). No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dff28a4fbb21..78ed0fe9fa1e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12383,34 +12383,32 @@ static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) } static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port, unsigned int count) { - int ret; + int ret = emulator_pio_out(vcpu, size, port, + vcpu->arch.sev_pio_data, count); - ret = emulator_pio_out_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); - if (ret) + if (ret) { + /* Emulation done by the kernel. */ return ret; + } vcpu->arch.pio.count = 0; - return 0; } static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, void *data, unsigned int count) + unsigned int port, unsigned int count) { - int ret; + int ret = emulator_pio_in(vcpu, size, port, + vcpu->arch.sev_pio_data, count); - ret = emulator_pio_in_emulated(vcpu->arch.emulate_ctxt, size, port, - data, count); if (ret) { - vcpu->arch.pio.count = 0; - } else { - vcpu->arch.sev_pio_data = data; - vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; + /* Emulation done by the kernel. */ + return ret; } + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; return 0; } @@ -12418,8 +12416,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, void *data, unsigned int count, int in) { - return in ? kvm_sev_es_ins(vcpu, size, port, data, count) - : kvm_sev_es_outs(vcpu, size, port, data, count); + vcpu->arch.sev_pio_data = data; + return in ? kvm_sev_es_ins(vcpu, size, port, count) + : kvm_sev_es_outs(vcpu, size, port, count); } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); -- cgit v1.2.3 From 3b27de27183911d461afedf50c6fa30c59740c07 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 13 Oct 2021 12:32:02 -0400 Subject: KVM: x86: split the two parts of emulator_pio_in emulator_pio_in handles both the case where the data is pending in vcpu->arch.pio.count, and the case where I/O has to be done via either an in-kernel device or a userspace exit. For SEV-ES we would like to split these, to identify clearly the moment at which the sev_pio_data is consumed. To this end, create two different functions: __emulator_pio_in fills in vcpu->arch.pio.count, while complete_emulator_pio_in clears it and releases vcpu->arch.pio.data. Because this patch has to be backported, things are left a bit messy. kernel_pio() operates on vcpu->arch.pio, which leads to emulator_pio_in() having with two calls to complete_emulator_pio_in(). It will be fixed in the next release. While at it, remove the unused void* val argument of emulator_pio_in_out. The function currently hardcodes vcpu->arch.pio_data as the source/destination buffer, which sucks but will be fixed after the more severe SEV-ES buffer overflow. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 78ed0fe9fa1e..c51ea81019e3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6906,7 +6906,7 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) } static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, + unsigned short port, unsigned int count, bool in) { vcpu->arch.pio.port = port; @@ -6927,26 +6927,38 @@ static int emulator_pio_in_out(struct kvm_vcpu *vcpu, int size, return 0; } -static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val, unsigned int count) +static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, unsigned int count) { - int ret; + WARN_ON(vcpu->arch.pio.count); + memset(vcpu->arch.pio_data, 0, size * count); + return emulator_pio_in_out(vcpu, size, port, count, true); +} - if (vcpu->arch.pio.count) - goto data_avail; +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, void *val) +{ + memcpy(val, vcpu->arch.pio_data, size * vcpu->arch.pio.count); + trace_kvm_pio(KVM_PIO_IN, port, size, vcpu->arch.pio.count, vcpu->arch.pio_data); + vcpu->arch.pio.count = 0; +} - memset(vcpu->arch.pio_data, 0, size * count); +static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port, void *val, unsigned int count) +{ + if (vcpu->arch.pio.count) { + /* Complete previous iteration. */ + } else { + int r = __emulator_pio_in(vcpu, size, port, count); + if (!r) + return r; - ret = emulator_pio_in_out(vcpu, size, port, val, count, true); - if (ret) { -data_avail: - memcpy(val, vcpu->arch.pio_data, size * count); - trace_kvm_pio(KVM_PIO_IN, port, size, count, vcpu->arch.pio_data); - vcpu->arch.pio.count = 0; - return 1; + /* Results already available, fall through. */ } - return 0; + WARN_ON(count != vcpu->arch.pio.count); + complete_emulator_pio_in(vcpu, size, port, val); + return 1; } static int emulator_pio_in_emulated(struct x86_emulate_ctxt *ctxt, @@ -6965,12 +6977,11 @@ static int emulator_pio_out(struct kvm_vcpu *vcpu, int size, memcpy(vcpu->arch.pio_data, val, size * count); trace_kvm_pio(KVM_PIO_OUT, port, size, count, vcpu->arch.pio_data); - ret = emulator_pio_in_out(vcpu, size, port, (void *)val, count, false); + ret = emulator_pio_in_out(vcpu, size, port, count, false); if (ret) vcpu->arch.pio.count = 0; return ret; - } static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt, -- cgit v1.2.3 From 6b5efc930bbc8c97e4a1fe2ccb9a6f286365a56d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 12:35:20 -0400 Subject: KVM: x86: remove unnecessary arguments from complete_emulator_pio_in complete_emulator_pio_in can expect that vcpu->arch.pio has been filled in, and therefore does not need the size and count arguments. This makes things nicer when the function is called directly from a complete_userspace_io callback. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c51ea81019e3..63f9cb33cc19 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6935,11 +6935,12 @@ static int __emulator_pio_in(struct kvm_vcpu *vcpu, int size, return emulator_pio_in_out(vcpu, size, port, count, true); } -static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, int size, - unsigned short port, void *val) +static void complete_emulator_pio_in(struct kvm_vcpu *vcpu, void *val) { - memcpy(val, vcpu->arch.pio_data, size * vcpu->arch.pio.count); - trace_kvm_pio(KVM_PIO_IN, port, size, vcpu->arch.pio.count, vcpu->arch.pio_data); + int size = vcpu->arch.pio.size; + unsigned count = vcpu->arch.pio.count; + memcpy(val, vcpu->arch.pio_data, size * count); + trace_kvm_pio(KVM_PIO_IN, vcpu->arch.pio.port, size, count, vcpu->arch.pio_data); vcpu->arch.pio.count = 0; } @@ -6957,7 +6958,7 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size, } WARN_ON(count != vcpu->arch.pio.count); - complete_emulator_pio_in(vcpu, size, port, val); + complete_emulator_pio_in(vcpu, val); return 1; } -- cgit v1.2.3 From 4fa4b38dae6fc6a3695695add8c18fa8b6a05a1a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 11:25:45 -0400 Subject: KVM: SEV-ES: keep INS functions together Make the diff a little nicer when we actually get to fixing the bug. No functional change intended. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63f9cb33cc19..23e772412134 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12385,15 +12385,6 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, } EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); -static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) -{ - memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, - vcpu->arch.pio.count * vcpu->arch.pio.size); - vcpu->arch.pio.count = 0; - - return 1; -} - static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, unsigned int count) { @@ -12409,6 +12400,15 @@ static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, return 0; } +static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, + vcpu->arch.pio.count * vcpu->arch.pio.size); + vcpu->arch.pio.count = 0; + + return 1; +} + static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, unsigned int port, unsigned int count) { -- cgit v1.2.3 From 95e16b4792b0429f1933872f743410f00e590c55 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 12 Oct 2021 11:33:03 -0400 Subject: KVM: SEV-ES: go over the sev_pio_data buffer in multiple passes if needed The PIO scratch buffer is larger than a single page, and therefore it is not possible to copy it in a single step to vcpu->arch/pio_data. Bound each call to emulator_pio_in/out to a single page; keep track of how many I/O operations are left in vcpu->arch.sev_pio_count, so that the operation can be restarted in the complete_userspace_io callback. For OUT, this means that the previous kvm_sev_es_outs implementation becomes an iterator of the loop, and we can consume the sev_pio_data buffer before leaving to userspace. For IN, instead, consuming the buffer and decreasing sev_pio_count is always done in the complete_userspace_io callback, because that is when the memcpy is done into sev_pio_data. Cc: stable@vger.kernel.org Fixes: 7ed9abfe8e9f ("KVM: SVM: Support string IO operations for an SEV-ES guest") Reported-by: Felix Wilhelm Reviewed-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 72 ++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6bed6c416c6c..5a0298aa56ba 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -703,6 +703,7 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; void *sev_pio_data; + unsigned sev_pio_count; u8 event_exit_inst_len; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 23e772412134..b26647a5ea22 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12386,38 +12386,77 @@ int kvm_sev_es_mmio_read(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned int bytes, EXPORT_SYMBOL_GPL(kvm_sev_es_mmio_read); static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, unsigned int count) + unsigned int port); + +static int complete_sev_es_emulated_outs(struct kvm_vcpu *vcpu) { - int ret = emulator_pio_out(vcpu, size, port, - vcpu->arch.sev_pio_data, count); + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + + vcpu->arch.pio.count = 0; + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_outs(vcpu, size, port); + return 1; +} + +static int kvm_sev_es_outs(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port) +{ + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + int ret = emulator_pio_out(vcpu, size, port, vcpu->arch.sev_pio_data, count); + + /* memcpy done already by emulator_pio_out. */ + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; + if (!ret) + break; - if (ret) { /* Emulation done by the kernel. */ - return ret; + if (!vcpu->arch.sev_pio_count) + return 1; } - vcpu->arch.pio.count = 0; + vcpu->arch.complete_userspace_io = complete_sev_es_emulated_outs; return 0; } +static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, + unsigned int port); + +static void advance_sev_es_emulated_ins(struct kvm_vcpu *vcpu) +{ + unsigned count = vcpu->arch.pio.count; + complete_emulator_pio_in(vcpu, vcpu->arch.sev_pio_data); + vcpu->arch.sev_pio_count -= count; + vcpu->arch.sev_pio_data += count * vcpu->arch.pio.size; +} + static int complete_sev_es_emulated_ins(struct kvm_vcpu *vcpu) { - memcpy(vcpu->arch.sev_pio_data, vcpu->arch.pio_data, - vcpu->arch.pio.count * vcpu->arch.pio.size); - vcpu->arch.pio.count = 0; + int size = vcpu->arch.pio.size; + int port = vcpu->arch.pio.port; + advance_sev_es_emulated_ins(vcpu); + if (vcpu->arch.sev_pio_count) + return kvm_sev_es_ins(vcpu, size, port); return 1; } static int kvm_sev_es_ins(struct kvm_vcpu *vcpu, unsigned int size, - unsigned int port, unsigned int count) + unsigned int port) { - int ret = emulator_pio_in(vcpu, size, port, - vcpu->arch.sev_pio_data, count); + for (;;) { + unsigned int count = + min_t(unsigned int, PAGE_SIZE / size, vcpu->arch.sev_pio_count); + if (!__emulator_pio_in(vcpu, size, port, count)) + break; - if (ret) { /* Emulation done by the kernel. */ - return ret; + advance_sev_es_emulated_ins(vcpu); + if (!vcpu->arch.sev_pio_count) + return 1; } vcpu->arch.complete_userspace_io = complete_sev_es_emulated_ins; @@ -12429,8 +12468,9 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size, int in) { vcpu->arch.sev_pio_data = data; - return in ? kvm_sev_es_ins(vcpu, size, port, count) - : kvm_sev_es_outs(vcpu, size, port, count); + vcpu->arch.sev_pio_count = count; + return in ? kvm_sev_es_ins(vcpu, size, port) + : kvm_sev_es_outs(vcpu, size, port); } EXPORT_SYMBOL_GPL(kvm_sev_es_string_io); -- cgit v1.2.3 From 8017c99680fa65e1e8d999df1583de476a187830 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Oct 2021 15:19:08 +0200 Subject: hyperv/vmbus: include linux/bitops.h On arm64 randconfig builds, hyperv sometimes fails with this error: In file included from drivers/hv/hv_trace.c:3: In file included from drivers/hv/hyperv_vmbus.h:16: In file included from arch/arm64/include/asm/sync_bitops.h:5: arch/arm64/include/asm/bitops.h:11:2: error: only can be included directly In file included from include/asm-generic/bitops/hweight.h:5: include/asm-generic/bitops/arch_hweight.h:9:9: error: implicit declaration of function '__sw_hweight32' [-Werror,-Wimplicit-function-declaration] include/asm-generic/bitops/atomic.h:17:7: error: implicit declaration of function 'BIT_WORD' [-Werror,-Wimplicit-function-declaration] Include the correct header first. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20211018131929.2260087-1-arnd@kernel.org Signed-off-by: Wei Liu --- drivers/hv/hyperv_vmbus.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 42f3d9d123a1..d030577ad6a2 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -13,6 +13,7 @@ #define _HYPERV_VMBUS_H #include +#include #include #include #include -- cgit v1.2.3 From 7f678def99d29c520418607509bb19c7fc96a6db Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 22 Oct 2021 13:28:37 +0300 Subject: skb_expand_head() adjust skb->truesize incorrectly Christoph Paasch reports [1] about incorrect skb->truesize after skb_expand_head() call in ip6_xmit. This may happen because of two reasons: - skb_set_owner_w() for newly cloned skb is called too early, before pskb_expand_head() where truesize is adjusted for (!skb-sk) case. - pskb_expand_head() does not adjust truesize in (skb->sk) case. In this case sk->sk_wmem_alloc should be adjusted too. [1] https://lkml.org/lkml/2021/8/20/1082 Fixes: f1260ff15a71 ("skbuff: introduce skb_expand_head()") Fixes: 2d85a1b31dde ("ipv6: ip6_finish_output2: set sk into newly allocated nskb") Reported-by: Christoph Paasch Signed-off-by: Vasily Averin Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/644330dd-477e-0462-83bf-9f514c41edd1@virtuozzo.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 36 +++++++++++++++++++++++------------- net/core/sock_destructor.h | 12 ++++++++++++ 2 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 net/core/sock_destructor.h diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2170bea2c7de..fe9358437380 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -80,6 +80,7 @@ #include #include "datagram.h" +#include "sock_destructor.h" struct kmem_cache *skbuff_head_cache __ro_after_init; static struct kmem_cache *skbuff_fclone_cache __ro_after_init; @@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) { int delta = headroom - skb_headroom(skb); + int osize = skb_end_offset(skb); + struct sock *sk = skb->sk; if (WARN_ONCE(delta <= 0, "%s is expecting an increase in the headroom", __func__)) return skb; - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { + delta = SKB_DATA_ALIGN(delta); + /* pskb_expand_head() might crash, if skb is shared. */ + if (skb_shared(skb) || !is_skb_wmem(skb)) { struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } + if (unlikely(!nskb)) + goto fail; + + if (sk) + skb_set_owner_w(nskb, sk); + consume_skb(skb); skb = nskb; } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; + if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) + goto fail; + + if (sk && is_skb_wmem(skb)) { + delta = skb_end_offset(skb) - osize; + refcount_add(delta, &sk->sk_wmem_alloc); + skb->truesize += delta; } return skb; + +fail: + kfree_skb(skb); + return NULL; } EXPORT_SYMBOL(skb_expand_head); diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h new file mode 100644 index 000000000000..2f396e6bfba5 --- /dev/null +++ b/net/core/sock_destructor.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _NET_CORE_SOCK_DESTRUCTOR_H +#define _NET_CORE_SOCK_DESTRUCTOR_H +#include + +static inline bool is_skb_wmem(const struct sk_buff *skb) +{ + return skb->destructor == sock_wfree || + skb->destructor == __sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree); +} +#endif -- cgit v1.2.3 From 4f7019c7eb33967eb87766e0e4602b5576873680 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:41 -0400 Subject: sctp: use init_tag from inithdr for ABORT chunk Currently Linux SCTP uses the verification tag of the existing SCTP asoc when failing to process and sending the packet with the ABORT chunk. This will result in the peer accepting the ABORT chunk and removing the SCTP asoc. One could exploit this to terminate a SCTP asoc. This patch is to fix it by always using the initiate tag of the received INIT chunk for the ABORT chunk to be sent. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 32df65f68c12..7f8306968c39 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -6348,6 +6348,7 @@ static struct sctp_packet *sctp_ootb_pkt_new( * yet. */ switch (chunk->chunk_hdr->type) { + case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: { struct sctp_initack_chunk *initack; -- cgit v1.2.3 From eae5783908042a762c24e1bd11876edb91d314b1 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:42 -0400 Subject: sctp: fix the processing for INIT chunk This patch fixes the problems below: 1. In non-shutdown_ack_sent states: in sctp_sf_do_5_1B_init() and sctp_sf_do_5_2_2_dupinit(): chunk length check should be done before any checks that may cause to send abort, as making packet for abort will access the init_tag from init_hdr in sctp_ootb_pkt_new(). 2. In shutdown_ack_sent state: in sctp_sf_do_9_2_reshutack(): The same checks as does in sctp_sf_do_5_2_2_dupinit() is needed for sctp_sf_do_9_2_reshutack(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 72 +++++++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 7f8306968c39..9bfa8cca9974 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -337,6 +343,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -351,14 +365,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user issues close(). @@ -1524,20 +1530,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port) return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands); @@ -1882,9 +1884,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2970,13 +2972,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -3010,6 +3010,26 @@ nomem: return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * -- cgit v1.2.3 From 438b95a7c98f77d51cbf4db021f41b602d750a3f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:43 -0400 Subject: sctp: fix the processing for INIT_ACK chunk Currently INIT_ACK chunk in non-cookie_echoed state is processed in sctp_sf_discard_chunk() to send an abort with the existent asoc's vtag if the chunk length is not valid. But the vtag in the chunk's sctphdr is not verified, which may be exploited by one to cook a malicious chunk to terminal a SCTP asoc. sctp_sf_discard_chunk() also is called in many other places to send an abort, and most of those have this problem. This patch is to fix it by sending abort with the existent asoc's vtag only if the vtag from the chunk's sctphdr is verified in sctp_sf_discard_chunk(). Note on sctp_sf_do_9_1_abort() and sctp_sf_shutdown_pending_abort(), the chunk length has been verified before sctp_sf_discard_chunk(), so replace it with sctp_sf_discard(). On sctp_sf_do_asconf_ack() and sctp_sf_do_asconf(), move the sctp_chunk_length_valid check ahead of sctp_sf_discard_chunk(), then replace it with sctp_sf_discard(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9bfa8cca9974..672e5308839b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2343,7 +2343,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2389,7 +2389,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2659,7 +2659,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -3865,6 +3865,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ASCONF ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP: Section 4.1.1 * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3873,13 +3878,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !chunk->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); @@ -4008,6 +4007,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP, Section 4.1.2: * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -4016,14 +4021,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !asconf_ack->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, - sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); @@ -4595,6 +4593,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net, { struct sctp_chunk *chunk = arg; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. -- cgit v1.2.3 From a64b341b8695e1c744dd972b39868371b4f68f83 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:44 -0400 Subject: sctp: fix the processing for COOKIE_ECHO chunk 1. In closed state: in sctp_sf_do_5_1D_ce(): When asoc is NULL, making packet for abort will use chunk's vtag in sctp_ootb_pkt_new(). But when asoc exists, vtag from the chunk should be verified before using peer.i.init_tag to make packet for abort in sctp_ootb_pkt_new(), and just discard it if vtag is not correct. 2. In the other states: in sctp_sf_do_5_2_4_dupcook(): asoc always exists, but duplicate cookie_echo's vtag will be handled by sctp_tietags_compare() and then take actions, so before that we only verify the vtag for the abort sent for invalid chunk length. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 672e5308839b..96a069d725e9 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -710,6 +710,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, struct sock *sk; int error = 0; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -724,7 +727,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -2204,9 +2208,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( * enough for the chunk header. Cookie length verification is * done later. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. -- cgit v1.2.3 From aa0f697e45286a6b5f0ceca9418acf54b9099d99 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:45 -0400 Subject: sctp: add vtag check in sctp_sf_violation sctp_sf_violation() is called when processing HEARTBEAT_ACK chunk in cookie_wait state, and some other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 96a069d725e9..36328ab88bdd 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4669,6 +4669,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From ef16b1734f0a176277b7bb9c71a6d977a6ef3998 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:46 -0400 Subject: sctp: add vtag check in sctp_sf_do_8_5_1_E_sa sctp_sf_do_8_5_1_E_sa() is called when processing SHUTDOWN_ACK chunk in cookie_wait and cookie_echoed state. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. Note that when fails to verify the vtag from SHUTDOWN-ACK chunk, SHUTDOWN COMPLETE message will still be sent back to peer, but with the vtag from SHUTDOWN-ACK chunk, as said in 5) of rfc4960#section-8.4. While at it, also remove the unnecessary chunk length check from sctp_sf_shut_8_4_5(), as it's already done in both places where it calls sctp_sf_shut_8_4_5(). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 36328ab88bdd..a3545498a038 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3803,12 +3803,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5( SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - /* If the chunk length is invalid, we don't want to process - * the reset of the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* We need to discard the rest of the packet to prevent * potential boomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. @@ -3836,6 +3830,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, -- cgit v1.2.3 From 9d02831e517aa36ee6bdb453a0eb47bd49923fe3 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Oct 2021 07:42:47 -0400 Subject: sctp: add vtag check in sctp_sf_ootb sctp_sf_ootb() is called when processing DATA chunk in closed state, and many other places are also using it. The vtag in the chunk's sctphdr should be verified, otherwise, as later in chunk length check, it may send abort with the existent asoc's vtag, which can be exploited by one to cook a malicious chunk to terminate a SCTP asoc. When fails to verify the vtag from the chunk, this patch sets asoc to NULL, so that the abort will be made with the vtag from the received chunk later. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: Jakub Kicinski --- net/sctp/sm_statefuns.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a3545498a038..fb3da4d8f4a3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3688,6 +3688,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + if (asoc && !sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ -- cgit v1.2.3 From 1f83b835a3eaa5ae4bd825fb07182698bfc243ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Oct 2021 16:02:47 +0200 Subject: fcnal-test: kill hanging ping/nettest binaries on cleanup On my box I see a bunch of ping/nettest processes hanging around after fcntal-test.sh is done. Clean those up before netns deletion. Signed-off-by: Florian Westphal Acked-by: David Ahern Link: https://lore.kernel.org/r/20211021140247.29691-1-fw@strlen.de Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fcnal-test.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 8e67a252b672..3313566ce906 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -445,10 +445,13 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } -- cgit v1.2.3 From 8f04db78e4e36a5d4858ce841a3e9cc3d69bde36 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:51 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for riscv JIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose the maximum amount of useable memory from the riscv JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Luke Nelson Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20211014142554.53120-2-lmb@cloudflare.com --- arch/riscv/net/bpf_jit_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index fed86f42dfbe..0fee2cbaaf53 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -166,6 +166,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From 5d63ae908242f028bd10860cba98450d11c079b8 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:52 +0100 Subject: bpf: Define bpf_jit_alloc_exec_limit for arm64 JIT Expose the maximum amount of useable memory from the arm64 JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-3-lmb@cloudflare.com --- arch/arm64/net/bpf_jit_comp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..803e7773fa86 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1136,6 +1136,11 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, -- cgit v1.2.3 From fadb7ff1a6c2c565af56b4aacdd086b067eed440 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Thu, 14 Oct 2021 15:25:53 +0100 Subject: bpf: Prevent increasing bpf_jit_limit above max Restrict bpf_jit_limit to the maximum supported by the arch's JIT. Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211014142554.53120-4-lmb@cloudflare.com --- include/linux/filter.h | 1 + kernel/bpf/core.c | 4 +++- net/core/sysctl_net_core.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 4a93c12543ee..ef03ff34234d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1051,6 +1051,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d6b7dfdd8066..c1e7eb3f1876 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -524,6 +524,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) @@ -817,7 +818,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index c8496c1142c9..5f88526ad61c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, .extra1 = &long_one, - .extra2 = &long_max, + .extra2 = &bpf_jit_limit_max, }, #endif { -- cgit v1.2.3 From fda7a38714f40b635f5502ec4855602c6b33dad2 Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Tue, 19 Oct 2021 03:29:34 +0000 Subject: bpf: Fix error usage of map_fd and fdget() in generic_map_update_batch() 1. The ufd in generic_map_update_batch() should be read from batch.map_fd; 2. A call to fdget() should be followed by a symmetric call to fdput(). Fixes: aa2e93b8e58e ("bpf: Add generic support for update and delete batch ops") Signed-off-by: Xu Kuohai Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211019032934.1210517-1-xukuohai@huawei.com --- kernel/bpf/syscall.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4e50c0bfdb7d..9dab49d3f394 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1337,12 +1337,11 @@ int generic_map_update_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); u32 value_size, cp, max_count; - int ufd = attr->map_fd; + int ufd = attr->batch.map_fd; void *key, *value; struct fd f; int err = 0; - f = fdget(ufd); if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -1367,6 +1366,7 @@ int generic_map_update_batch(struct bpf_map *map, return -ENOMEM; } + f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ for (cp = 0; cp < max_count; cp++) { err = -EFAULT; if (copy_from_user(key, keys + cp * map->key_size, @@ -1386,6 +1386,7 @@ int generic_map_update_batch(struct bpf_map *map, kvfree(value); kvfree(key); + fdput(f); return err; } -- cgit v1.2.3 From 04f8ef5643bcd8bcde25dfdebef998aea480b2ba Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Mon, 18 Oct 2021 15:56:23 +0800 Subject: cgroup: Fix memory leak caused by missing cgroup_bpf_offline When enabling CONFIG_CGROUP_BPF, kmemleak can be observed by running the command as below: $mount -t cgroup -o none,name=foo cgroup cgroup/ $umount cgroup/ unreferenced object 0xc3585c40 (size 64): comm "mount", pid 425, jiffies 4294959825 (age 31.990s) hex dump (first 32 bytes): 01 00 00 80 84 8c 28 c0 00 00 00 00 00 00 00 00 ......(......... 00 00 00 00 00 00 00 00 6c 43 a0 c3 00 00 00 00 ........lC...... backtrace: [] cgroup_bpf_inherit+0x44/0x24c [<1f03679c>] cgroup_setup_root+0x174/0x37c [] cgroup1_get_tree+0x2c0/0x4a0 [] vfs_get_tree+0x24/0x108 [] path_mount+0x384/0x988 [] do_mount+0x64/0x9c [<208c9cfe>] sys_mount+0xfc/0x1f4 [<06dd06e0>] ret_fast_syscall+0x0/0x48 [] 0xbeb4daa8 This is because that since the commit 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") root_cgrp->bpf.refcnt.data is allocated by the function percpu_ref_init in cgroup_bpf_inherit which is called by cgroup_setup_root when mounting, but not freed along with root_cgrp when umounting. Adding cgroup_bpf_offline which calls percpu_ref_kill to cgroup_kill_sb can free root_cgrp->bpf.refcnt.data in umount path. This patch also fixes the commit 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself"). A cgroup_bpf_offline is needed to do a cleanup that frees the resources which are allocated by cgroup_bpf_inherit in cgroup_setup_root. And inside cgroup_bpf_offline, cgroup_get() is at the beginning and cgroup_put is at the end of cgroup_bpf_release which is called by cgroup_bpf_offline. So cgroup_bpf_offline can keep the balance of cgroup's refcount. Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path") Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself") Signed-off-by: Quanyang Wang Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211018075623.26884-1-quanyang.wang@windriver.com --- kernel/cgroup/cgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 570b0c97392a..ea08f01d0111 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2187,8 +2187,10 @@ static void cgroup_kill_sb(struct super_block *sb) * And don't kill the default root. */ if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && - !percpu_ref_is_dying(&root->cgrp.self.refcnt)) + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + cgroup_bpf_offline(&root->cgrp); percpu_ref_kill(&root->cgrp.self.refcnt); + } cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } -- cgit v1.2.3 From d6423d2ec39cce2bfca418c81ef51792891576bc Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:13:53 -0400 Subject: net: ethernet: microchip: lan743x: Fix driver crash when lan743x_pm_resume fails The driver needs to clean up and return when the initialization fails on resume. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 9e8561cdc32a..3ac968b1c6ce 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -3019,6 +3019,8 @@ static int lan743x_pm_resume(struct device *dev) if (ret) { netif_err(adapter, probe, adapter->netdev, "lan743x_hardware_init returned %d\n", ret); + lan743x_pci_cleanup(adapter); + return ret; } /* open netdev when netdev is at running state while resume. -- cgit v1.2.3 From 95a359c9553342d36d408d35331ff0bfce75272f Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Fri, 22 Oct 2021 11:53:43 -0400 Subject: net: ethernet: microchip: lan743x: Fix dma allocation failure by using dma_set_mask_and_coherent The dma failure was reported in the raspberry pi github (issue #4117). https://github.com/raspberrypi/linux/issues/4117 The use of dma_set_mask_and_coherent fixes the issue. Tested on 32/64-bit raspberry pi CM4 and 64-bit ubuntu x86 PC with EVB-LAN7430. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 3ac968b1c6ce..c4f32c7e0db1 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1743,6 +1743,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&tx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(tx->ring_size * sizeof(struct lan743x_tx_descriptor), PAGE_SIZE); @@ -2276,6 +2286,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&rx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(rx->ring_size * sizeof(struct lan743x_rx_descriptor), PAGE_SIZE); -- cgit v1.2.3 From 87066fdd2e30fe9dd531125d95257c118a74617e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Oct 2021 09:48:33 -1000 Subject: Revert "mm/secretmem: use refcount_t instead of atomic_t" This reverts commit 110860541f443f950c1274f217a1a3e298670a33. Converting the "secretmem_users" counter to a refcount is incorrect, because a refcount is special in zero and can't just be incremented (but a count of users is not, and "no users" is actually perfectly valid and not a sign of a free'd resource). Reported-by: syzbot+75639e6a0331cd61d3e2@syzkaller.appspotmail.com Cc: Jordy Zomer Cc: Kees Cook , Cc: Jordy Zomer Cc: James Bottomley Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 1fea68b8d5a6..030f02ddc7c1 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -18,7 +18,6 @@ #include #include #include -#include #include @@ -41,11 +40,11 @@ module_param_named(enable, secretmem_enable, bool, 0400); MODULE_PARM_DESC(secretmem_enable, "Enable secretmem and memfd_secret(2) system call"); -static refcount_t secretmem_users; +static atomic_t secretmem_users; bool secretmem_active(void) { - return !!refcount_read(&secretmem_users); + return !!atomic_read(&secretmem_users); } static vm_fault_t secretmem_fault(struct vm_fault *vmf) @@ -104,7 +103,7 @@ static const struct vm_operations_struct secretmem_vm_ops = { static int secretmem_release(struct inode *inode, struct file *file) { - refcount_dec(&secretmem_users); + atomic_dec(&secretmem_users); return 0; } @@ -218,7 +217,7 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) file->f_flags |= O_LARGEFILE; fd_install(fd, file); - refcount_inc(&secretmem_users); + atomic_inc(&secretmem_users); return fd; err_put_fd: -- cgit v1.2.3 From a0023bb9dd9bc439d44604eeec62426a990054cd Mon Sep 17 00:00:00 2001 From: Zheyu Ma Date: Fri, 22 Oct 2021 09:12:26 +0000 Subject: ata: sata_mv: Fix the error handling of mv_chip_id() mv_init_host() propagates the value returned by mv_chip_id() which in turn gets propagated by mv_pci_init_one() and hits local_pci_probe(). During the process of driver probing, the probe function should return < 0 for failure, otherwise, the kernel will treat value > 0 as success. Since this is a bug rather than a recoverable runtime error we should use dev_alert() instead of dev_err(). Signed-off-by: Zheyu Ma Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 9d86203e1e7a..c53633d47bfb 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3896,8 +3896,8 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) break; default: - dev_err(host->dev, "BUG: invalid board index %u\n", board_idx); - return 1; + dev_alert(host->dev, "BUG: invalid board index %u\n", board_idx); + return -EINVAL; } hpriv->hp_flags = hp_flags; -- cgit v1.2.3 From 00568b8a6364e15009b345b462e927e0b9fc2bb9 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 21 Oct 2021 10:26:57 +0100 Subject: ARM: 9148/1: handle CONFIG_CPU_ENDIAN_BE32 in arch/arm/kernel/head.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My intel-ixp42x-welltech-epbx100 no longer boot since 4.14. This is due to commit 463dbba4d189 ("ARM: 9104/2: Fix Keystone 2 kernel mapping regression") which forgot to handle CONFIG_CPU_ENDIAN_BE32 as possible BE config. Suggested-by: Krzysztof Hałasa Fixes: 463dbba4d189 ("ARM: 9104/2: Fix Keystone 2 kernel mapping regression") Signed-off-by: Corentin Labbe Signed-off-by: Russell King (Oracle) --- arch/arm/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 29070eb8df7d..3fc7f9750ce4 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -253,7 +253,7 @@ __create_page_tables: add r0, r4, #KERNEL_OFFSET >> (SECTION_SHIFT - PMD_ORDER) ldr r6, =(_end - 1) adr_l r5, kernel_sec_start @ _pa(kernel_sec_start) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r8, [r5, #4] @ Save physical start of kernel (BE) #else str r8, [r5] @ Save physical start of kernel (LE) @@ -266,7 +266,7 @@ __create_page_tables: bls 1b eor r3, r3, r7 @ Remove the MMU flags adr_l r5, kernel_sec_end @ _pa(kernel_sec_end) -#ifdef CONFIG_CPU_ENDIAN_BE8 +#if defined CONFIG_CPU_ENDIAN_BE8 || defined CONFIG_CPU_ENDIAN_BE32 str r3, [r5, #4] @ Save physical end of kernel (BE) #else str r3, [r5] @ Save physical end of kernel (LE) -- cgit v1.2.3 From c10a485c3de5ccbf1fff65a382cebcb2730c6b06 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:02 +0200 Subject: phy: phy_ethtool_ksettings_get: Lock the phy for consistency The PHY structure should be locked while copying information out if it, otherwise there is no guarantee of self consistency. Without the lock the PHY state machine could be updating the structure. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f124a8a58bd4..8457b829667e 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -299,6 +299,7 @@ EXPORT_SYMBOL(phy_ethtool_ksettings_set); void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { + mutex_lock(&phydev->lock); linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -317,6 +318,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; cmd->base.eth_tp_mdix = phydev->mdix; + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_ethtool_ksettings_get); -- cgit v1.2.3 From 64cd92d5e8180c2ded3fdea76862de6f596ae2c9 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:03 +0200 Subject: phy: phy_ethtool_ksettings_set: Move after phy_start_aneg This allows it to make use of a helper which assume the PHY is already locked. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 106 +++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 8457b829667e..ee584fa8b76d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -243,59 +243,6 @@ static void phy_sanitize_settings(struct phy_device *phydev) } } -int phy_ethtool_ksettings_set(struct phy_device *phydev, - const struct ethtool_link_ksettings *cmd) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); - u8 autoneg = cmd->base.autoneg; - u8 duplex = cmd->base.duplex; - u32 speed = cmd->base.speed; - - if (cmd->base.phy_address != phydev->mdio.addr) - return -EINVAL; - - linkmode_copy(advertising, cmd->link_modes.advertising); - - /* We make sure that we don't pass unsupported values in to the PHY */ - linkmode_and(advertising, advertising, phydev->supported); - - /* Verify the settings we care about. */ - if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) - return -EINVAL; - - if (autoneg == AUTONEG_DISABLE && - ((speed != SPEED_1000 && - speed != SPEED_100 && - speed != SPEED_10) || - (duplex != DUPLEX_HALF && - duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = autoneg; - - if (autoneg == AUTONEG_DISABLE) { - phydev->speed = speed; - phydev->duplex = duplex; - } - - linkmode_copy(phydev->advertising, advertising); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - phydev->advertising, autoneg == AUTONEG_ENABLE); - - phydev->master_slave_set = cmd->base.master_slave_cfg; - phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} -EXPORT_SYMBOL(phy_ethtool_ksettings_set); - void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { @@ -802,6 +749,59 @@ static int phy_poll_aneg_done(struct phy_device *phydev) return ret < 0 ? ret : 0; } +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + linkmode_copy(advertising, cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + linkmode_and(advertising, advertising, phydev->supported); + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = autoneg; + + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } + + linkmode_copy(phydev->advertising, advertising); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising, autoneg == AUTONEG_ENABLE); + + phydev->master_slave_set = cmd->base.master_slave_cfg; + phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + /** * phy_speed_down - set speed to lowest speed supported by both link partners * @phydev: the phy_device struct -- cgit v1.2.3 From 707293a56f95f8e7e0cfae008010c7933fb68973 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:04 +0200 Subject: phy: phy_start_aneg: Add an unlocked version Split phy_start_aneg into a wrapper which takes the PHY lock, and a helper doing the real work. This will be needed when phy_ethtook_ksettings_set takes the lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ee584fa8b76d..d845afab1af7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -700,7 +700,7 @@ static int phy_check_link_status(struct phy_device *phydev) } /** - * phy_start_aneg - start auto-negotiation for this PHY device + * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct * * Description: Sanitizes the settings (if we're not autonegotiating @@ -708,25 +708,43 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int _phy_start_aneg(struct phy_device *phydev) { int err; + lockdep_assert_held(&phydev->lock); + if (!phydev->drv) return -EIO; - mutex_lock(&phydev->lock); - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); err = phy_config_aneg(phydev); if (err < 0) - goto out_unlock; + return err; if (phy_is_started(phydev)) err = phy_check_link_status(phydev); -out_unlock: + + return err; +} + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + mutex_lock(&phydev->lock); + err = _phy_start_aneg(phydev); mutex_unlock(&phydev->lock); return err; -- cgit v1.2.3 From af1a02aa23c37045e6adfcf074cf7dbac167a403 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 24 Oct 2021 21:48:05 +0200 Subject: phy: phy_ethtool_ksettings_set: Lock the PHY while changing settings There is a race condition where the PHY state machine can change members of the phydev structure at the same time userspace requests a change via ethtool. To prevent this, have phy_ethtool_ksettings_set take the PHY lock. Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Reported-by: Walter Stoll Suggested-by: Walter Stoll Tested-by: Walter Stoll Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d845afab1af7..a3bfb156c83d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -798,6 +798,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, duplex != DUPLEX_FULL))) return -EINVAL; + mutex_lock(&phydev->lock); phydev->autoneg = autoneg; if (autoneg == AUTONEG_DISABLE) { @@ -814,8 +815,9 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; /* Restart the PHY */ - phy_start_aneg(phydev); + _phy_start_aneg(phydev); + mutex_unlock(&phydev->lock); return 0; } EXPORT_SYMBOL(phy_ethtool_ksettings_set); -- cgit v1.2.3 From 09b1d5dc6ce1c9151777f6c4e128a59457704c97 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 25 Oct 2021 13:31:12 +0200 Subject: cfg80211: fix management registrations locking The management registrations locking was broken, the list was locked for each wdev, but cfg80211_mgmt_registrations_update() iterated it without holding all the correct spinlocks, causing list corruption. Rather than trying to fix it with fine-grained locking, just move the lock to the wiphy/rdev (still need the list on each wdev), we already need to hold the wdev lock to change it, so there's no contention on the lock in any case. This trivially fixes the bug since we hold one wdev's lock already, and now will hold the lock that protects all lists. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Fixes: 6cd536fe62ef ("cfg80211: change internal management frame registration API") Link: https://lore.kernel.org/r/20211025133111.5cf733eab0f4.I7b0abb0494ab712f74e2efcd24bb31ac33f7eee9@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 -- net/wireless/core.c | 2 +- net/wireless/core.h | 2 ++ net/wireless/mlme.c | 26 ++++++++++++++------------ 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 62dd8422e0dc..27336fc70467 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5376,7 +5376,6 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames - * @mgmt_registrations_lock: lock for the list * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers @@ -5423,7 +5422,6 @@ struct wireless_dev { u32 identifier; struct list_head mgmt_registrations; - spinlock_t mgmt_registrations_lock; u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/net/wireless/core.c b/net/wireless/core.c index 03323121ca50..aaba847d79eb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -524,6 +524,7 @@ use_default_name: INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); + spin_lock_init(&rdev->mgmt_registrations_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1279,7 +1280,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); - spin_lock_init(&wdev->mgmt_registrations_lock); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index b35d0db12f1d..1720abf36f92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -100,6 +100,8 @@ struct cfg80211_registered_device { struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; + /* lock for all wdev lists */ + spinlock_t mgmt_registrations_lock; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3aa69b375a10..783acd2c4211 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) lockdep_assert_held(&rdev->wiphy.mtx); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); if (!wdev->mgmt_registrations_need_update) { - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return; } @@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) rcu_read_unlock(); wdev->mgmt_registrations_need_update = 0; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } @@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len, bool multicast_rx, struct netlink_ext_ack *extack) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); @@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, list_add(&nreg->list, &wdev->mgmt_registrations); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, out: kfree(nreg); - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return err; } @@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlportid != nlportid) @@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) schedule_work(&rdev->mgmt_registrations_update_wk); } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); if (nlportid && rdev->crit_proto_nlportid == nlportid) { rdev->crit_proto_nlportid = 0; @@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); } @@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, data = buf + ieee80211_hdrlen(mgmt->frame_control); data_len = len - ieee80211_hdrlen(mgmt->frame_control); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { if (reg->frame_type != ftype) @@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, break; } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); trace_cfg80211_return_bool(result); return result; -- cgit v1.2.3 From 689a0a9f505f7bffdefe6f17fddb41c8ab6344f6 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Sun, 24 Oct 2021 22:15:46 +0200 Subject: cfg80211: correct bridge/4addr mode check Without the patch we fail: $ sudo brctl addbr br0 $ sudo brctl addif br0 wlp1s0 $ sudo iw wlp1s0 set 4addr on command failed: Device or resource busy (-16) Last command failed but iface was already in 4addr mode. Fixes: ad4bb6f8883a ("cfg80211: disallow bridging managed/adhoc interfaces") Signed-off-by: Janusz Dziedzic Link: https://lore.kernel.org/r/20211024201546.614379-1-janusz.dziedzic@gmail.com [add fixes tag, fix indentation, edit commit log] Signed-off-by: Johannes Berg --- net/wireless/util.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 18dba3d7c638..a1a99a574984 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1028,14 +1028,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; - /* if it's part of a bridge, reject changing type to station/ibss */ - if (netif_is_bridge_port(dev) && - (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION || - ntype == NL80211_IFTYPE_P2P_CLIENT)) - return -EBUSY; - if (ntype != otype) { + /* if it's part of a bridge, reject changing type to station/ibss */ + if (netif_is_bridge_port(dev) && + (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION || + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); -- cgit v1.2.3 From ace19b992436a257d9a793672e57abc28fe83e2e Mon Sep 17 00:00:00 2001 From: Trevor Woerner Date: Sun, 24 Oct 2021 13:50:02 -0400 Subject: net: nxp: lpc_eth.c: avoid hang when bringing interface down A hard hang is observed whenever the ethernet interface is brought down. If the PHY is stopped before the LPC core block is reset, the SoC will hang. Comparing lpc_eth_close() and lpc_eth_open() I re-arranged the ordering of the functions calls in lpc_eth_close() to reset the hardware before stopping the PHY. Fixes: b7370112f519 ("lpc32xx: Added ethernet driver") Signed-off-by: Trevor Woerner Acked-by: Vladimir Zapolskiy Signed-off-by: David S. Miller --- drivers/net/ethernet/nxp/lpc_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d29fe562b3de..c910fa2f40a4 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev) napi_disable(&pldat->napi); netif_stop_queue(ndev); - if (ndev->phydev) - phy_stop(ndev->phydev); - spin_lock_irqsave(&pldat->lock, flags); __lpc_eth_reset(pldat); netif_carrier_off(ndev); @@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); + if (ndev->phydev) + phy_stop(ndev->phydev); clk_disable_unprepare(pldat->clk); return 0; -- cgit v1.2.3 From 64733956ebba7cc629856f4a6ee35a52bc9c023f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 24 Oct 2021 09:08:20 +0300 Subject: RDMA/sa_query: Use strscpy_pad instead of memcpy to copy a string When copying the device name, the length of the data memcpy copied exceeds the length of the source buffer, which cause the KASAN issue below. Use strscpy_pad() instead. BUG: KASAN: slab-out-of-bounds in ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] Read of size 64 at addr ffff88811a10f5e0 by task rping/140263 CPU: 3 PID: 140263 Comm: rping Not tainted 5.15.0-rc1+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1d/0xa0 kasan_report+0xcb/0x110 kasan_check_range+0x13d/0x180 memcpy+0x20/0x60 ib_nl_set_path_rec_attrs+0x136/0x320 [ib_core] ib_nl_make_request+0x1c6/0x380 [ib_core] send_mad+0x20a/0x220 [ib_core] ib_sa_path_rec_get+0x3e3/0x800 [ib_core] cma_query_ib_route+0x29b/0x390 [rdma_cm] rdma_resolve_route+0x308/0x3e0 [rdma_cm] ucma_resolve_route+0xe1/0x150 [rdma_ucm] ucma_write+0x17b/0x1f0 [rdma_ucm] vfs_write+0x142/0x4d0 ksys_write+0x133/0x160 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f26499aa90f Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 29 fd ff ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 44 24 08 e8 5c fd ff ff 48 RSP: 002b:00007f26495f2dc0 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 00000000000007d0 RCX: 00007f26499aa90f RDX: 0000000000000010 RSI: 00007f26495f2e00 RDI: 0000000000000003 RBP: 00005632a8315440 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000293 R12: 00007f26495f2e00 R13: 00005632a83154e0 R14: 00005632a8315440 R15: 00005632a830a810 Allocated by task 131419: kasan_save_stack+0x1b/0x40 __kasan_kmalloc+0x7c/0x90 proc_self_get_link+0x8b/0x100 pick_link+0x4f1/0x5c0 step_into+0x2eb/0x3d0 walk_component+0xc8/0x2c0 link_path_walk+0x3b8/0x580 path_openat+0x101/0x230 do_filp_open+0x12e/0x240 do_sys_openat2+0x115/0x280 __x64_sys_openat+0xce/0x140 do_syscall_64+0x43/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 2ca546b92a02 ("IB/sa: Route SA pathrecord query through netlink") Link: https://lore.kernel.org/r/72ede0f6dab61f7f23df9ac7a70666e07ef314b0.1635055496.git.leonro@nvidia.com Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/sa_query.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index a20b8108e160..c00f8e28aab7 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -706,8 +706,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && -- cgit v1.2.3 From 0c57eeecc559ca6bc18b8c4e2808bc78dbe769b0 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Oct 2021 05:05:28 -0400 Subject: net: Prevent infinite while loop in skb_tx_hash() Drivers call netdev_set_num_tc() and then netdev_set_tc_queue() to set the queue count and offset for each TC. So the queue count and offset for the TCs may be zero for a short period after dev->num_tc has been set. If a TX packet is being transmitted at this time in the code path netdev_pick_tx() -> skb_tx_hash(), skb_tx_hash() may see nonzero dev->num_tc but zero qcount for the TC. The while loop that keeps looping while hash >= qcount will not end. Fix it by checking the TC's qcount to be nonzero before using it. Fixes: eadec877ce9c ("net: Add support for subordinate traffic classes to netdev_pick_tx") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 7ee9fecd3aff..ea2366497697 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3163,6 +3163,12 @@ static u16 skb_tx_hash(const struct net_device *dev, qoffset = sb_dev->tc_to_txq[tc].offset; qcount = sb_dev->tc_to_txq[tc].count; + if (unlikely(!qcount)) { + net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n", + sb_dev->name, qoffset, tc); + qoffset = 0; + qcount = dev->real_num_tx_queues; + } } if (skb_rx_queue_recorded(skb)) { -- cgit v1.2.3 From 042b2046d0f05cf8124c26ff65dbb6148a4404fb Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Fri, 22 Oct 2021 16:31:39 -0700 Subject: xen/netfront: stop tx queues during live migration The tx queues are not stopped during the live migration. As a result, the ndo_start_xmit() may access netfront_info->queues which is freed by talk_to_netback()->xennet_destroy_queues(). This patch is to netif_device_detach() at the beginning of xen-netfront resuming, and netif_device_attach() at the end of resuming. CPU A CPU B talk_to_netback() -> if (info->queues) xennet_destroy_queues(info); to free netfront_info->queues xennet_start_xmit() to access netfront_info->queues -> err = xennet_create_queues(info, &num_queues); The idea is borrowed from virtio-net. Cc: Joe Jin Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index e31b98403f31..fc41ba95f81d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1730,6 +1730,10 @@ static int netfront_resume(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + netif_tx_lock_bh(info->netdev); + netif_device_detach(info->netdev); + netif_tx_unlock_bh(info->netdev); + xennet_disconnect_backend(info); return 0; } @@ -2349,6 +2353,10 @@ static int xennet_connect(struct net_device *dev) * domain a kick because we've probably just requeued some * packets. */ + netif_tx_lock_bh(np->netdev); + netif_device_attach(np->netdev); + netif_tx_unlock_bh(np->netdev); + netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; -- cgit v1.2.3 From f7a1e76d0f608961cc2fc681f867a834f2746bce Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 25 Oct 2021 02:31:48 -0400 Subject: net-sysfs: initialize uid and gid before calling net_ns_get_ownership Currently in net_ns_get_ownership() it may not be able to set uid or gid if make_kuid or make_kgid returns an invalid value, and an uninit-value issue can be triggered by this. This patch is to fix it by initializing the uid and gid before calling net_ns_get_ownership(), as it does in kobject_get_ownership() Fixes: e6dee9f3893c ("net-sysfs: add netdev_change_owner()") Reported-by: Paolo Abeni Signed-off-by: Xin Long Acked-by: Christian Brauner Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f6197774048b..b2e49eb7001d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1973,9 +1973,9 @@ int netdev_register_kobject(struct net_device *ndev) int netdev_change_owner(struct net_device *ndev, const struct net *net_old, const struct net *net_new) { + kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID; + kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID; struct device *dev = &ndev->dev; - kuid_t old_uid, new_uid; - kgid_t old_gid, new_gid; int error; net_ns_get_ownership(net_old, &old_uid, &old_gid); -- cgit v1.2.3 From ac8a6eba2a117e0fdc04da62ab568d1b7ca4c8f6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Oct 2021 10:46:41 -0700 Subject: spi: Fix tegra20 build with CONFIG_PM=n once again Commit efafec27c565 ("spi: Fix tegra20 build with CONFIG_PM=n") already fixed the build without PM support once. There was an alternative fix by Guenter in commit 2bab94090b01 ("spi: tegra20-slink: Declare runtime suspend and resume functions conditionally"), and Mark then merged the two correctly in ffb1e76f4f32 ("Merge tag 'v5.15-rc2' into spi-5.15"). But for some inexplicable reason, Mark then merged things _again_ in commit 59c4e190b10c ("Merge tag 'v5.15-rc3' into spi-5.15"), and screwed things up at that point, and the __maybe_unused attribute on tegra_slink_runtime_resume() went missing. Reinstate it, so that alpha (and other architectures without PM support) builds cleanly again. Btw, this is another prime example of how random back-merges are not good. Just don't do them. Subsystem developers should not merge my tree in any normal circumstances. Both of those merge commits pointed to above are bad: even the one that got the merge result right doesn't even mention _why_ it was done, and the one that got it wrong is obviously broken. Reported-by: Guenter Roeck Cc: Mark Brown Signed-off-by: Linus Torvalds --- drivers/spi/spi-tegra20-slink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 713292b0c71e..3226c4e1c7c0 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1194,7 +1194,7 @@ static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev) return 0; } -static int tegra_slink_runtime_resume(struct device *dev) +static int __maybe_unused tegra_slink_runtime_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct tegra_slink_data *tspi = spi_master_get_devdata(master); -- cgit v1.2.3 From cb685432398122053f3e1dc6a1d68924e5b77be4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 25 Oct 2021 19:16:34 +0100 Subject: secretmem: Prevent secretmem_users from wrapping to zero Commit 110860541f44 ("mm/secretmem: use refcount_t instead of atomic_t") attempted to fix the problem of secretmem_users wrapping to zero and allowing suspend once again. But it was reverted in commit 87066fdd2e30 ("Revert 'mm/secretmem: use refcount_t instead of atomic_t'") because of the problems it caused - a refcount_t was not semantically the right type to use. Instead prevent secretmem_users from wrapping to zero by forbidding new users if the number of users has wrapped from positive to negative. This stops a long way short of reaching the necessary 4 billion users where it wraps to zero again, so there's no need to be clever with special anti-wrap types or checking the return value from atomic_inc(). Signed-off-by: Matthew Wilcox (Oracle) Cc: Jordy Zomer Cc: Kees Cook , Cc: James Bottomley Cc: Mike Rapoport Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/secretmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/secretmem.c b/mm/secretmem.c index 030f02ddc7c1..c2dda408bb36 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -203,6 +203,8 @@ SYSCALL_DEFINE1(memfd_secret, unsigned int, flags) if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC)) return -EINVAL; + if (atomic_read(&secretmem_users) < 0) + return -ENFILE; fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) -- cgit v1.2.3 From 3906fe9bb7f1a2c8667ae54e967dc8690824f4ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Oct 2021 11:30:31 -0700 Subject: Linux 5.15-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 91297670da8e..30c7c81d0437 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Opossums on Parade # *DOCUMENTATION* -- cgit v1.2.3 From e091b836a3baee4c8b1423a969589196b88a9e06 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Thu, 14 Oct 2021 16:54:10 +0300 Subject: Revert "arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target" This reverts commit 001ce9785c0674d913531345e86222c965fc8bf4. This upstream commit broke AOSP (post Android 12 merge) build on RB5. The device either silently crashes into USB crash mode after android boot animation or we see a blank blue screen with following dpu errors in dmesg: [ T444] hw recovery is not complete for ctl:3 [ T444] [drm:dpu_encoder_phys_vid_prepare_for_kickoff:539] [dpu error]enc31 intf1 ctl 3 reset failure: -22 [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 [ C7] [drm:dpu_encoder_frame_done_timeout:2127] [dpu error]enc31 frame done timeout [ T444] [drm:dpu_encoder_phys_vid_wait_for_commit_done:513] [dpu error]vblank timeout [ T444] [drm:dpu_kms_wait_for_commit_done:454] [dpu error]wait for commit done returned -110 Fixes: 001ce9785c06 ("arm64: dts: qcom: sm8250: remove bus clock from the mdss node for sm8250 target") Signed-off-by: Amit Pundir Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20211014135410.4136412-1-dmitry.baryshkov@linaro.org --- arch/arm64/boot/dts/qcom/sm8250.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 8c15d9fed08f..d12e4cbfc852 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -2590,9 +2590,10 @@ power-domains = <&dispcc MDSS_GDSC>; clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>, + <&gcc GCC_DISP_HF_AXI_CLK>, <&gcc GCC_DISP_SF_AXI_CLK>, <&dispcc DISP_CC_MDSS_MDP_CLK>; - clock-names = "iface", "nrt_bus", "core"; + clock-names = "iface", "bus", "nrt_bus", "core"; assigned-clocks = <&dispcc DISP_CC_MDSS_MDP_CLK>; assigned-clock-rates = <460000000>; -- cgit v1.2.3 From 6a8b357278f5f8b9817147277ab8f12879dce8a8 Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Thu, 7 Oct 2021 08:40:31 -0700 Subject: ice: Respond to a NETDEV_UNREGISTER event for LAG When the PF is a member of a link aggregate, and the driver is removed, the process will hang unless we respond to the NETDEV_UNREGISTER event that is sent to the event_handler for LAG. Add a case statement for the ice_lag_event_handler to unlink the PF from the link aggregate. Also remove code that was incorrectly applying a dev_hold to peer_netdevs that were associated with the ice driver. Fixes: df006dd4b1dc ("ice: Add initial support framework for LAG") Signed-off-by: Dave Ertman Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lag.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index 37c18c66b5c7..e375ac849aec 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -100,9 +100,9 @@ static void ice_display_lag_info(struct ice_lag *lag) */ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) { - struct net_device *event_netdev, *netdev_tmp; struct netdev_notifier_bonding_info *info; struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; const char *lag_netdev_name; event_netdev = netdev_notifier_info_to_dev(ptr); @@ -123,19 +123,6 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) goto lag_out; } - rcu_read_lock(); - for_each_netdev_in_bond_rcu(lag->upper_netdev, netdev_tmp) { - if (!netif_is_ice(netdev_tmp)) - continue; - - if (netdev_tmp && netdev_tmp != lag->netdev && - lag->peer_netdev != netdev_tmp) { - dev_hold(netdev_tmp); - lag->peer_netdev = netdev_tmp; - } - } - rcu_read_unlock(); - if (bonding_info->slave.state) ice_lag_set_backup(lag); else @@ -319,6 +306,9 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, case NETDEV_BONDING_INFO: ice_lag_info_event(lag, ptr); break; + case NETDEV_UNREGISTER: + ice_lag_unlink(lag, ptr); + break; default: break; } -- cgit v1.2.3 From fd1b5beb177a8372cea2a0d014835491e4886f77 Mon Sep 17 00:00:00 2001 From: Yongxin Liu Date: Mon, 11 Oct 2021 15:02:16 +0800 Subject: ice: check whether PTP is initialized in ice_ptp_release() PTP is currently only supported on E810 devices, it is checked in ice_ptp_init(). However, there is no check in ice_ptp_release(). For other E800 series devices, ice_ptp_release() will be wrongly executed. Fix the following calltrace. INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. Workqueue: ice ice_service_task [ice] Call Trace: dump_stack_lvl+0x5b/0x82 dump_stack+0x10/0x12 register_lock_class+0x495/0x4a0 ? find_held_lock+0x3c/0xb0 __lock_acquire+0x71/0x1830 lock_acquire+0x1e6/0x330 ? ice_ptp_release+0x3c/0x1e0 [ice] ? _raw_spin_lock+0x19/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] _raw_spin_lock+0x38/0x70 ? ice_ptp_release+0x3c/0x1e0 [ice] ice_ptp_release+0x3c/0x1e0 [ice] ice_prepare_for_reset+0xcb/0xe0 [ice] ice_do_reset+0x38/0x110 [ice] ice_service_task+0x138/0xf10 [ice] ? __this_cpu_preempt_check+0x13/0x20 process_one_work+0x26a/0x650 worker_thread+0x3f/0x3b0 ? __kthread_parkme+0x51/0xb0 ? process_one_work+0x650/0x650 kthread+0x161/0x190 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Fixes: 4dd0d5c33c3e ("ice: add lock around Tx timestamp tracker flush") Signed-off-by: Yongxin Liu Reviewed-by: Jacob Keller Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 80380aed8882..d1ef3d48a4b0 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1571,6 +1571,9 @@ err_kworker: */ void ice_ptp_release(struct ice_pf *pf) { + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; + /* Disable timestamping for both Tx and Rx */ ice_ptp_cfg_timestamp(pf, false); -- cgit v1.2.3 From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 24 Oct 2021 09:40:14 +0300 Subject: mlxsw: pci: Recycle received packet upon allocation failure When the driver fails to allocate a new Rx buffer, it passes an empty Rx descriptor (contains zero address and size) to the device and marks it as invalid by setting the skb pointer in the descriptor's metadata to NULL. After processing enough Rx descriptors, the driver will try to process the invalid descriptor, but will return immediately seeing that the skb pointer is NULL. Since the driver no longer passes new Rx descriptors to the device, the Rx queue will eventually become full and the device will start to drop packets. Fix this by recycling the received packet if allocation of the new packet failed. This means that allocation is no longer performed at the end of the Rx routine, but at the start, before tearing down the DMA mapping of the received packet. Remove the comment about the descriptor being zeroed as it is no longer correct. This is OK because we either use the descriptor as-is (when recycling) or overwrite its address and size fields with that of the newly allocated Rx buffer. The issue was discovered when a process ("perf") consumed too much memory and put the system under memory pressure. It can be reproduced by injecting slab allocation failures [1]. After the fix, the Rx queue no longer comes to a halt. [1] # echo 10 > /sys/kernel/debug/failslab/times # echo 1000 > /sys/kernel/debug/failslab/interval # echo 100 > /sys/kernel/debug/failslab/probability FAULT_INJECTION: forcing a failure. name failslab, interval 1000, probability 100, space 0, times 8 [...] Call Trace: dump_stack_lvl+0x34/0x44 should_fail.cold+0x32/0x37 should_failslab+0x5/0x10 kmem_cache_alloc_node+0x23/0x190 __alloc_skb+0x1f9/0x280 __netdev_alloc_skb+0x3a/0x150 mlxsw_pci_rdq_skb_alloc+0x24/0x90 mlxsw_pci_cq_tasklet+0x3dc/0x1200 tasklet_action_common.constprop.0+0x9f/0x100 __do_softirq+0xb5/0x252 irq_exit_rcu+0x7a/0xa0 common_interrupt+0x83/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:cpuidle_enter_state+0xc8/0x340 [...] mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 13b0259f7ea6..fcace73eae40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb; int err; - elem_info->u.rdq.skb = NULL; skb = netdev_alloc_skb_ip_align(NULL, buf_len); if (!skb) return -ENOMEM; - /* Assume that wqe was previously zeroed. */ - err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, buf_len, DMA_FROM_DEVICE); if (err) @@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; struct mlxsw_rx_info rx_info = {}; - char *wqe; + char wqe[MLXSW_PCI_WQE_SIZE]; struct sk_buff *skb; u16 byte_count; int err; elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); - skb = elem_info->u.sdq.skb; - if (!skb) - return; - wqe = elem_info->elem; - mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + skb = elem_info->u.rdq.skb; + memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE); if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) { + dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + goto out; + } + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { rx_info.is_lag = true; rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); @@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); - memset(wqe, 0, q->elem_size); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); - if (err) - dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); +out: /* Everything is set up, ring doorbell to pass elem to HW */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); -- cgit v1.2.3 From d81d0e41ed5fe7229a2c9a29d13bad288c7cf2d2 Mon Sep 17 00:00:00 2001 From: Thomas Perrot Date: Fri, 22 Oct 2021 16:21:04 +0200 Subject: spi: spl022: fix Microwire full duplex mode There are missing braces in the function that verify controller parameters, then an error is always returned when the parameter to select Microwire frames operation is used on devices allowing it. Signed-off-by: Thomas Perrot Link: https://lore.kernel.org/r/20211022142104.1386379-1-thomas.perrot@bootlin.com Signed-off-by: Mark Brown --- drivers/spi/spi-pl022.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index feebda66f56e..e4484ace584e 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1716,12 +1716,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; -- cgit v1.2.3 From 9122a70a6333705c0c35614ddc51c274ed1d3637 Mon Sep 17 00:00:00 2001 From: Cyril Strejc Date: Sun, 24 Oct 2021 22:14:25 +0200 Subject: net: multicast: calculate csum of looped-back and forwarded packets During a testing of an user-space application which transmits UDP multicast datagrams and utilizes multicast routing to send the UDP datagrams out of defined network interfaces, I've found a multicast router does not fill-in UDP checksum into locally produced, looped-back and forwarded UDP datagrams, if an original output NIC the datagrams are sent to has UDP TX checksum offload enabled. The datagrams are sent malformed out of the NIC the datagrams have been forwarded to. It is because: 1. If TX checksum offload is enabled on the output NIC, UDP checksum is not calculated by kernel and is not filled into skb data. 2. dev_loopback_xmit(), which is called solely by ip_mc_finish_output(), sets skb->ip_summed = CHECKSUM_UNNECESSARY unconditionally. 3. Since 35fc92a9 ("[NET]: Allow forwarding of ip_summed except CHECKSUM_COMPLETE"), the ip_summed value is preserved during forwarding. 4. If ip_summed != CHECKSUM_PARTIAL, checksum is not calculated during a packet egress. The minimum fix in dev_loopback_xmit(): 1. Preserves skb->ip_summed CHECKSUM_PARTIAL. This is the case when the original output NIC has TX checksum offload enabled. The effects are: a) If the forwarding destination interface supports TX checksum offloading, the NIC driver is responsible to fill-in the checksum. b) If the forwarding destination interface does NOT support TX checksum offloading, checksums are filled-in by kernel before skb is submitted to the NIC driver. c) For local delivery, checksum validation is skipped as in the case of CHECKSUM_UNNECESSARY, thanks to skb_csum_unnecessary(). 2. Translates ip_summed CHECKSUM_NONE to CHECKSUM_UNNECESSARY. It means, for CHECKSUM_NONE, the behavior is unmodified and is there to skip a looped-back packet local delivery checksum validation. Signed-off-by: Cyril Strejc Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/udp.h | 5 +++-- net/core/dev.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 360df454356c..909ecf447e0f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -494,8 +494,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/net/core/dev.c b/net/core/dev.c index ea2366497697..eb3a366bf212 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3912,7 +3912,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb->ip_summed == CHECKSUM_NONE) + skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); netif_rx_ni(skb); -- cgit v1.2.3 From 2195f2062e4cc93870da8e71c318ef98a1c51cef Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 25 Oct 2021 16:49:36 +0200 Subject: nfc: port100: fix using -ERRNO as command type mask During probing, the driver tries to get a list (mask) of supported command types in port100_get_command_type_mask() function. The value is u64 and 0 is treated as invalid mask (no commands supported). The function however returns also -ERRNO as u64 which will be interpret as valid command mask. Return 0 on every error case of port100_get_command_type_mask(), so the probing will stop. Cc: Fixes: 0347a6ab300a ("NFC: port100: Commands mechanism implementation") Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- drivers/nfc/port100.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 517376c43b86..16ceb763594f 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1006,11 +1006,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev) skb = port100_alloc_skb(dev, 0); if (!skb) - return -ENOMEM; + return 0; resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb); if (IS_ERR(resp)) - return PTR_ERR(resp); + return 0; if (resp->len < 8) mask = 0; -- cgit v1.2.3 From fa40d9734a57bcbfa79a280189799f76c88f7bb0 Mon Sep 17 00:00:00 2001 From: Max VA Date: Mon, 25 Oct 2021 17:31:53 +0200 Subject: tipc: fix size validations for the MSG_CRYPTO type The function tipc_crypto_key_rcv is used to parse MSG_CRYPTO messages to receive keys from other nodes in the cluster in order to decrypt any further messages from them. This patch verifies that any supplied sizes in the message body are valid for the received message. Fixes: 1ef6f7c9390f ("tipc: add automatic session key exchange") Signed-off-by: Max VA Acked-by: Ying Xue Signed-off-by: Greg Kroah-Hartman Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/crypto.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index c9391d38de85..dc60c32bb70d 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) u16 key_gen = msg_key_gen(hdr); u16 size = msg_data_sz(hdr); u8 *data = msg_data(hdr); + unsigned int keylen; + + /* Verify whether the size can exist in the packet */ + if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { + pr_debug("%s: message data size is too small\n", rx->name); + goto exit; + } + + keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + + /* Verify the supplied size values */ + if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || + keylen > TIPC_AEAD_KEY_SIZE_MAX)) { + pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); + goto exit; + } spin_lock(&rx->lock); if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, rx->skey, key_gen, rx->key_gen); - goto exit; + goto exit_unlock; } /* Allocate memory for the key */ skey = kmalloc(size, GFP_ATOMIC); if (unlikely(!skey)) { pr_err("%s: unable to allocate memory for skey\n", rx->name); - goto exit; + goto exit_unlock; } /* Copy key from msg data */ - skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + skey->keylen = keylen; memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->keylen); - /* Sanity check */ - if (unlikely(size != tipc_aead_key_size(skey))) { - kfree(skey); - skey = NULL; - goto exit; - } - rx->key_gen = key_gen; rx->skey_mode = msg_key_mode(hdr); rx->skey = skey; rx->nokey = 0; mb(); /* for nokey flag */ -exit: +exit_unlock: spin_unlock(&rx->lock); +exit: /* Schedule the key attaching on this crypto */ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) return true; -- cgit v1.2.3 From 6f68cd634856f8ca93bafd623ba5357e0f648c68 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Sun, 24 Oct 2021 16:13:56 +0300 Subject: net: batman-adv: fix error handling Syzbot reported ODEBUG warning in batadv_nc_mesh_free(). The problem was in wrong error handling in batadv_mesh_init(). Before this patch batadv_mesh_init() was calling batadv_mesh_free() in case of any batadv_*_init() calls failure. This approach may work well, when there is some kind of indicator, which can tell which parts of batadv are initialized; but there isn't any. All written above lead to cleaning up uninitialized fields. Even if we hide ODEBUG warning by initializing bat_priv->nc.work, syzbot was able to hit GPF in batadv_nc_purge_paths(), because hash pointer in still NULL. [1] To fix these bugs we can unwind batadv_*_init() calls one by one. It is good approach for 2 reasons: 1) It fixes bugs on error handling path 2) It improves the performance, since we won't call unneeded batadv_*_free() functions. So, this patch makes all batadv_*_init() clean up all allocated memory before returning with an error to no call correspoing batadv_*_free() and open-codes batadv_mesh_free() with proper order to avoid touching uninitialized fields. Link: https://lore.kernel.org/netdev/000000000000c87fbd05cef6bcb0@google.com/ [1] Reported-and-tested-by: syzbot+28b0702ada0bf7381f58@syzkaller.appspotmail.com Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Pavel Skripkin Acked-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/bridge_loop_avoidance.c | 8 +++-- net/batman-adv/main.c | 56 ++++++++++++++++++++++++---------- net/batman-adv/network-coding.c | 4 ++- net/batman-adv/translation-table.c | 4 ++- 4 files changed, 52 insertions(+), 20 deletions(-) diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 1669744304c5..17687848daec 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; bat_priv->bla.claim_hash = batadv_hash_new(128); - bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.claim_hash) + return -ENOMEM; - if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) + bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.backbone_hash) { + batadv_hash_destroy(bat_priv->bla.claim_hash); return -ENOMEM; + } batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 3ddd66e4c29e..5207cd8d6ad8 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface) bat_priv->gw.generation = 0; - ret = batadv_v_mesh_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_originator_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_orig; + } ret = batadv_tt_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_tt; + } + + ret = batadv_v_mesh_init(bat_priv); + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_v; + } ret = batadv_bla_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_bla; + } ret = batadv_dat_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_dat; + } ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_nc; + } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); @@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface) return 0; -err: - batadv_mesh_free(soft_iface); +err_nc: + batadv_dat_free(bat_priv); +err_dat: + batadv_bla_free(bat_priv); +err_bla: + batadv_v_mesh_free(bat_priv); +err_v: + batadv_tt_free(bat_priv); +err_tt: + batadv_originator_free(bat_priv); +err_orig: + batadv_purge_outstanding_packets(bat_priv, NULL); + atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); + return ret; } diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 9f06132e007d..0a7f1d36a6a8 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); - if (!bat_priv->nc.decoding_hash) + if (!bat_priv->nc.decoding_hash) { + batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; + } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e0b3dace2020..4b7ad6684bc4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return ret; ret = batadv_tt_global_init(bat_priv); - if (ret < 0) + if (ret < 0) { + batadv_tt_local_table_free(bat_priv); return ret; + } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, -- cgit v1.2.3 From db6c3c064f5d55fa9969f33eafca3cdbefbb3541 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 26 Oct 2021 12:36:17 +0200 Subject: net: lan78xx: fix division by zero in send path Add the missing endpoint max-packet sanity check to probe() to avoid division by zero in lan78xx_tx_bh() in case a malicious device has broken descriptors (or when doing descriptor fuzz testing). Note that USB core will reject URBs submitted for endpoints with zero wMaxPacketSize but that drivers doing packet-size calculations still need to handle this (cf. commit 2548288b4fb0 ("USB: Fix: Don't skip endpoint descriptors with maxpacket=0")). Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Cc: stable@vger.kernel.org # 4.3 Cc: Woojung.Huh@microchip.com Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 793f8fbe0069..63cd72c5f580 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4122,6 +4122,12 @@ static int lan78xx_probe(struct usb_interface *intf, dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1); + /* Reject broken descriptors. */ + if (dev->maxpacket == 0) { + ret = -ENODEV; + goto out4; + } + /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; -- cgit v1.2.3 From 19fa0887c57d35b57bfb895e6caf8e72d9601ec0 Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Tue, 26 Oct 2021 15:19:07 +0300 Subject: MAINTAINERS: please remove myself from the Prestera driver Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..086f5c89216b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11278,7 +11278,6 @@ F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst F: drivers/net/ethernet/marvell/octeontx2/af/ MARVELL PRESTERA ETHERNET SWITCH DRIVER -M: Vadym Kochan M: Taras Chornyi S: Supported W: https://github.com/Marvell-switching/switchdev-prestera -- cgit v1.2.3 From 05d5da3cb11c91c39e607066d3313a6ce621796a Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Mon, 25 Oct 2021 09:37:06 +0200 Subject: MAINTAINERS: Add maintainers for DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 Add maintainers for DH electronics DHCOM i.MX6 and DHCOM/DHCOR STM32MP1 boards. Signed-off-by: Christoph Niedermaier Cc: linux-arm-kernel@lists.infradead.org Cc: kernel@dh-electronics.com Cc: arnd@arndb.de Link: https://lore.kernel.org/r/20211025073706.2794-1-cniedermaier@dh-electronics.com' To: soc@kernel.org To: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d118d7957d2..6fbcb632649f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5458,6 +5458,19 @@ F: include/net/devlink.h F: include/uapi/linux/devlink.h F: net/core/devlink.c +DH ELECTRONICS IMX6 DHCOM BOARD SUPPORT +M: Christoph Niedermaier +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/imx6*-dhcom-* + +DH ELECTRONICS STM32MP1 DHCOM/DHCOR BOARD SUPPORT +M: Marek Vasut +L: kernel@dh-electronics.com +S: Maintained +F: arch/arm/boot/dts/stm32mp1*-dhcom-* +F: arch/arm/boot/dts/stm32mp1*-dhcor-* + DIALOG SEMICONDUCTOR DRIVERS M: Support Opensource S: Supported -- cgit v1.2.3 From 6e7733ef0bb9372d5491168635f6ecba8ac3cb8a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 7 Oct 2021 17:33:02 -0700 Subject: Revert "watchdog: iTCO_wdt: Account for rebooting on second timeout" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") and commit aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") since those patches cause a regression on certain boards (https://bugzilla.kernel.org/show_bug.cgi?id=213809). While this revert may result in some boards to only reset after twice the configured timeout value, that is still better than a watchdog reset after half the configured value. Fixes: cb011044e34c ("watchdog: iTCO_wdt: Account for rebooting on second timeout") Fixes: aec42642d91f ("watchdog: iTCO_wdt: Fix detection of SMI-off case") Cc: Jan Kiszka Cc: Mantas Mikulėnas Reported-by: Javier S. Pedro Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20211008003302.1461733-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/iTCO_wdt.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 643c6c2d0b72..ced2fc0deb8c 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -71,8 +71,6 @@ #define TCOBASE(p) ((p)->tco_res->start) /* SMI Control and Enable Register */ #define SMI_EN(p) ((p)->smi_res->start) -#define TCO_EN (1 << 13) -#define GBL_SMI_EN (1 << 0) #define TCO_RLD(p) (TCOBASE(p) + 0x00) /* TCO Timer Reload/Curr. Value */ #define TCOv1_TMR(p) (TCOBASE(p) + 0x01) /* TCOv1 Timer Initial Value*/ @@ -357,12 +355,8 @@ static int iTCO_wdt_set_timeout(struct watchdog_device *wd_dev, unsigned int t) tmrval = seconds_to_ticks(p, t); - /* - * If TCO SMIs are off, the timer counts down twice before rebooting. - * Otherwise, the BIOS generally reboots when the SMI triggers. - */ - if (p->smi_res && - (inl(SMI_EN(p)) & (TCO_EN | GBL_SMI_EN)) != (TCO_EN | GBL_SMI_EN)) + /* For TCO v1 the timer counts down twice before rebooting */ + if (p->iTCO_version == 1) tmrval /= 2; /* from the specs: */ @@ -527,7 +521,7 @@ static int iTCO_wdt_probe(struct platform_device *pdev) * Disables TCO logic generating an SMI# */ val32 = inl(SMI_EN(p)); - val32 &= ~TCO_EN; /* Turn off SMI clearing watchdog */ + val32 &= 0xffffdfff; /* Turn off SMI clearing watchdog */ outl(val32, SMI_EN(p)); } -- cgit v1.2.3 From f31afb502c3151855df3ed40f5974c7884c10d14 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 3 Sep 2021 12:21:01 +0100 Subject: watchdog: sbsa: only use 32-bit accessors SBSA says of the generic watchdog: All registers are 32 bits in size and should be accessed using 32-bit reads and writes. If an access size other than 32 bits is used then the results are IMPLEMENTATION DEFINED. and for qemu, the implementation will only allow 32-bit accesses resulting in a synchronous external abort when configuring the watchdog. Use lo_hi_* accessors rather than a readq/writeq. Fixes: abd3ac7902fb ("watchdog: sbsa: Support architecture version 1") Signed-off-by: Jamie Iles Reviewed-by: Guenter Roeck Reviewed-by: Shaokun Zhang Link: https://lore.kernel.org/r/20210903112101.493552-1-quic_jiles@quicinc.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index ee9ff38929eb..6f4319bdbc50 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -130,7 +130,7 @@ static u64 sbsa_gwdt_reg_read(struct sbsa_gwdt *gwdt) if (gwdt->version == 0) return readl(gwdt->control_base + SBSA_GWDT_WOR); else - return readq(gwdt->control_base + SBSA_GWDT_WOR); + return lo_hi_readq(gwdt->control_base + SBSA_GWDT_WOR); } static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) @@ -138,7 +138,7 @@ static void sbsa_gwdt_reg_write(u64 val, struct sbsa_gwdt *gwdt) if (gwdt->version == 0) writel((u32)val, gwdt->control_base + SBSA_GWDT_WOR); else - writeq(val, gwdt->control_base + SBSA_GWDT_WOR); + lo_hi_writeq(val, gwdt->control_base + SBSA_GWDT_WOR); } /* -- cgit v1.2.3 From bcc3e704f1b73f7f8674364f0bda6593bbc9fd2b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 17 Sep 2021 11:20:23 +0200 Subject: watchdog: sbsa: drop unneeded MODULE_ALIAS The MODULE_DEVICE_TABLE already creates proper alias for platform driver. Having another MODULE_ALIAS causes the alias to be duplicated. Signed-off-by: Krzysztof Kozlowski Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20210917092024.19323-1-krzysztof.kozlowski@canonical.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/sbsa_gwdt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index 6f4319bdbc50..9791c74aebd4 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -411,4 +411,3 @@ MODULE_AUTHOR("Suravee Suthikulpanit "); MODULE_AUTHOR("Al Stone "); MODULE_AUTHOR("Timur Tabi "); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform:" DRV_NAME); -- cgit v1.2.3 From abd1c6adc16d1b82109a29b570fc545b775049d5 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 10 Sep 2021 21:29:25 -0700 Subject: watchdog: ixp4xx_wdt: Fix address space warning sparse reports the following address space warning. drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: incorrect type in assignment (different address spaces) drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: expected void [noderef] __iomem *base drivers/watchdog/ixp4xx_wdt.c:122:20: sparse: got void *platform_data Add a typecast to solve the problem. Fixes: 21a0a29d16c6 ("watchdog: ixp4xx: Rewrite driver to use core") Cc: Linus Walleij Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20210911042925.556889-1-linux@roeck-us.net Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/ixp4xx_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c index 2693ffb24ac7..31b03fa71341 100644 --- a/drivers/watchdog/ixp4xx_wdt.c +++ b/drivers/watchdog/ixp4xx_wdt.c @@ -119,7 +119,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev) iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL); if (!iwdt) return -ENOMEM; - iwdt->base = dev->platform_data; + iwdt->base = (void __iomem *)dev->platform_data; /* * Retrieve rate from a fixed clock from the device tree if -- cgit v1.2.3 From cd004d8299f1dc6cfa6a4eea8f94cb45eaedf070 Mon Sep 17 00:00:00 2001 From: Walter Stoll Date: Thu, 14 Oct 2021 12:22:29 +0200 Subject: watchdog: Fix OMAP watchdog early handling TI's implementation does not service the watchdog even if the kernel command line parameter omap_wdt.early_enable is set to 1. This patch fixes the issue. Signed-off-by: Walter Stoll Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/88a8fe5229cd68fa0f1fd22f5d66666c1b7057a0.camel@duagon.com Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/omap_wdt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1616f93dfad7..74d785b2b478 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.bootstatus = WDIOF_CARDRESET; } - if (!early_enable) + if (early_enable) { + omap_wdt_start(&wdev->wdog); + set_bit(WDOG_HW_RUNNING, &wdev->wdog.status); + } else { omap_wdt_disable(wdev); + } ret = watchdog_register_device(&wdev->wdog); if (ret) { -- cgit v1.2.3 From cd9733f5d75c94a32544d6ce5be47e14194cf137 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Tue, 12 Oct 2021 13:20:19 +0800 Subject: tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function With two Msgs, msgA and msgB and a user doing nonblocking sendmsg calls (or multiple cores) on a single socket 'sk' we could get the following flow. msgA, sk msgB, sk ----------- --------------- tcp_bpf_sendmsg() lock(sk) psock = sk->psock tcp_bpf_sendmsg() lock(sk) ... blocking tcp_bpf_send_verdict if (psock->eval == NONE) psock->eval = sk_psock_msg_verdict .. < handle SK_REDIRECT case > release_sock(sk) < lock dropped so grab here > ret = tcp_bpf_sendmsg_redir psock = sk->psock tcp_bpf_send_verdict lock_sock(sk) ... blocking on B if (psock->eval == NONE) <- boom. psock->eval will have msgA state The problem here is we dropped the lock on msgA and grabbed it with msgB. Now we have old state in psock and importantly psock->eval has not been cleared. So msgB will run whatever action was done on A and the verdict program may never see it. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Liu Jian Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20211012052019.184398-1-liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index d3e9386b493e..9d068153c316 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -232,6 +232,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; + u32 eval = __SK_NONE; int ret; more_data: @@ -275,13 +276,24 @@ more_data: case __SK_REDIRECT: sk_redir = psock->sk_redir; sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { + /* Clean up before releasing the sock lock. */ + eval = psock->eval; + psock->eval = __SK_NONE; + psock->sk_redir = NULL; + } if (psock->cork) { cork = true; psock->cork = NULL; } sk_msg_return(sk, msg, tosend); release_sock(sk); + ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); + lock_sock(sk); if (unlikely(ret < 0)) { int free = sk_msg_free_nocharge(sk, msg); -- cgit v1.2.3 From 7b50ecfcc6cdfe87488576bc3ed443dc8d083b90 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:03 -0700 Subject: net: Rename ->stream_memory_read to ->sock_is_readable The proto ops ->stream_memory_read() is currently only used by TCP to check whether psock queue is empty or not. We need to rename it before reusing it for non-TCP protocols, and adjust the exsiting users accordingly. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-2-xiyou.wangcong@gmail.com --- include/net/sock.h | 8 +++++++- include/net/tls.h | 2 +- net/ipv4/tcp.c | 5 +---- net/ipv4/tcp_bpf.c | 4 ++-- net/tls/tls_main.c | 4 ++-- net/tls/tls_sw.c | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index ea6fbc88c8f9..463f390d90b3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1208,7 +1208,7 @@ struct proto { #endif bool (*stream_memory_free)(const struct sock *sk, int wake); - bool (*stream_memory_read)(const struct sock *sk); + bool (*sock_is_readable)(struct sock *sk); /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); void (*leave_memory_pressure)(struct sock *sk); @@ -2820,4 +2820,10 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs); int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len); +static inline bool sk_is_readable(struct sock *sk) +{ + if (sk->sk_prot->sock_is_readable) + return sk->sk_prot->sock_is_readable(sk); + return false; +} #endif /* _SOCK_H */ diff --git a/include/net/tls.h b/include/net/tls.h index be4b3e1cac46..01d2e3744393 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -375,7 +375,7 @@ void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context *tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -bool tls_sw_stream_read(const struct sock *sk); +bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e8b48df73c85..f5c336f8b0c8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,10 +486,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target) { if (tcp_epollin_ready(sk, target)) return true; - - if (sk->sk_prot->stream_memory_read) - return sk->sk_prot->stream_memory_read(sk); - return false; + return sk_is_readable(sk); } /* diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 9d068153c316..7e71e9e278cb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,7 +150,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_stream_read(const struct sock *sk) +static bool tcp_bpf_sock_is_readable(struct sock *sk) { struct sk_psock *psock; bool empty = true; @@ -491,7 +491,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; + prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index fde56ff49163..9ab81db8a654 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -681,12 +681,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_BASE][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close; prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE]; prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg; - prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read; + prot[TLS_SW][TLS_SW].sock_is_readable = tls_sw_sock_is_readable; prot[TLS_SW][TLS_SW].close = tls_sk_proto_close; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 4feb95e34b64..d5d09bd817b7 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2026,7 +2026,7 @@ splice_read_end: return copied ? : err; } -bool tls_sw_stream_read(const struct sock *sk) +bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); -- cgit v1.2.3 From fb4e0a5e73d4bb5ab69b7905abd2ec3b580e9b59 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:04 -0700 Subject: skmsg: Extract and reuse sk_msg_is_readable() tcp_bpf_sock_is_readable() is pretty much generic, we can extract it and reuse it for non-TCP sockets. Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-3-xiyou.wangcong@gmail.com --- include/linux/skmsg.h | 1 + net/core/skmsg.c | 14 ++++++++++++++ net/ipv4/tcp_bpf.c | 15 +-------------- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 14ab0c0bc924..1ce9a9eb223b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes); int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags); +bool sk_msg_is_readable(struct sock *sk); static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 2d6249b28928..a86ef7e844f8 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); +bool sk_msg_is_readable(struct sock *sk) +{ + struct sk_psock *psock; + bool empty = true; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) + empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + return !empty; +} +EXPORT_SYMBOL_GPL(sk_msg_is_readable); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 7e71e9e278cb..5f4d6f45d87f 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); #ifdef CONFIG_BPF_SYSCALL -static bool tcp_bpf_sock_is_readable(struct sock *sk) -{ - struct sk_psock *psock; - bool empty = true; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) - empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - return !empty; -} - static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, long timeo) { @@ -491,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; - prot[TCP_BPF_BASE].sock_is_readable = tcp_bpf_sock_is_readable; + prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable; prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; -- cgit v1.2.3 From af493388950b6ea3a86f860cfaffab137e024fc8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 8 Oct 2021 13:33:05 -0700 Subject: net: Implement ->sock_is_readable() for UDP and AF_UNIX Yucong noticed we can't poll() sockets in sockmap even when they are the destination sockets of redirections. This is because we never poll any psock queues in ->poll(), except for TCP. With ->sock_is_readable() now we can overwrite >sock_is_readable(), invoke and implement it for both UDP and AF_UNIX sockets. Reported-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-4-xiyou.wangcong@gmail.com --- net/ipv4/udp.c | 3 +++ net/ipv4/udp_bpf.c | 1 + net/unix/af_unix.c | 4 ++++ net/unix/unix_bpf.c | 2 ++ 4 files changed, 10 insertions(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8536b2a7210b..2fffcf2b54f3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2867,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); + /* psock ingress_msg queue should not contain any bad checksum frames */ + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; return mask; } diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 7a1d5f473878..bbe6569c9ad3 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base) *prot = *base; prot->close = sock_map_close; prot->recvmsg = udp_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void udp_bpf_check_v6_needs_rebuild(struct proto *ops) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89f9e85ae970..78e08e82c08c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3052,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && @@ -3091,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; + if (sk_is_readable(sk)) + mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ if (sk->sk_type == SOCK_SEQPACKET) { diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index b927e2baae50..452376c6f419 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; } static void unix_stream_bpf_rebuild_protos(struct proto *prot, @@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot, *prot = *base; prot->close = sock_map_close; prot->recvmsg = unix_bpf_recvmsg; + prot->sock_is_readable = sk_msg_is_readable; prot->unhash = sock_map_unhash; } -- cgit v1.2.3 From 67b821502dbd6c9b23715da79cb9b37fa7d969dc Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Fri, 8 Oct 2021 13:33:06 -0700 Subject: selftests/bpf: Use recv_timeout() instead of retries We use non-blocking sockets in those tests, retrying for EAGAIN is ugly because there is no upper bound for the packet arrival time, at least in theory. After we fix poll() on sockmap sockets, now we can switch to select()+recv(). Signed-off-by: Yucong Sun Signed-off-by: Cong Wang Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211008203306.37525-5-xiyou.wangcong@gmail.com --- .../selftests/bpf/prog_tests/sockmap_listen.c | 75 ++++++---------------- 1 file changed, 20 insertions(+), 55 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523..d88bb65b74cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int err, n; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, goto close_peer1; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_peer1: xclose(p1); @@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; u32 key; char b; @@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); -- cgit v1.2.3 From 99d0a3831e3500d945162cdb2310e3a5fce90b60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Oct 2021 08:46:10 -1000 Subject: bpf: Move BPF_MAP_TYPE for INODE_STORAGE and TASK_STORAGE outside of CONFIG_NET bpf_types.h has BPF_MAP_TYPE_INODE_STORAGE and BPF_MAP_TYPE_TASK_STORAGE declared inside #ifdef CONFIG_NET although they are built regardless of CONFIG_NET. So, when CONFIG_BPF_SYSCALL && !CONFIG_NET, they are built without the declarations leading to spurious build failures and not registered to bpf_map_types making them unavailable. Fix it by moving the BPF_MAP_TYPE for the two map types outside of CONFIG_NET. Reported-by: kernel test robot Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs") Signed-off-by: Tejun Heo Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/YXG1cuuSJDqHQfRY@slm.duckdns.org --- include/linux/bpf_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 9c81724e4b98..bbe1eefa4c8a 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -#ifdef CONFIG_NET -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #ifdef CONFIG_BPF_LSM BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) +#ifdef CONFIG_NET +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #if defined(CONFIG_XDP_SOCKETS) BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) -- cgit v1.2.3 From 54713c85f536048e685258f880bf298a74c3620d Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 26 Oct 2021 13:00:19 +0200 Subject: bpf: Fix potential race in tail call compatibility check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lorenzo noticed that the code testing for program type compatibility of tail call maps is potentially racy in that two threads could encounter a map with an unset type simultaneously and both return true even though they are inserting incompatible programs. The race window is quite small, but artificially enlarging it by adding a usleep_range() inside the check in bpf_prog_array_compatible() makes it trivial to trigger from userspace with a program that does, essentially: map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, 4, 4, 2, 0); pid = fork(); if (pid) { key = 0; value = xdp_fd; } else { key = 1; value = tc_fd; } err = bpf_map_update_elem(map_fd, &key, &value, 0); While the race window is small, it has potentially serious ramifications in that triggering it would allow a BPF program to tail call to a program of a different type. So let's get rid of it by protecting the update with a spinlock. The commit in the Fixes tag is the last commit that touches the code in question. v2: - Use a spinlock instead of an atomic variable and cmpxchg() (Alexei) v3: - Put lock and the members it protects into an embedded 'owner' struct (Daniel) Fixes: 3324b584b6f6 ("ebpf: misc core cleanup") Reported-by: Lorenzo Bianconi Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20211026110019.363464-1-toke@redhat.com --- include/linux/bpf.h | 7 +++++-- kernel/bpf/arraymap.c | 1 + kernel/bpf/core.c | 20 +++++++++++++------- kernel/bpf/syscall.c | 6 ++++-- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 020a7d5bf470..3db6f6c95489 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -929,8 +929,11 @@ struct bpf_array_aux { * stored in the map to make sure that all callers and callees have * the same prog type and JITed flag. */ - enum bpf_prog_type type; - bool jited; + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cebd4fb06d19..447def540544 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1072,6 +1072,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); + spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c1e7eb3f1876..6e3ae90ad107 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1823,20 +1823,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + bool ret; + if (fp->kprobe_override) return false; - if (!array->aux->type) { + spin_lock(&array->aux->owner.lock); + + if (!array->aux->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->type = fp->type; - array->aux->jited = fp->jited; - return true; + array->aux->owner.type = fp->type; + array->aux->owner.jited = fp->jited; + ret = true; + } else { + ret = array->aux->owner.type == fp->type && + array->aux->owner.jited == fp->jited; } - - return array->aux->type == fp->type && - array->aux->jited == fp->jited; + spin_unlock(&array->aux->owner.lock); + return ret; } static int bpf_check_tail_call(const struct bpf_prog *fp) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9dab49d3f394..1cad6979a0d0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -543,8 +543,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); - type = array->aux->type; - jited = array->aux->jited; + spin_lock(&array->aux->owner.lock); + type = array->aux->owner.type; + jited = array->aux->owner.jited; + spin_unlock(&array->aux->owner.lock); } seq_printf(m, -- cgit v1.2.3 From 3bda2e5df476417b6d08967e2d84234a59d57b1c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:43 +0800 Subject: net: hns3: fix pause config problem after autoneg disabled If a TP port is configured by follow steps: 1.ethtool -s ethx autoneg off speed 100 duplex full 2.ethtool -A ethx rx on tx on 3.ethtool -s ethx autoneg on(rx&tx negotiated pause results are off) 4.ethtool -s ethx autoneg off speed 100 duplex full In step 3, driver will set rx&tx pause parameters of hardware to off as pause parameters negotiated with link partner are off. After step 4, the "ethtool -a ethx" command shows both rx and tx pause parameters are on. However, pause parameters of hardware are still off and port has no flow control function actually. To fix this problem, if autoneg is disabled, driver uses its saved parameters to restore pause of hardware. If the speed is not changed in this case, there is no link state changed for phy, it will cause the pause parameter is not taken effect, so we need to force phy to go down and up. Fixes: aacbe27e82f0 ("net: hns3: modify how pause options is displayed") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++++++++++++------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 ++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 + 5 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index d701451596c8..da3a593f6a56 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,6 +568,7 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); + int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 5ebd96f6833d..7d92dd273ed7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,6 +824,26 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } +static int hns3_set_phy_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int ret; + + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { + ret = ops->restore_pauseparam(handle); + if (ret) + return ret; + } + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); +} + static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -842,16 +862,11 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) { - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); - } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) { + if (netdev->phydev) + return hns3_set_phy_link_ksettings(netdev, cmd); + else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) return ops->set_phy_link_ksettings(handle, cmd); - } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dcd40cc73082..c6b9806c75a5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -11021,6 +11021,35 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } +static int hclge_restore_pauseparam(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + u32 auto_neg, rx_pause, tx_pause; + int ret; + + hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); + /* when autoneg is disabled, the pause setting of phy has no effect + * unless the link goes down. + */ + ret = phy_suspend(hdev->hw.mac.phydev); + if (ret) + return ret; + + phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); + + ret = phy_resume(hdev->hw.mac.phydev); + if (ret) + return ret; + + ret = hclge_mac_pause_setup_hw(hdev); + if (ret) + dev_err(&hdev->pdev->dev, + "restore pauseparam error, ret = %d.\n", ret); + + return ret; +} + static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12984,6 +13013,7 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, + .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 95074e91a846..124791e4bfee 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 2ee9b795f71d..4b2c3a788980 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,6 +244,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); +int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3 From f29da4088fb4eeba457219a931327d1d5f45196a Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Wed, 27 Oct 2021 20:11:44 +0800 Subject: net: hns3: change hclge/hclgevf workqueue to WQ_UNBOUND mode Currently, the workqueue of hclge/hclgevf is executed on the CPU that initiates scheduling requests by default. In stress scenarios, the CPU may be busy and workqueue scheduling is completed after a long period of time. To avoid this situation and implement proper scheduling, use the WQ_UNBOUND mode instead. In this way, the workqueue can be performed on a relatively idle CPU. Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 34 ++++------------------ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 1 - .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 3 files changed, 6 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c6b9806c75a5..3dbde0496545 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2847,33 +2847,28 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } static void hclge_errhand_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_and_set_bit(HCLGE_STATE_ERR_SERVICE_SCHED, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, 0); + mod_delayed_work(hclge_wq, &hdev->service_task, 0); } void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state)) - mod_delayed_work_on(cpumask_first(&hdev->affinity_mask), - hclge_wq, &hdev->service_task, - delay_time); + mod_delayed_work(hclge_wq, &hdev->service_task, delay_time); } static int hclge_get_mac_link_status(struct hclge_dev *hdev, int *link_status) @@ -3491,33 +3486,14 @@ static void hclge_get_misc_vector(struct hclge_dev *hdev) hdev->num_msi_used += 1; } -static void hclge_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct hclge_dev *hdev = container_of(notify, struct hclge_dev, - affinity_notify); - - cpumask_copy(&hdev->affinity_mask, mask); -} - -static void hclge_irq_affinity_release(struct kref *ref) -{ -} - static void hclge_misc_affinity_setup(struct hclge_dev *hdev) { irq_set_affinity_hint(hdev->misc_vector.vector_irq, &hdev->affinity_mask); - - hdev->affinity_notify.notify = hclge_irq_affinity_notify; - hdev->affinity_notify.release = hclge_irq_affinity_release; - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, - &hdev->affinity_notify); } static void hclge_misc_affinity_teardown(struct hclge_dev *hdev) { - irq_set_affinity_notifier(hdev->misc_vector.vector_irq, NULL); irq_set_affinity_hint(hdev->misc_vector.vector_irq, NULL); } @@ -13082,7 +13058,7 @@ static int hclge_init(void) { pr_info("%s is initializing\n", HCLGE_NAME); - hclge_wq = alloc_workqueue("%s", 0, 0, HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME); if (!hclge_wq) { pr_err("%s: failed to create workqueue\n", HCLGE_NAME); return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index de6afbcbfbac..69cd8f87b4c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -944,7 +944,6 @@ struct hclge_dev { /* affinity mask and notify for misc interrupt */ cpumask_t affinity_mask; - struct irq_affinity_notify affinity_notify; struct hclge_ptp *ptp; struct devlink *devlink; }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index bef6b98e2f50..5efa5420297d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3899,7 +3899,7 @@ static int hclgevf_init(void) { pr_info("%s is initializing\n", HCLGEVF_NAME); - hclgevf_wq = alloc_workqueue("%s", 0, 0, HCLGEVF_NAME); + hclgevf_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGEVF_NAME); if (!hclgevf_wq) { pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME); return -ENOMEM; -- cgit v1.2.3 From 0251d196b0e1a19c870be882e5d4f496de8ab758 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:45 +0800 Subject: net: hns3: ignore reset event before initialization process is done Currently, if there is a reset event triggered by RAS during device in initialization process, driver may run reset process concurrently with initialization process. In this case, it may cause problem. For example, the RSS indirection table may has not been alloc memory in initialization process yet, but it is used in reset process, it will cause a call trace like this: [61228.744836] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 ... [61228.897677] Workqueue: hclgevf hclgevf_service_task [hclgevf] [61228.911390] pstate: 40400009 (nZcv daif +PAN -UAO -TCO BTYPE=--) [61228.918670] pc : hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61228.927812] lr : hclgevf_set_rss_indir_table+0x90/0x190 [hclgevf] [61228.937248] sp : ffff8000162ebb50 [61228.941087] x29: ffff8000162ebb50 x28: ffffb77add72dbc0 x27: ffff0820c7dc8080 [61228.949516] x26: 0000000000000000 x25: ffff0820ad4fc880 x24: ffff0820c7dc8080 [61228.958220] x23: ffff0820c7dc8090 x22: 00000000ffffffff x21: 0000000000000040 [61228.966360] x20: ffffb77add72b9c0 x19: 0000000000000000 x18: 0000000000000030 [61228.974646] x17: 0000000000000000 x16: ffffb77ae713feb0 x15: ffff0820ad4fcce8 [61228.982808] x14: ffffffffffffffff x13: ffff8000962eb7f7 x12: 00003834ec70c960 [61228.991990] x11: 00e0fafa8c206982 x10: 9670facc78a8f9a8 x9 : ffffb77add717530 [61229.001123] x8 : ffff0820ad4fd6b8 x7 : 0000000000000000 x6 : 0000000000000011 [61229.010249] x5 : 00000000000cb1b0 x4 : 0000000000002adb x3 : 0000000000000049 [61229.018662] x2 : ffff8000162ebbb8 x1 : 0000000000000000 x0 : 0000000000000480 [61229.027002] Call trace: [61229.030177] hclgevf_set_rss_indir_table+0xb4/0x190 [hclgevf] [61229.039009] hclgevf_rss_init_hw+0x128/0x1b4 [hclgevf] [61229.046809] hclgevf_reset_rebuild+0x17c/0x69c [hclgevf] [61229.053862] hclgevf_reset_service_task+0x4cc/0xa80 [hclgevf] [61229.061306] hclgevf_service_task+0x6c/0x630 [hclgevf] [61229.068491] process_one_work+0x1dc/0x48c [61229.074121] worker_thread+0x15c/0x464 [61229.078562] kthread+0x168/0x16c [61229.082873] ret_from_fork+0x10/0x18 [61229.088221] Code: 7900e7f6 f904a683 d503201f 9101a3e2 (38616b43) [61229.095357] ---[ end trace 153661a538f6768c ]--- To fix this problem, don't schedule reset task before initialization process is done. Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +++ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3dbde0496545..269e579762b2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2853,6 +2853,7 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) static void hclge_reset_task_schedule(struct hclge_dev *hdev) { if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclge_wq, &hdev->service_task, 0); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5efa5420297d..cf00ad7bb881 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2232,6 +2232,7 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) && + test_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state) && !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) mod_delayed_work(hclgevf_wq, &hdev->service_task, 0); @@ -3449,6 +3450,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) hclgevf_init_rxd_adv_layout(hdev); + set_bit(HCLGEVF_STATE_SERVICE_INITED, &hdev->state); + hdev->last_reset_time = jiffies; dev_info(&hdev->pdev->dev, "finished initializing %s driver\n", HCLGEVF_DRIVER_NAME); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 883130a9b48f..28288d7e3303 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -146,6 +146,7 @@ enum hclgevf_states { HCLGEVF_STATE_REMOVING, HCLGEVF_STATE_NIC_REGISTERED, HCLGEVF_STATE_ROCE_REGISTERED, + HCLGEVF_STATE_SERVICE_INITED, /* task states */ HCLGEVF_STATE_RST_SERVICE_SCHED, HCLGEVF_STATE_RST_HANDLING, -- cgit v1.2.3 From 2a21dab594a98c338c4bfbc31864cbca15888549 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:46 +0800 Subject: net: hns3: fix data endian problem of some functions of debugfs The member data in struct hclge_desc is type of __le32, it needs endian conversion before using it, and some functions of debugfs didn't do that, so this patch fixes it. Fixes: c0ebebb9ccc1 ("net: hns3: Add "dcb register" status information query function") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 30 ++++++++++------------ 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index 32f62cd2dd99..9cda8b3562b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -391,7 +391,7 @@ static int hclge_dbg_dump_mac(struct hclge_dev *hdev, char *buf, int len) static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u16 qset_id, qset_num; int ret; @@ -408,12 +408,12 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%04u %#x %#x %#x %#x\n", - qset_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2, bitmap->bit3); + qset_id, req.bit0, req.bit1, req.bit2, + req.bit3); } return 0; @@ -422,7 +422,7 @@ static int hclge_dbg_dump_dcb_qset(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pri_id, pri_num; int ret; @@ -439,12 +439,11 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pri_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pri_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -453,7 +452,7 @@ static int hclge_dbg_dump_dcb_pri(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 pg_id; int ret; @@ -466,12 +465,11 @@ static int hclge_dbg_dump_dcb_pg(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "%03u %#x %#x %#x\n", - pg_id, bitmap->bit0, bitmap->bit1, - bitmap->bit2); + pg_id, req.bit0, req.bit1, req.bit2); } return 0; @@ -511,7 +509,7 @@ static int hclge_dbg_dump_dcb_queue(struct hclge_dev *hdev, char *buf, int len, static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, int *pos) { - struct hclge_dbg_bitmap_cmd *bitmap; + struct hclge_dbg_bitmap_cmd req; struct hclge_desc desc; u8 port_id = 0; int ret; @@ -521,12 +519,12 @@ static int hclge_dbg_dump_dcb_port(struct hclge_dev *hdev, char *buf, int len, if (ret) return ret; - bitmap = (struct hclge_dbg_bitmap_cmd *)&desc.data[1]; + req.bitmap = (u8)le32_to_cpu(desc.data[1]); *pos += scnprintf(buf + *pos, len - *pos, "port_mask: %#x\n", - bitmap->bit0); + req.bit0); *pos += scnprintf(buf + *pos, len - *pos, "port_shaping_pass: %#x\n", - bitmap->bit1); + req.bit1); return 0; } -- cgit v1.2.3 From 6754614a787cbcbf87bae8a75619c24a33ea6791 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Wed, 27 Oct 2021 20:11:47 +0800 Subject: net: hns3: add more string spaces for dumping packets number of queue info in debugfs As the width of packets number registers is 32 bits, they needs at most 10 characters for decimal data printing, but now the string spaces is not enough, so this patch fixes it. Fixes: e44c495d95e ("net: hns3: refactor queue info of debugfs") Signed-off-by: Jie Wang Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 2b66c59f5eaf..3aaad96d0176 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -462,7 +462,7 @@ static const struct hns3_dbg_item rx_queue_info_items[] = { { "TAIL", 2 }, { "HEAD", 2 }, { "FBDNUM", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "COPYBREAK", 2 }, { "RING_EN", 2 }, { "RX_RING_EN", 2 }, @@ -565,7 +565,7 @@ static const struct hns3_dbg_item tx_queue_info_items[] = { { "HEAD", 2 }, { "FBDNUM", 2 }, { "OFFSET", 2 }, - { "PKTNUM", 2 }, + { "PKTNUM", 5 }, { "RING_EN", 2 }, { "TX_RING_EN", 2 }, { "BASE_ADDR", 10 }, -- cgit v1.2.3 From c7a6e3978ea952efb107ecf511c095c3bbb2945f Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:48 +0800 Subject: net: hns3: expand buffer len for some debugfs command The specified buffer length for three debugfs files fd_tcam, uc and tqp is not enough for their maximum needs, so this patch fixes them. Fixes: b5a0b70d77b9 ("net: hns3: refactor dump fd tcam of debugfs") Fixes: 1556ea9120ff ("net: hns3: refactor dump mac list of debugfs") Fixes: d96b0e59468d ("net: hns3: refactor dump reg of debugfs") Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 3aaad96d0176..f2ade0446208 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -137,7 +137,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "uc", .cmd = HNAE3_DBG_CMD_MAC_UC, .dentry = HNS3_DBG_DENTRY_MAC, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -256,7 +256,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "tqp", .cmd = HNAE3_DBG_CMD_REG_TQP, .dentry = HNS3_DBG_DENTRY_REG, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_128KB, .init = hns3_dbg_common_file_init, }, { @@ -298,7 +298,7 @@ static struct hns3_dbg_cmd_info hns3_dbg_cmd[] = { .name = "fd_tcam", .cmd = HNAE3_DBG_CMD_FD_TCAM, .dentry = HNS3_DBG_DENTRY_FD, - .buf_len = HNS3_DBG_READ_LEN, + .buf_len = HNS3_DBG_READ_LEN_1MB, .init = hns3_dbg_common_file_init, }, { -- cgit v1.2.3 From 630a6738da82e2c29e1c38eafd6f8328e0a4963c Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Wed, 27 Oct 2021 20:11:49 +0800 Subject: net: hns3: adjust string spaces of some parameters of tx bd info in debugfs This patch adjusts the string spaces of some parameters of tx bd info in debugfs according to their maximum needs. Signed-off-by: Guangbin Huang Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index f2ade0446208..e54f96251fea 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -790,13 +790,13 @@ static int hns3_dbg_rx_bd_info(struct hns3_dbg_data *d, char *buf, int len) } static const struct hns3_dbg_item tx_bd_info_items[] = { - { "BD_IDX", 5 }, - { "ADDRESS", 2 }, + { "BD_IDX", 2 }, + { "ADDRESS", 13 }, { "VLAN_TAG", 2 }, { "SIZE", 2 }, { "T_CS_VLAN_TSO", 2 }, { "OT_VLAN_TAG", 3 }, - { "TV", 2 }, + { "TV", 5 }, { "OLT_VLAN_LEN", 2 }, { "PAYLEN_OL4CS", 2 }, { "BD_FE_SC_VLD", 2 }, -- cgit v1.2.3 From 4a089e95b4d6bb625044d47aed0c442a8f7bd093 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 20 Oct 2021 12:11:16 -0700 Subject: nios2: Make NIOS2_DTB_SOURCE_BOOL depend on !COMPILE_TEST nios2:allmodconfig builds fail with make[1]: *** No rule to make target 'arch/nios2/boot/dts/""', needed by 'arch/nios2/boot/dts/built-in.a'. Stop. make: [Makefile:1868: arch/nios2/boot/dts] Error 2 (ignored) This is seen with compile tests since those enable NIOS2_DTB_SOURCE_BOOL, which in turn enables NIOS2_DTB_SOURCE. This causes the build error because the default value for NIOS2_DTB_SOURCE is an empty string. Disable NIOS2_DTB_SOURCE_BOOL for compile tests to avoid the error. Fixes: 2fc8483fdcde ("nios2: Build infrastructure") Signed-off-by: Guenter Roeck Reviewed-by: Randy Dunlap Signed-off-by: Dinh Nguyen --- arch/nios2/platform/Kconfig.platform | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4c..e849daff6fd1 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. -- cgit v1.2.3 From 4e84dc47bb48accbbeeba4e6bb3f31aa7895323c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Oct 2021 12:51:01 -0400 Subject: ftrace/nds32: Update the proto for ftrace_trace_function to match ftrace_stub The ftrace callback prototype was changed to pass a special ftrace_regs instead of pt_regs as the last parameter, but the static ftrace for nds32 missed updating ftrace_trace_function and this caused a warning when compared to ftrace_stub: ../arch/nds32/kernel/ftrace.c: In function '_mcount': ../arch/nds32/kernel/ftrace.c:24:35: error: comparison of distinct pointer types lacks a cast [-Werror] 24 | if (ftrace_trace_function != ftrace_stub) | ^~ Link: https://lore.kernel.org/all/20211027055554.19372-1-rdunlap@infradead.org/ Link: https://lkml.kernel.org/r/20211027125101.33449969@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Fixes: d19ad0775dcd6 ("ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs") Reported-by: Randy Dunlap Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 0e23e3a8df6b..d55b73b18149 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -6,7 +6,7 @@ #ifndef CONFIG_DYNAMIC_FTRACE extern void (*ftrace_trace_function)(unsigned long, unsigned long, - struct ftrace_ops*, struct pt_regs*); + struct ftrace_ops*, struct ftrace_regs*); extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 6f7c88691191e6c52ef2543d6f1da8d360b27a24 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 26 Oct 2021 20:40:15 +0800 Subject: usbnet: fix error return code in usbnet_probe() Return error code if usb_maxpacket() returns 0 in usbnet_probe() Fixes: 397430b50a36 ("usbnet: sanity check for maxpacket") Reported-by: Hulk Robot Signed-off-by: Wang Hai Reviewed-by: Johan Hovold Link: https://lore.kernel.org/r/20211026124015.3025136-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 80432ee0ce69..a33d7fb82a00 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1790,6 +1790,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); if (dev->maxpacket == 0) { /* that is a broken device */ + status = -ENODEV; goto out4; } -- cgit v1.2.3 From 890d33561337ffeba0d8ba42517e71288cfee2b6 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Tue, 26 Oct 2021 15:31:00 +0200 Subject: virtio-ring: fix DMA metadata flags The flags are currently overwritten, leading to the wrong direction being passed to the DMA unmap functions. Fixes: 72b5e8958738aaa4 ("virtio-ring: store DMA metadata in desc_extra for split virtqueue") Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20211026133100.17541-1-vincent.whitchurch@axis.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index dd95dfd85e98..3035bb6f5458 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -576,7 +576,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); if (!indirect && vq->use_dma_api) - vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags = + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= ~VRING_DESC_F_NEXT; if (indirect) { -- cgit v1.2.3 From f82161516756be2827609ec4845e1e4316df0709 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 24 Oct 2021 09:38:31 -0700 Subject: ptp: Document the PTP_CLK_MAGIC ioctl number Add PTP_CLK_MAGIC to the userspace-api/ioctl/ioctl-number.rst documentation file. Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.") Signed-off-by: Randy Dunlap Cc: Richard Cochran Cc: John Stultz Cc: Jonathan Corbet Link: https://lore.kernel.org/r/20211024163831.10200-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 2e8134059c87..6655d929a351 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -104,6 +104,7 @@ Code Seq# Include File Comments '8' all SNP8023 advanced NIC card ';' 64-7F linux/vfio.h +'=' 00-3f uapi/linux/ptp_clock.h '@' 00-0F linux/radeonfb.h conflict! '@' 00-0F drivers/video/aty/aty128fb.c conflict! 'A' 00-1F linux/apm_bios.h conflict! -- cgit v1.2.3 From 72f898ca0ab85fde6facf78b14d9f67a4a7b32d1 Mon Sep 17 00:00:00 2001 From: Janghyub Seo Date: Tue, 26 Oct 2021 07:12:42 +0000 Subject: r8169: Add device 10ec:8162 to driver r8169 This patch makes the driver r8169 pick up device Realtek Semiconductor Co. , Ltd. Device [10ec:8162]. Signed-off-by: Janghyub Seo Suggested-by: Rushab Shah Link: https://lore.kernel.org/r/1635231849296.1489250046.441294000@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 46a6ff9a782d..2918947dd57c 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -157,6 +157,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8129) }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8162) }, { PCI_VDEVICE(REALTEK, 0x8167) }, { PCI_VDEVICE(REALTEK, 0x8168) }, { PCI_VDEVICE(NCUBE, 0x8168) }, -- cgit v1.2.3 From 7fa598f9706d40bd16f2ab286bdf5808e1393d35 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 27 Oct 2021 12:08:54 -0400 Subject: tracing: Do not warn when connecting eprobe to non existing event When the syscall trace points are not configured in, the kselftests for ftrace will try to attach an event probe (eprobe) to one of the system call trace points. This triggered a WARNING, because the failure only expects to see memory issues. But this is not the only failure. The user may attempt to attach to a non existent event, and the kernel must not warn about it. Link: https://lkml.kernel.org/r/20211027120854.0680aa0f@gandalf.local.home Fixes: 7491e2c442781 ("tracing: Add a probe that attaches to trace events") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_eprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index c4a15aef36af..5c5f208c15d3 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -904,8 +904,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); - /* This must return -ENOMEM, else there is a bug */ - WARN_ON_ONCE(ret != -ENOMEM); + /* This must return -ENOMEM or misssing event, else there is a bug */ + WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; goto error; } -- cgit v1.2.3 From 9159f102402a64ac85e676b75cc1f9c62c5b4b73 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 26 Oct 2021 14:50:31 -0700 Subject: vmxnet3: do not stop tx queues after netif_device_detach() The netif_device_detach() conditionally stops all tx queues if the queues are running. There is no need to call netif_tx_stop_all_queues() again. Signed-off-by: Dongli Zhang Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 142f70670f5c..8799854bacb2 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3833,7 +3833,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; -- cgit v1.2.3 From 90a881fc352a953f1c8beb61977a2db0818157d4 Mon Sep 17 00:00:00 2001 From: Yu Xiao Date: Thu, 28 Oct 2021 12:00:36 +0200 Subject: nfp: bpf: relax prog rejection for mtu check through max_pkt_offset MTU change is refused whenever the value of new MTU is bigger than the max packet bytes that fits in NFP Cluster Target Memory (CTM). However, an eBPF program doesn't always need to access the whole packet data. The maximum direct packet access (DPA) offset has always been caculated by verifier and stored in the max_pkt_offset field of prog aux data. Signed-off-by: Yu Xiao Reviewed-by: Yinjun Zhang Reviewed-by: Niklas Soderlund Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 16 +++++++++++----- drivers/net/ethernet/netronome/nfp/bpf/main.h | 2 ++ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 17 +++++++++++++---- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 11c83a99b014..f469950c7265 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -182,15 +182,21 @@ static int nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); - unsigned int max_mtu; + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return 0; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (new_mtu > max_mtu) { - nn_info(nn, "BPF offload active, MTU over %u not supported\n", - max_mtu); + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); return -EBUSY; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index d0e17eebddd9..16841bb750b7 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 53851853562c..9d97cd281f18 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -481,19 +481,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, return 0; } +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int fw_mtu, pkt_off, max_stack, max_prog_len; + unsigned int max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; - fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu); - if (fw_mtu < pkt_off) { + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); return -EOPNOTSUPP; } -- cgit v1.2.3 From c4a146c7cf5e8ad76231523b174d161bf152c6e7 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Thu, 28 Oct 2021 15:13:45 +0800 Subject: net/smc: Fix smc_link->llc_testlink_time overflow The value of llc_testlink_time is set to the value stored in net->ipv4.sysctl_tcp_keepalive_time when linkgroup init. The value of sysctl_tcp_keepalive_time is already jiffies, so we don't need to multiply by HZ, which would cause smc_link->llc_testlink_time overflow, and test_link send flood. Signed-off-by: Tony Lu Reviewed-by: Xuan Zhuo Reviewed-by: Wen Gu Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 72f4b72eb175..f1d323439a2a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1822,7 +1822,7 @@ void smc_llc_link_active(struct smc_link *link) link->smcibdev->ibdev->name, link->ibport); link->state = SMC_LNK_ACTIVE; if (link->lgr->llc_testlink_time) { - link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; + link->llc_testlink_time = link->lgr->llc_testlink_time; schedule_delayed_work(&link->llc_testlink_wrk, link->llc_testlink_time); } -- cgit v1.2.3 From f3a3a0fe0b644582fa5d83dd94b398f99fc57914 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Thu, 28 Oct 2021 15:13:47 +0800 Subject: net/smc: Correct spelling mistake to TCPF_SYN_RECV There should use TCPF_SYN_RECV instead of TCP_SYN_RECV. Signed-off-by: Wen Gu Reviewed-by: Tony Lu Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c038efc23ce3..78b663dbfa1f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1057,7 +1057,7 @@ static void smc_connect_work(struct work_struct *work) if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; } else if ((1 << smc->clcsock->sk->sk_state) & - (TCPF_SYN_SENT | TCP_SYN_RECV)) { + (TCPF_SYN_SENT | TCPF_SYN_RECV)) { rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); if ((rc == -EPIPE) && ((1 << smc->clcsock->sk->sk_state) & -- cgit v1.2.3 From da353fac65fede6b8b4cfe207f0d9408e3121105 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:20 -0400 Subject: net/tls: Fix flipped sign in tls_err_abort() calls sk->sk_err appears to expect a positive value, a convention that ktls doesn't always follow and that leads to memory corruption in other code. For instance, [kworker] tls_encrypt_done(..., err=) tls_err_abort(.., err) sk->sk_err = err; [task] splice_from_pipe_feed ... tls_sw_do_sendpage if (sk->sk_err) { ret = -sk->sk_err; // ret is positive splice_from_pipe_feed (continued) ret = actor(...) // ret is still positive and interpreted as bytes // written, resulting in underflow of buf->len and // sd->len, leading to huge buf->offset and bogus // addresses computed in later calls to actor() Fix all tls_err_abort() callers to pass a negative error code consistently and centralize the error-prone sign flip there, throwing in a warning to catch future misuse and uninlining the function so it really does only warn once. Cc: stable@vger.kernel.org Fixes: c46234ebb4d1e ("tls: RX path for ktls") Reported-by: syzbot+b187b77c8474f9648fae@syzkaller.appspotmail.com Signed-off-by: Daniel Jordan Signed-off-by: David S. Miller --- include/net/tls.h | 9 ++------- net/tls/tls_sw.c | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/include/net/tls.h b/include/net/tls.h index 01d2e3744393..1fffb206f09f 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -358,6 +358,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -466,12 +467,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -512,7 +507,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION && prot->cipher_type != TLS_CIPHER_CHACHA20_POLY1305) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d5d09bd817b7..1644f8baea19 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -35,6 +35,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -43,6 +44,14 @@ #include #include +noinline void tls_err_abort(struct sock *sk, int err) +{ + WARN_ON_ONCE(err >= 0); + /* sk->sk_err should contain a positive error code. */ + sk->sk_err = -err; + sk_error_report(sk); +} + static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); return rc; } @@ -763,7 +772,7 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); @@ -1827,7 +1836,7 @@ int tls_sw_recvmsg(struct sock *sk, err = decrypt_skb_update(sk, skb, &msg->msg_iter, &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto recv_end; } @@ -2007,7 +2016,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, } if (err < 0) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto splice_read_end; } ctx->decrypted = 1; -- cgit v1.2.3 From 1d9d6fd21ad4a28b16ed9ee5432ae738b9dc58aa Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Wed, 27 Oct 2021 17:59:21 -0400 Subject: net/tls: Fix flipped sign in async_wait.err assignment sk->sk_err contains a positive number, yet async_wait.err wants the opposite. Fix the missed sign flip, which Jakub caught by inspection. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Suggested-by: Jakub Kicinski Signed-off-by: Daniel Jordan Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1644f8baea19..1b08b877a890 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -459,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If err is already set on socket, return the same code */ if (sk->sk_err) { - ctx->async_wait.err = sk->sk_err; + ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); -- cgit v1.2.3 From e8684db191e4164f3f5f3ad7dec04a6734c25f1c Mon Sep 17 00:00:00 2001 From: Yuiko Oshino Date: Wed, 27 Oct 2021 14:23:02 -0400 Subject: net: ethernet: microchip: lan743x: Fix skb allocation failure The driver allocates skb during ndo_open with GFP_ATOMIC which has high chance of failure when there are multiple instances. GFP_KERNEL is enough while open and use GFP_ATOMIC only from interrupt context. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Yuiko Oshino Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index c4f32c7e0db1..4d5a5d6595b3 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1944,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) index); } -static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) +static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, + gfp_t gfp) { struct net_device *netdev = rx->adapter->netdev; struct device *dev = &rx->adapter->pdev->dev; @@ -1958,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index) descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA); + skb = __netdev_alloc_skb(netdev, buffer_length, gfp); if (!skb) return -ENOMEM; dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE); @@ -2120,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx) /* save existing skb, allocate new skb and map to dma */ skb = buffer_info->skb; - if (lan743x_rx_init_ring_element(rx, rx->last_head)) { + if (lan743x_rx_init_ring_element(rx, rx->last_head, + GFP_ATOMIC | GFP_DMA)) { /* failed to allocate next skb. * Memory is very low. * Drop this packet and reuse buffer. @@ -2335,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - ret = lan743x_rx_init_ring_element(rx, index); + ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL); if (ret) goto cleanup; } return 0; cleanup: + netif_warn(rx->adapter, ifup, rx->adapter->netdev, + "Error allocating memory for LAN743x\n"); + lan743x_rx_ring_cleanup(rx); return ret; } -- cgit v1.2.3 From cc45b96e2de7ada26520f101dada0abafa4ba997 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 27 Oct 2021 23:02:32 +0530 Subject: octeontx2-af: Check whether ipolicers exists While displaying ingress policers information in debugfs check whether ingress policers exist in the hardware or not because some platforms(CN9XXX) do not have this feature. Fixes: e7d8971763f3 ("octeontx2-af: cn10k: Debugfs support for bandwidth") Signed-off-by: Subbaraya Sundeep Signed-off-by: Rakesh Babu Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 9338765da048..6c589ca9b577 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1719,6 +1719,10 @@ static int rvu_dbg_nix_band_prof_ctx_display(struct seq_file *m, void *unused) u16 pcifunc; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { if (layer == BAND_PROF_INVAL_LAYER) continue; @@ -1768,6 +1772,10 @@ static int rvu_dbg_nix_band_prof_rsrc_display(struct seq_file *m, void *unused) int layer; char *str; + /* Ingress policers do not exist on all platforms */ + if (!nix_hw->ipolicer) + return 0; + seq_puts(m, "\nBandwidth profile resource free count\n"); seq_puts(m, "=====================================\n"); for (layer = 0; layer < BAND_PROF_NUM_LAYERS; layer++) { -- cgit v1.2.3 From e77bcdd1f639809950c45234b08647ac6d3ffe7b Mon Sep 17 00:00:00 2001 From: Rakesh Babu Date: Wed, 27 Oct 2021 23:02:33 +0530 Subject: octeontx2-af: Display all enabled PF VF rsrc_alloc entries. Currently, we are using a fixed buffer size of length 2048 to display rsrc_alloc output. As a result a maximum of 2048 characters of rsrc_alloc output is displayed, which may lead sometimes to display only partial output. This patch fixes this dependency on max limit of buffer size and displays all PF VF entries. Each column of the debugfs entry "rsrc_alloc" uses a fixed width of 12 characters to print the list of LFs of each block for a PF/VF. If the length of list of LFs of a block exceeds this fixed width then the list gets truncated and displays only a part of the list. This patch fixes this by using the maximum possible length of list of LFs among all blocks of all PFs and VFs entries as the width size. Fixes: f7884097141b ("octeontx2-af: Formatting debugfs entry rsrc_alloc.") Fixes: 23205e6d06d4 ("octeontx2-af: Dump current resource provisioning status") Signed-off-by: Rakesh Babu Signed-off-by: Nithin Dabilpuram Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 138 ++++++++++++++++----- 1 file changed, 106 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 6c589ca9b577..c7e12464c243 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -226,18 +226,85 @@ static const struct file_operations rvu_dbg_##name##_fops = { \ static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf); +static void get_lf_str_list(struct rvu_block block, int pcifunc, + char *lfs) +{ + int lf = 0, seq = 0, len = 0, prev_lf = block.lf.max; + + for_each_set_bit(lf, block.lf.bmap, block.lf.max) { + if (lf >= block.lf.max) + break; + + if (block.fn_map[lf] != pcifunc) + continue; + + if (lf == prev_lf + 1) { + prev_lf = lf; + seq = 1; + continue; + } + + if (seq) + len += sprintf(lfs + len, "-%d,%d", prev_lf, lf); + else + len += (len ? sprintf(lfs + len, ",%d", lf) : + sprintf(lfs + len, "%d", lf)); + + prev_lf = lf; + seq = 0; + } + + if (seq) + len += sprintf(lfs + len, "-%d", prev_lf); + + lfs[len] = '\0'; +} + +static int get_max_column_width(struct rvu *rvu) +{ + int index, pf, vf, lf_str_size = 12, buf_size = 256; + struct rvu_block block; + u16 pcifunc; + char *buf; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + pcifunc = pf << 10 | vf; + if (!pcifunc) + continue; + + for (index = 0; index < BLK_COUNT; index++) { + block = rvu->hw->block[index]; + if (!strlen(block.name)) + continue; + + get_lf_str_list(block, pcifunc, buf); + if (lf_str_size <= strlen(buf)) + lf_str_size = strlen(buf) + 1; + } + } + } + + kfree(buf); + return lf_str_size; +} + /* Dumps current provisioning status of all RVU block LFs */ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, len = 0; + int index, off = 0, flag = 0, len = 0, i = 0; struct rvu *rvu = filp->private_data; - int lf, pf, vf, pcifunc; + int bytes_not_copied = 0; struct rvu_block block; - int bytes_not_copied; - int lf_str_size = 12; + int pf, vf, pcifunc; int buf_size = 2048; + int lf_str_size; char *lfs; char *buf; @@ -249,6 +316,9 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, if (!buf) return -ENOSPC; + /* Get the maximum width of a column */ + lf_str_size = get_max_column_width(rvu); + lfs = kzalloc(lf_str_size, GFP_KERNEL); if (!lfs) { kfree(buf); @@ -262,65 +332,69 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, "%-*s", lf_str_size, rvu->hw->block[index].name); } + off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + (i * off), buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + off = 0; + flag = 0; pcifunc = pf << 10 | vf; if (!pcifunc) continue; if (vf) { sprintf(lfs, "PF%d:VF%d", pf, vf - 1); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } else { sprintf(lfs, "PF%d", pf); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } - off += go_back; - for (index = 0; index < BLKTYPE_MAX; index++) { + for (index = 0; index < BLK_COUNT; index++) { block = rvu->hw->block[index]; if (!strlen(block.name)) continue; len = 0; lfs[len] = '\0'; - for (lf = 0; lf < block.lf.max; lf++) { - if (block.fn_map[lf] != pcifunc) - continue; + get_lf_str_list(block, pcifunc, lfs); + if (strlen(lfs)) flag = 1; - len += sprintf(&lfs[len], "%d,", lf); - } - if (flag) - len--; - lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, lfs); - if (!strlen(lfs)) - go_back += lf_str_size; } - if (!flag) - off -= go_back; - else - flag = 0; - off--; - off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + if (flag) { + off += scnprintf(&buf[off], + buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + + (i * off), + buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; + } } } - bytes_not_copied = copy_to_user(buffer, buf, off); +out: kfree(lfs); kfree(buf); - if (bytes_not_copied) return -EFAULT; - *ppos = off; - return off; + return *ppos; } RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); -- cgit v1.2.3 From c2d4c543f74c90f883e8ec62a31973ae8807d354 Mon Sep 17 00:00:00 2001 From: Rakesh Babu Saladi Date: Wed, 27 Oct 2021 23:02:34 +0530 Subject: octeontx2-af: Fix possible null pointer dereference. This patch fixes possible null pointer dereference in files "rvu_debugfs.c" and "rvu_nix.c" Fixes: 8756828a8148 ("octeontx2-af: Add NPA aura and pool contexts to debugfs") Fixes: 9a946def264d ("octeontx2-af: Modify nix_vtag_cfg mailbox to support TX VTAG entries") Signed-off-by: Rakesh Babu Saladi Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index c7e12464c243..49d822a98ada 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -578,7 +578,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, if (cmd_buf) ret = -EINVAL; - if (!strncmp(subtoken, "help", 4) || ret < 0) { + if (ret < 0 || !strncmp(subtoken, "help", 4)) { dev_info(rvu->dev, "Use echo <%s-lf > qsize\n", blk_string); goto qsize_write_done; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 9ef4e942e31e..6970540dc470 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2507,6 +2507,9 @@ static void nix_free_tx_vtag_entries(struct rvu *rvu, u16 pcifunc) return; nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + vlan = &nix_hw->txvlan; mutex_lock(&vlan->rsrc_lock); -- cgit v1.2.3 From 27de809a3d83a6199664479ebb19712533d6fd9b Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Thu, 28 Oct 2021 14:51:15 +0200 Subject: riscv, bpf: Fix potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_jit_binary_free() function requires a non-NULL argument. When the RISC-V BPF JIT fails to converge in NR_JIT_ITERATIONS steps, jit_data->header will be NULL, which triggers a NULL dereference. Avoid this by checking the argument, prior calling the function. Fixes: ca6cb5447cec ("riscv, bpf: Factor common RISC-V JIT code") Signed-off-by: Björn Töpel Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20211028125115.514587-1-bjorn@kernel.org Signed-off-by: Jakub Kicinski --- arch/riscv/net/bpf_jit_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 0fee2cbaaf53..753d85bdfad0 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } -- cgit v1.2.3 From f7cc8890f30d3ddc785e2b2ddc647da5b4b3c3ec Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 27 Oct 2021 13:38:55 -0700 Subject: mptcp: fix corrupt receiver key in MPC + data + checksum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit using packetdrill it's possible to observe that the receiver key contains random values when clients transmit MP_CAPABLE with data and checksum (as specified in RFC8684 §3.1). Fix the layout of mptcp_out_options, to avoid using the skb extension copy when writing the MP_CAPABLE sub-option. Fixes: d7b269083786 ("mptcp: shrink mptcp_out_options struct") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/233 Reported-by: Poorva Sonparote Signed-off-by: Davide Caratti Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20211027203855.264600-1-mathew.j.martineau@linux.intel.com Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 4 ++++ net/mptcp/options.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 6026bbefbffd..3214848402ec 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -69,6 +69,10 @@ struct mptcp_out_options { struct { u64 sndr_key; u64 rcvr_key; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + __sum16 csum; }; struct { struct mptcp_addr_info addr; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c41273cefc51..f0f22eb4fd5f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, mpext = mptcp_get_ext(skb); data_len = mpext ? mpext->data_len : 0; - /* we will check ext_copy.data_len in mptcp_write_options() to + /* we will check ops->data_len in mptcp_write_options() to * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and * TCPOLEN_MPTCP_MPC_ACK */ - opts->ext_copy.data_len = data_len; + opts->data_len = data_len; opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; @@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) { /* we need to propagate more info to csum the pseudo hdr */ - opts->ext_copy.data_seq = mpext->data_seq; - opts->ext_copy.subflow_seq = mpext->subflow_seq; - opts->ext_copy.csum = mpext->csum; + opts->data_seq = mpext->data_seq; + opts->subflow_seq = mpext->subflow_seq; + opts->csum = mpext->csum; len += TCPOLEN_MPTCP_DSS_CHECKSUM; } *size = ALIGN(len, 4); @@ -1227,7 +1227,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp) WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); } -static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum) { struct csum_pseudo_header header; __wsum csum; @@ -1237,15 +1237,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) * always the 64-bit value, irrespective of what length is used in the * DSS option itself. */ - header.data_seq = cpu_to_be64(mpext->data_seq); - header.subflow_seq = htonl(mpext->subflow_seq); - header.data_len = htons(mpext->data_len); + header.data_seq = cpu_to_be64(data_seq); + header.subflow_seq = htonl(subflow_seq); + header.data_len = htons(data_len); header.csum = 0; - csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum)); return (__force u16)csum_fold(csum); } +static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len, + mpext->csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { @@ -1337,7 +1343,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, len = TCPOLEN_MPTCP_MPC_SYN; } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - } else if (opts->ext_copy.data_len) { + } else if (opts->data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; if (opts->csum_reqd) len += TCPOLEN_MPTCP_DSS_CHECKSUM; @@ -1366,14 +1372,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, put_unaligned_be64(opts->rcvr_key, ptr); ptr += 2; - if (!opts->ext_copy.data_len) + if (!opts->data_len) goto mp_capable_done; if (opts->csum_reqd) { - put_unaligned_be32(opts->ext_copy.data_len << 16 | - mptcp_make_csum(&opts->ext_copy), ptr); + put_unaligned_be32(opts->data_len << 16 | + __mptcp_make_csum(opts->data_seq, + opts->subflow_seq, + opts->data_len, + opts->csum), ptr); } else { - put_unaligned_be32(opts->ext_copy.data_len << 16 | + put_unaligned_be32(opts->data_len << 16 | TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; -- cgit v1.2.3 From 35392da51b1ab7ba0c63de0a553e2a93c2314266 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 28 Oct 2021 22:06:24 +0800 Subject: Revert "net: hns3: fix pause config problem after autoneg disabled" This reverts commit 3bda2e5df476417b6d08967e2d84234a59d57b1c. According to discussion with Andrew as follow: https://lore.kernel.org/netdev/09eda9fe-196b-006b-6f01-f54e75715961@huawei.com/ HNS3 driver needs to separate pause autoneg from general autoneg, so revert this incorrect patch. Signed-off-by: Guangbin Huang Link: https://lore.kernel.org/r/20211028140624.53149-1-huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 33 ++++++---------------- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 30 -------------------- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 +- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 1 - 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index da3a593f6a56..d701451596c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -568,7 +568,6 @@ struct hnae3_ae_ops { u32 *auto_neg, u32 *rx_en, u32 *tx_en); int (*set_pauseparam)(struct hnae3_handle *handle, u32 auto_neg, u32 rx_en, u32 tx_en); - int (*restore_pauseparam)(struct hnae3_handle *handle); int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 7d92dd273ed7..5ebd96f6833d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -824,26 +824,6 @@ static int hns3_check_ksettings_param(const struct net_device *netdev, return 0; } -static int hns3_set_phy_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) -{ - struct hnae3_handle *handle = hns3_get_handle(netdev); - const struct hnae3_ae_ops *ops = handle->ae_algo->ops; - int ret; - - if (cmd->base.speed == SPEED_1000 && - cmd->base.autoneg == AUTONEG_DISABLE) - return -EINVAL; - - if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) { - ret = ops->restore_pauseparam(handle); - if (ret) - return ret; - } - - return phy_ethtool_ksettings_set(netdev->phydev, cmd); -} - static int hns3_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { @@ -862,11 +842,16 @@ static int hns3_set_link_ksettings(struct net_device *netdev, cmd->base.autoneg, cmd->base.speed, cmd->base.duplex); /* Only support ksettings_set for netdev with phy attached for now */ - if (netdev->phydev) - return hns3_set_phy_link_ksettings(netdev, cmd); - else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && - ops->set_phy_link_ksettings) + if (netdev->phydev) { + if (cmd->base.speed == SPEED_1000 && + cmd->base.autoneg == AUTONEG_DISABLE) + return -EINVAL; + + return phy_ethtool_ksettings_set(netdev->phydev, cmd); + } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) && + ops->set_phy_link_ksettings) { return ops->set_phy_link_ksettings(handle, cmd); + } if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 269e579762b2..d891390d492f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10998,35 +10998,6 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, return -EOPNOTSUPP; } -static int hclge_restore_pauseparam(struct hnae3_handle *handle) -{ - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - u32 auto_neg, rx_pause, tx_pause; - int ret; - - hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause); - /* when autoneg is disabled, the pause setting of phy has no effect - * unless the link goes down. - */ - ret = phy_suspend(hdev->hw.mac.phydev); - if (ret) - return ret; - - phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause); - - ret = phy_resume(hdev->hw.mac.phydev); - if (ret) - return ret; - - ret = hclge_mac_pause_setup_hw(hdev); - if (ret) - dev_err(&hdev->pdev->dev, - "restore pauseparam error, ret = %d.\n", ret); - - return ret; -} - static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, u8 *auto_neg, u32 *speed, u8 *duplex) { @@ -12990,7 +12961,6 @@ static const struct hnae3_ae_ops hclge_ops = { .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, - .restore_pauseparam = hclge_restore_pauseparam, .set_mtu = hclge_set_mtu, .reset_queue = hclge_reset_tqp, .get_stats = hclge_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 124791e4bfee..95074e91a846 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc) return 0; } -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) { bool tx_en, rx_en; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 4b2c3a788980..2ee9b795f71d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -244,7 +244,6 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight); int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id, enum hclge_opcode_type cmd, struct hclge_tm_shaper_para *para); -int hclge_mac_pause_setup_hw(struct hclge_dev *hdev); int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id); int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id); int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id, -- cgit v1.2.3