160 files changed, 6520 insertions, 2990 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index 560369efad6c..de04626a8e9d 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -76,6 +76,12 @@ phy-mode must be set, see also example 2 below! * mt7621: phy-mode = "rgmii-txid"; * mt7623: phy-mode = "rgmii"; +Optional properties: + +- gpio-controller: Boolean; if defined, MT7530's LED controller will run on + GPIO mode. +- #gpio-cells: Must be 2 if gpio-controller is defined. + See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required, optional properties and how the integrated switch subnodes must be specified. diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 8a2d12644675..8f86084bf12e 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -113,13 +113,6 @@ properties: performing early IPA initialization, including loading and validating firwmare used by the GSI. - modem-remoteproc: - $ref: /schemas/types.yaml#/definitions/phandle - description: - This defines the phandle to the remoteproc node representing - the modem subsystem. This is requied so the IPA driver can - receive and act on notifications of modem up/down events. - memory-region: maxItems: 1 description: @@ -135,7 +128,6 @@ required: - interrupts - interconnects - qcom,smem-states - - modem-remoteproc oneOf: - required: @@ -147,7 +139,7 @@ additionalProperties: false examples: - | - #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/clock/qcom,rpmh.h> #include <dt-bindings/interconnect/qcom,sdm845.h> @@ -168,7 +160,6 @@ examples: compatible = "qcom,sdm845-ipa"; modem-init; - modem-remoteproc = <&mss_pil>; iommus = <&apps_smmu 0x720 0x3>; reg = <0x1e40000 0x7000>, @@ -178,8 +169,8 @@ examples: "ipa-shared", "gsi"; - interrupts-extended = <&intc 0 311 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>, <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; interrupt-names = "ipa", diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index de9dd574a2f9..91ba96d43c6c 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -40,6 +40,7 @@ properties: - renesas,etheravb-r8a77980 # R-Car V3H - renesas,etheravb-r8a77990 # R-Car E3 - renesas,etheravb-r8a77995 # R-Car D3 + - renesas,etheravb-r8a779a0 # R-Car V3U - const: renesas,etheravb-rcar-gen3 # R-Car Gen3 and RZ/G2 reg: true @@ -170,6 +171,7 @@ allOf: - renesas,etheravb-r8a77965 - renesas,etheravb-r8a77970 - renesas,etheravb-r8a77980 + - renesas,etheravb-r8a779a0 then: required: - tx-internal-delay-ps diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index 61e850460e18..dd5cd69467be 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -217,3 +217,73 @@ For example:: NPA_AF_ERR: NPA Error Interrupt Reg : 4096 AQ 
Doorbell Error + + +NIX Reporters +------------- +The NIX reporters are responsible for reporting and recovering the following group of errors: + +1. GENERAL events + + - Receive mirror/multicast packet drop due to insufficient buffer. + - SMQ Flush operation. + +2. ERROR events + + - Memory Fault due to WQE read/write from multicast/mirror buffer. + - Receive multicast/mirror replication list error. + - Receive packet on an unmapped PF. + - Fault due to NIX_AQ_INST_S read or NIX_AQ_RES_S write. + - AQ Doorbell Error. + +3. RAS events + + - RAS Error Reporting for NIX Receive Multicast/Mirror Entry Structure. + - RAS Error Reporting for WQE/Packet Data read from Multicast/Mirror Buffer.. + - RAS Error Reporting for NIX_AQ_INST_S/NIX_AQ_RES_S. + +4. RVU events + + - Error due to unmapped slot. + +Sample Output:: + + ~# ./devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_intr + state healthy error 1121 recover 1121 last_dump_date 2021-01-19 last_dump_time 05:42:26 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_gen + state healthy error 949 recover 949 last_dump_date 2021-01-19 last_dump_time 05:42:43 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_err + state healthy error 1147 recover 1147 last_dump_date 2021-01-19 last_dump_time 05:42:59 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_ras + state healthy error 409 recover 409 last_dump_date 2021-01-19 last_dump_time 05:43:16 grace_period 0 auto_recover true auto_dump true + +Each reporter dumps the + + - Error Type + - Error Register value + - Reason in words + +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_intr + NIX_AF_RVU: + NIX RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_gen + NIX_AF_GENERAL: + NIX General Interrupt Reg : 1 + Rx multicast pkt drop + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_err + NIX_AF_ERR: + NIX Error Interrupt Reg : 64 + Rx on unmapped PF_FUNC diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst index 93e92d2f0752..3d5ae51e65a2 100644 --- a/Documentation/networking/devlink/devlink-resource.rst +++ b/Documentation/networking/devlink/devlink-resource.rst @@ -23,6 +23,20 @@ current size and related sub resources. To access a sub resource, you specify the path of the resource. For example ``/IPv4/fib`` is the id for the ``fib`` sub-resource under the ``IPv4`` resource. +Generic Resources +================= + +Generic resources are used to describe resources that can be shared by multiple +device drivers and their description must be added to the following table: + +.. 
list-table:: List of Generic Resources + :widths: 10 90 + + * - Name + - Description + * - ``physical_ports`` + - A limited capacity of physical ports that the switch ASIC can support + example usage ------------- diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dd2b12a32b73..0e51ddd9a2f1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1871,6 +1871,16 @@ accept_ra_defrtr - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_defrtr_metric - UNSIGNED INTEGER + Route metric for default route learned in Router Advertisement. This value + will be assigned as metric for the default route learned via IPv6 Router + Advertisement. Takes affect only if accept_ra_defrtr is enabled. + + Possible values: + 1 to 0xFFFFFFFF + + Default: IP6_RT_PRIO_USER i.e. 1024. + accept_ra_from_local - BOOLEAN Accept RA with source-address that is found on local machine if the RA is otherwise proper and able to be accepted. diff --git a/MAINTAINERS b/MAINTAINERS index 1df56a32d2df..650deb973913 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2787,6 +2787,14 @@ F: arch/arm64/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ +ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER +M: George McCollister <george.mccollister@gmail.com> +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml +F: drivers/net/dsa/xrs700x/* +F: net/dsa/tag_xrs700x.c + AS3645A LED FLASH CONTROLLER DRIVER M: Sakari Ailus <sakari.ailus@iki.fi> L: linux-leds@vger.kernel.org @@ -649,7 +649,8 @@ ifeq ($(KBUILD_EXTMOD),) core-y := init/ usr/ drivers-y := drivers/ sound/ drivers-$(CONFIG_SAMPLES) += samples/ -drivers-y += net/ virt/ +drivers-$(CONFIG_NET) += net/ +drivers-y += virt/ libs-y := lib/ endif # KBUILD_EXTMOD diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 22b832fc62e3..003309f0d3e1 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1434,8 +1434,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&remoteproc_mpss>; - status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index bcf888381f14..04b2490eec9f 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2366,8 +2366,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&mss_pil>; - status = "disabled"; }; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 260f9f46668b..1ebb4b943876 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -42,6 +42,7 @@ config BONDING tristate "Bonding driver support" depends on INET depends on IPV6 || IPV6=n + depends on TLS || TLS_DEVICE=n help Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet Channels together. 
This is called 'Etherchannel' by Cisco, diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 01e4a194f187..d9281ae853f8 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -74,7 +74,7 @@ static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) } } -static const char *can_get_state_str(const enum can_state state) +const char *can_get_state_str(const enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: @@ -95,6 +95,7 @@ static const char *can_get_state_str(const enum can_state state) return "<unknown>"; } +EXPORT_SYMBOL_GPL(can_get_state_str); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index d35c4e82314d..b48140b1102e 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -27,12 +27,17 @@ static const u8 len2dlc[] = { 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ + 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ + 15, 15, 15, 15, 15, 15, 15, 15 /* 57 - 64 */ }; /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len) { - if (len >= ARRAY_SIZE(len2dlc)) + /* check for length mapping table size at build time */ + BUILD_BUG_ON(ARRAY_SIZE(len2dlc) != CANFD_MAX_DLEN + 1); + + if (unlikely(len > CANFD_MAX_DLEN)) return CANFD_MAX_DLC; return len2dlc[len]; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 5d9157c655e9..971ada36e37f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1975,14 +1975,14 @@ static int flexcan_setup_stop_mode_scfw(struct platform_device *pdev) priv = netdev_priv(dev); priv->scu_idx = scu_idx; - /* this function could be defered probe, return -EPROBE_DEFER */ + /* this function could be deferred probe, return -EPROBE_DEFER */ return imx_scu_get_handle(&priv->sc_ipc_handle); } /* flexcan_setup_stop_mode - Setup stop mode for wakeup * * Return: = 0 setup stop mode successfully or doesn't support this feature - * < 0 fail to setup stop mode (could be defered probe) + * < 0 fail to setup stop mode (could be deferred probe) */ static int flexcan_setup_stop_mode(struct platform_device *pdev) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 00e9855c23d1..3638b474d86b 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -335,6 +335,8 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) u8 len; int i, j; + netdev_reset_queue(priv->ndev); + /* TEF */ tef_ring = priv->tef; tef_ring->head = 0; @@ -1249,7 +1251,8 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq) static int mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, - const struct mcp251xfd_hw_tef_obj *hw_tef_obj) + const struct mcp251xfd_hw_tef_obj *hw_tef_obj, + unsigned int *frame_len_ptr) { struct net_device_stats *stats = &priv->ndev->stats; u32 seq, seq_masked, tef_tail_masked; @@ -1271,7 +1274,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, mcp251xfd_get_tef_tail(priv), - hw_tef_obj->ts, NULL); + hw_tef_obj->ts, + frame_len_ptr); stats->tx_packets++; priv->tef->tail++; @@ -1308,6 +1312,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, 
const u8 offset, const u8 len) { const struct mcp251xfd_tx_ring *tx_ring = priv->tx; + const int val_bytes = regmap_get_val_bytes(priv->map_rx); if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) && (offset > tx_ring->obj_num || @@ -1322,12 +1327,13 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, return regmap_bulk_read(priv->map_rx, mcp251xfd_get_tef_obj_addr(offset), hw_tef_obj, - sizeof(*hw_tef_obj) / sizeof(u32) * len); + sizeof(*hw_tef_obj) / val_bytes * len); } static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) { struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX]; + unsigned int total_frame_len = 0; u8 tef_tail, len, l; int err, i; @@ -1349,7 +1355,9 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) } for (i = 0; i < len; i++) { - err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]); + unsigned int frame_len; + + err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len); /* -EAGAIN means the Sequence Number in the TEF * doesn't match our tef_tail. This can happen if we * read the TEF objects too early. Leave loop let the @@ -1359,6 +1367,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) goto out_netif_wake_queue; if (err) return err; + + total_frame_len += frame_len; } out_netif_wake_queue: @@ -1389,6 +1399,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) return err; tx_ring->tail += len; + netdev_completed_queue(priv->ndev, len, total_frame_len); err = mcp251xfd_check_tef_tail(priv); if (err) @@ -1438,6 +1449,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, struct sk_buff *skb) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) { u32 sid, eid; @@ -1453,9 +1465,10 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, hw_rx_obj->id); } + dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); + /* CANFD */ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) { - u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI) cfd->flags |= CANFD_ESI; @@ -1463,17 +1476,17 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS) cfd->flags |= CANFD_BRS; - dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); cfd->len = can_fd_dlc2len(dlc); } else { if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR) cfd->can_id |= CAN_RTR_FLAG; - cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, - hw_rx_obj->flags)); + can_frame_set_cc_len((struct can_frame *)cfd, dlc, + priv->can.ctrlmode); } - memcpy(cfd->data, hw_rx_obj->data, cfd->len); + if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) + memcpy(cfd->data, hw_rx_obj->data, cfd->len); } static int @@ -1510,12 +1523,13 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv, struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj, const u8 offset, const u8 len) { + const int val_bytes = regmap_get_val_bytes(priv->map_rx); int err; err = regmap_bulk_read(priv->map_rx, mcp251xfd_get_rx_obj_addr(ring, offset), hw_rx_obj, - len * ring->obj_size / sizeof(u32)); + len * ring->obj_size / val_bytes); return err; } @@ -2137,6 +2151,7 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv) static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) { struct mcp251xfd_priv *priv = dev_id; + const int val_bytes = regmap_get_val_bytes(priv->map_reg); irqreturn_t handled = IRQ_NONE; int err; @@ -2162,7 +2177,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err = regmap_bulk_read(priv->map_reg, 
MCP251XFD_REG_INT, &priv->regs_status, sizeof(priv->regs_status) / - sizeof(u32)); + val_bytes); if (err) goto out_fail; @@ -2300,7 +2315,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, union mcp251xfd_tx_obj_load_buf *load_buf; u8 dlc; u32 id, flags; - int offset, len; + int len_sanitized = 0, len; if (cfd->can_id & CAN_EFF_FLAG) { u32 sid, eid; @@ -2321,12 +2336,12 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, * harm, only the lower 7 bits will be transferred into the * TEF object. */ - dlc = can_fd_len2dlc(cfd->len); - flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq) | - FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq); if (cfd->can_id & CAN_RTR_FLAG) flags |= MCP251XFD_OBJ_FLAGS_RTR; + else + len_sanitized = canfd_sanitize_len(cfd->len); /* CANFD */ if (can_is_canfd_skb(skb)) { @@ -2337,8 +2352,15 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, if (cfd->flags & CANFD_BRS) flags |= MCP251XFD_OBJ_FLAGS_BRS; + + dlc = can_fd_len2dlc(cfd->len); + } else { + dlc = can_get_cc_dlc((struct can_frame *)cfd, + priv->can.ctrlmode); } + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + load_buf = &tx_obj->buf; if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) hw_tx_obj = &load_buf->crc.hw_tx_obj; @@ -2348,17 +2370,22 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, put_unaligned_le32(id, &hw_tx_obj->id); put_unaligned_le32(flags, &hw_tx_obj->flags); - /* Clear data at end of CAN frame */ - offset = round_down(cfd->len, sizeof(u32)); - len = round_up(can_fd_dlc2len(dlc), sizeof(u32)) - offset; - if (MCP251XFD_SANITIZE_CAN && len) - memset(hw_tx_obj->data + offset, 0x0, len); + /* Copy data */ memcpy(hw_tx_obj->data, cfd->data, cfd->len); + /* Clear unused data at end of CAN frame */ + if (MCP251XFD_SANITIZE_CAN && len_sanitized) { + int pad_len; + + pad_len = len_sanitized - cfd->len; + if (pad_len) + memset(hw_tx_obj->data + cfd->len, 0x0, pad_len); + } + /* Number of bytes to be written into the RAM of the controller */ len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags); if (MCP251XFD_SANITIZE_CAN) - len += round_up(can_fd_dlc2len(dlc), sizeof(u32)); + len += round_up(len_sanitized, sizeof(u32)); else len += round_up(cfd->len, sizeof(u32)); @@ -2418,6 +2445,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct mcp251xfd_priv *priv = netdev_priv(ndev); struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct mcp251xfd_tx_obj *tx_obj; + unsigned int frame_len; u8 tx_head; int err; @@ -2433,10 +2461,12 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, /* Stop queue if we occupy the complete TX FIFO */ tx_head = mcp251xfd_get_tx_head(tx_ring); tx_ring->head++; - if (tx_ring->head - tx_ring->tail >= tx_ring->obj_num) + if (mcp251xfd_get_tx_free(tx_ring) == 0) netif_stop_queue(ndev); - can_put_echo_skb(skb, ndev, tx_head, 0); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, ndev, tx_head, frame_len); + netdev_sent_queue(priv->ndev, frame_len); err = mcp251xfd_tx_obj_write(priv, tx_obj); if (err) @@ -2885,7 +2915,8 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | - CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; + CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | + CAN_CTRLMODE_CC_LEN8_DLC; priv->ndev = ndev; priv->spi = spi; 
priv->rx_int = rx_int; diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 4232a7126c1b..1f649d178010 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -466,7 +466,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, struct mcba_usb_msg_ka_usb *msg) { if (unlikely(priv->usb_ka_first_pass)) { - netdev_info(priv->netdev, "PIC USB version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC USB version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->usb_ka_first_pass = false; @@ -492,7 +492,7 @@ static void mcba_usb_process_ka_can(struct mcba_priv *priv, struct mcba_usb_msg_ka_can *msg) { if (unlikely(priv->can_ka_first_pass)) { - netdev_info(priv->netdev, "PIC CAN version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC CAN version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->can_ka_first_pass = false; @@ -554,7 +554,7 @@ static void mcba_usb_process_rx(struct mcba_priv *priv, break; default: - netdev_warn(priv->netdev, "Unsupported msg (0x%hhX)", + netdev_warn(priv->netdev, "Unsupported msg (0x%X)", msg->cmd_id); break; } diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 9a1921e653e8..4cc51fb37e67 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach <kurt@linutronix.de> */ @@ -153,6 +153,13 @@ static void hellcreek_select_vlan(struct hellcreek *hellcreek, int vid, hellcreek_write(hellcreek, val, HR_VIDCFG); } +static void hellcreek_select_tgd(struct hellcreek *hellcreek, int port) +{ + u16 val = port << TR_TGDSEL_TDGSEL_SHIFT; + + hellcreek_write(hellcreek, val, TR_TGDSEL); +} + static int hellcreek_wait_until_ready(struct hellcreek *hellcreek) { u16 val; @@ -1125,6 +1132,296 @@ out: return ret; } +static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port, + const struct tc_taprio_qopt_offload *schedule) +{ + const struct tc_taprio_sched_entry *cur, *initial, *next; + size_t i; + + cur = initial = &schedule->entries[0]; + next = cur + 1; + + for (i = 1; i <= schedule->num_entries; ++i) { + u16 data; + u8 gates; + + cur++; + next++; + + if (i == schedule->num_entries) + gates = initial->gate_mask ^ + cur->gate_mask; + else + gates = next->gate_mask ^ + cur->gate_mask; + + data = gates; + + if (i == schedule->num_entries) + data |= TR_GCLDAT_GCLWRLAST; + + /* Gates states */ + hellcreek_write(hellcreek, data, TR_GCLDAT); + + /* Time interval */ + hellcreek_write(hellcreek, + cur->interval & 0x0000ffff, + TR_GCLTIL); + hellcreek_write(hellcreek, + (cur->interval & 0xffff0000) >> 16, + TR_GCLTIH); + + /* Commit entry */ + data = ((i - 1) << TR_GCLCMD_GCLWRADR_SHIFT) | + (initial->gate_mask << + TR_GCLCMD_INIT_GATE_STATES_SHIFT); + hellcreek_write(hellcreek, data, TR_GCLCMD); + } +} + +static void hellcreek_set_cycle_time(struct hellcreek *hellcreek, + const struct tc_taprio_qopt_offload *schedule) +{ + u32 cycle_time = schedule->cycle_time; + + hellcreek_write(hellcreek, cycle_time & 0x0000ffff, TR_CTWRL); + hellcreek_write(hellcreek, (cycle_time & 0xffff0000) >> 16, TR_CTWRH); +} + +static void hellcreek_switch_schedule(struct hellcreek *hellcreek, + ktime_t start_time) +{ + struct timespec64 ts = ktime_to_timespec64(start_time); + + /* Start schedule at this point of time */ + hellcreek_write(hellcreek, ts.tv_nsec & 
0x0000ffff, TR_ESTWRL); + hellcreek_write(hellcreek, (ts.tv_nsec & 0xffff0000) >> 16, TR_ESTWRH); + + /* Arm timer, set seconds and switch schedule */ + hellcreek_write(hellcreek, TR_ESTCMD_ESTARM | TR_ESTCMD_ESTSWCFG | + ((ts.tv_sec & TR_ESTCMD_ESTSEC_MASK) << + TR_ESTCMD_ESTSEC_SHIFT), TR_ESTCMD); +} + +static bool hellcreek_schedule_startable(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + s64 base_time_ns, current_ns; + + /* The switch allows a schedule to be started only eight seconds within + * the future. Therefore, check the current PTP time if the schedule is + * startable or not. + */ + + /* Use the "cached" time. That should be alright, as it's updated quite + * frequently in the PTP code. + */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + /* Calculate difference to admin base time */ + base_time_ns = ktime_to_ns(hellcreek_port->current_schedule->base_time); + + return base_time_ns - current_ns < (s64)8 * NSEC_PER_SEC; +} + +static void hellcreek_start_schedule(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + ktime_t base_time, current_time; + s64 current_ns; + u32 cycle_time; + + /* First select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Forward base time into the future if needed */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + current_time = ns_to_ktime(current_ns); + base_time = hellcreek_port->current_schedule->base_time; + cycle_time = hellcreek_port->current_schedule->cycle_time; + + if (ktime_compare(current_time, base_time) > 0) { + s64 n; + + n = div64_s64(ktime_sub_ns(current_time, base_time), + cycle_time); + base_time = ktime_add_ns(base_time, (n + 1) * cycle_time); + } + + /* Set admin base time and switch schedule */ + hellcreek_switch_schedule(hellcreek, base_time); + + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + + dev_dbg(hellcreek->dev, "Armed EST timer for port %d\n", + hellcreek_port->port); +} + +static void hellcreek_check_schedule(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct hellcreek_port *hellcreek_port; + struct hellcreek *hellcreek; + bool startable; + + hellcreek_port = dw_to_hellcreek_port(dw); + hellcreek = hellcreek_port->hellcreek; + + mutex_lock(&hellcreek->reg_lock); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, + hellcreek_port->port); + if (startable) { + hellcreek_start_schedule(hellcreek, hellcreek_port->port); + mutex_unlock(&hellcreek->reg_lock); + return; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Reschedule */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); +} + +static int hellcreek_port_set_schedule(struct dsa_switch *ds, int port, + struct tc_taprio_qopt_offload *taprio) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + bool startable; + u16 ctrl; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Configure traffic schedule on port %d\n", + port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + 
taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + hellcreek_port->current_schedule = taprio_offload_get(taprio); + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Enable gating and keep defaults */ + ctrl = (0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT) | TR_TGDCTRL_GATE_EN; + hellcreek_write(hellcreek, ctrl, TR_TGDCTRL); + + /* Cancel pending schedule */ + hellcreek_write(hellcreek, 0x00, TR_ESTCMD); + + /* Setup a new schedule */ + hellcreek_setup_gcl(hellcreek, port, hellcreek_port->current_schedule); + + /* Configure cycle time */ + hellcreek_set_cycle_time(hellcreek, hellcreek_port->current_schedule); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, port); + if (startable) { + hellcreek_start_schedule(hellcreek, port); + mutex_unlock(&hellcreek->reg_lock); + return 0; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Schedule periodic schedule check */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); + + return 0; +} + +static int hellcreek_port_del_schedule(struct dsa_switch *ds, int port) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Remove traffic schedule on port %d\n", port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Disable gating and return to regular switching flow */ + hellcreek_write(hellcreek, 0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT, + TR_TGDCTRL); + + mutex_unlock(&hellcreek->reg_lock); + + return 0; +} + +static bool hellcreek_validate_schedule(struct hellcreek *hellcreek, + struct tc_taprio_qopt_offload *schedule) +{ + size_t i; + + /* Does this hellcreek version support Qbv in hardware? 
*/ + if (!hellcreek->pdata->qbv_support) + return false; + + /* cycle time can only be 32bit */ + if (schedule->cycle_time > (u32)-1) + return false; + + /* cycle time extension is not supported */ + if (schedule->cycle_time_extension) + return false; + + /* Only set command is supported */ + for (i = 0; i < schedule->num_entries; ++i) + if (schedule->entries[i].command != TC_TAPRIO_CMD_SET_GATES) + return false; + + return true; +} + +static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data) +{ + struct tc_taprio_qopt_offload *taprio = type_data; + struct hellcreek *hellcreek = ds->priv; + + if (type != TC_SETUP_QDISC_TAPRIO) + return -EOPNOTSUPP; + + if (!hellcreek_validate_schedule(hellcreek, taprio)) + return -EOPNOTSUPP; + + if (taprio->enable) + return hellcreek_port_set_schedule(ds, port, taprio); + + return hellcreek_port_del_schedule(ds, port); +} + static const struct dsa_switch_ops hellcreek_ds_ops = { .get_ethtool_stats = hellcreek_get_ethtool_stats, .get_sset_count = hellcreek_get_sset_count, @@ -1143,6 +1440,7 @@ static const struct dsa_switch_ops hellcreek_ds_ops = { .port_hwtstamp_get = hellcreek_port_hwtstamp_get, .port_prechangeupper = hellcreek_port_prechangeupper, .port_rxtstamp = hellcreek_port_rxtstamp, + .port_setup_tc = hellcreek_port_setup_tc, .port_stp_state_set = hellcreek_port_stp_state_set, .port_txtstamp = hellcreek_port_txtstamp, .port_vlan_add = hellcreek_vlan_add, @@ -1197,6 +1495,9 @@ static int hellcreek_probe(struct platform_device *pdev) port->hellcreek = hellcreek; port->port = i; + + INIT_DELAYED_WORK(&port->schedule_work, + hellcreek_check_schedule); } mutex_init(&hellcreek->reg_lock); diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index e81781ebc31c..854639f87247 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach <kurt@linutronix.de> */ @@ -21,6 +21,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <net/dsa.h> +#include <net/pkt_sched.h> /* Ports: * - 0: CPU @@ -246,6 +247,10 @@ struct hellcreek_port { /* Per-port timestamping resources */ struct hellcreek_port_hwtstamp port_hwtstamp; + + /* Per-port Qbv schedule information */ + struct tc_taprio_qopt_offload *current_schedule; + struct delayed_work schedule_work; }; struct hellcreek_fdb_entry { @@ -283,4 +288,14 @@ struct hellcreek { size_t fdb_entries; }; +/* A Qbv schedule can only started up to 8 seconds in the future. If the delta + * between the base time and the current ptp time is larger than 8 seconds, then + * use periodic work to check for the schedule to be started. The delayed work + * cannot be armed directly to $base_time - 8 + X, because for large deltas the + * PTP frequency matters. 
+ */ +#define HELLCREEK_SCHEDULE_PERIOD (2 * HZ) +#define dw_to_hellcreek_port(dw) \ + container_of(dw, struct hellcreek_port, schedule_work) + #endif /* _HELLCREEK_H_ */ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d2196197d920..eb13ba79dd01 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -18,6 +18,7 @@ #include <linux/regulator/consumer.h> #include <linux/reset.h> #include <linux/gpio/consumer.h> +#include <linux/gpio/driver.h> #include <net/dsa.h> #include "mt7530.h" @@ -1622,6 +1623,109 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port, } } +static inline u32 +mt7530_gpio_to_bit(unsigned int offset) +{ + /* Map GPIO offset to register bit + * [ 2: 0] port 0 LED 0..2 as GPIO 0..2 + * [ 6: 4] port 1 LED 0..2 as GPIO 3..5 + * [10: 8] port 2 LED 0..2 as GPIO 6..8 + * [14:12] port 3 LED 0..2 as GPIO 9..11 + * [18:16] port 4 LED 0..2 as GPIO 12..14 + */ + return BIT(offset + offset / 3); +} + +static int +mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit); +} + +static void +mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); +} + +static int +mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ? + GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; +} + +static int +mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_clear(priv, MT7530_LED_GPIO_OE, bit); + mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit); + + return 0; +} + +static int +mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_set(priv, MT7530_LED_GPIO_DIR, bit); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); + + mt7530_set(priv, MT7530_LED_GPIO_OE, bit); + + return 0; +} + +static int +mt7530_setup_gpio(struct mt7530_priv *priv) +{ + struct device *dev = priv->dev; + struct gpio_chip *gc; + + gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); + if (!gc) + return -ENOMEM; + + mt7530_write(priv, MT7530_LED_GPIO_OE, 0); + mt7530_write(priv, MT7530_LED_GPIO_DIR, 0); + mt7530_write(priv, MT7530_LED_IO_MODE, 0); + + gc->label = "mt7530"; + gc->parent = dev; + gc->owner = THIS_MODULE; + gc->get_direction = mt7530_gpio_get_direction; + gc->direction_input = mt7530_gpio_direction_input; + gc->direction_output = mt7530_gpio_direction_output; + gc->get = mt7530_gpio_get; + gc->set = mt7530_gpio_set; + gc->base = -1; + gc->ngpio = 15; + gc->can_sleep = true; + + return devm_gpiochip_add_data(dev, gc, priv); +} + static int mt7530_setup(struct dsa_switch *ds) { @@ -1763,6 +1867,12 @@ mt7530_setup(struct dsa_switch *ds) } } + if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) { + ret = mt7530_setup_gpio(priv); + if (ret) + return ret; + } + mt7530_setup_port5(ds, interface); /* Flush the FDB table */ 
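The mt7530 changes above expose the switch's LED pins as a GPIO controller whenever the new "gpio-controller" property is present in the device tree (see the mt7530.txt binding update earlier in this series; the driver registers 15 lines, three LEDs for each of ports 0-4). A minimal board-level sketch of such a node follows; the node label, unit address and surrounding required properties are illustrative only and are not taken from this series::

	switch0: switch@0 {
		compatible = "mediatek,mt7530";
		reg = <0>;
		/* ... remaining required properties per mt7530.txt omitted ... */

		/* Optional: run the LED controller in GPIO mode */
		gpio-controller;
		#gpio-cells = <2>;
	};

With this in place, consumers (for example a gpio-leds node) can reference the LED pins with the usual two-cell specifiers such as <&switch0 0 GPIO_ACTIVE_HIGH>, where the second cell carries the standard GPIO flags.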
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 32d8969b3ace..64a9bb377e15 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -554,6 +554,26 @@ enum mt7531_clk_skew { #define MT7531_GPIO12_RG_RXD3_MASK GENMASK(19, 16) #define MT7531_EXT_P_MDIO_12 (2 << 16) +/* Registers for LED GPIO control (MT7530 only) + * All registers follow this pattern: + * [ 2: 0] port 0 + * [ 6: 4] port 1 + * [10: 8] port 2 + * [14:12] port 3 + * [18:16] port 4 + */ + +/* LED enable, 0: Disable, 1: Enable (Default) */ +#define MT7530_LED_EN 0x7d00 +/* LED mode, 0: GPIO mode, 1: PHY mode (Default) */ +#define MT7530_LED_IO_MODE 0x7d04 +/* GPIO direction, 0: Input, 1: Output */ +#define MT7530_LED_GPIO_DIR 0x7d10 +/* GPIO output enable, 0: Disable, 1: Enable */ +#define MT7530_LED_GPIO_OE 0x7d14 +/* GPIO value, 0: Low, 1: High */ +#define MT7530_LED_GPIO_DATA 0x7d18 + #define MT7530_CREV 0x7ffc #define CHIP_NAME_SHIFT 16 #define MT7530_ID 0x7530 diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 51185e4d7d15..b17540926c11 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -25,7 +25,6 @@ config NET_DSA_MV88E6XXX_PTP default n depends on NET_DSA_MV88E6XXX_GLOBAL2 depends on PTP_1588_CLOCK - imply NETWORK_PHY_TIMESTAMPING help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2f976050a0d7..514364947944 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4049,8 +4049,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6250_g1_reset, - .vtu_getnext = mv88e6250_g1_vtu_getnext, - .vtu_loadpurge = mv88e6250_g1_vtu_loadpurge, + .vtu_getnext = mv88e6185_g1_vtu_getnext, + .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6250_ptp_ops, .phylink_validate = mv88e6065_phylink_validate, diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 80a182c5b98a..7c396964d0b2 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -336,10 +336,6 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 7b96396be609..ae12c981923e 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -336,39 +336,6 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return mv88e6xxx_g1_vtu_vid_read(chip, entry); } -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 val; - int err; - - err = mv88e6xxx_g1_vtu_getnext(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_read(chip, entry); - if (err) - return err; - - err = 
mv88e6185_g1_stu_data_read(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); - if (err) - return err; - - entry->fid = val & 0x000f; - entry->fid |= (val & 0x0300) >> 4; - } - - return 0; -} - int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -389,7 +356,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[7:4] are located in VTU Operation 11:8 + * VTU DBNum[7:4] ([5:4] for 6250) are located in VTU Operation 11:8 (9:8) */ err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); if (err) @@ -397,6 +364,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, entry->fid = val & 0x000f; entry->fid |= (val & 0x0f00) >> 4; + entry->fid &= mv88e6xxx_num_databases(chip) - 1; } return 0; @@ -466,35 +434,6 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return 0; } -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 op = MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE; - int err; - - err = mv88e6xxx_g1_vtu_op_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_write(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - op |= entry->fid & 0x000f; - op |= (entry->fid & 0x0030) << 4; - } - - return mv88e6xxx_g1_vtu_op(chip, op); -} - int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -516,6 +455,10 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 + * + * For the 6250/6220, the latter are really [5:4] and + * 9:8, but in those cases bits 7:6 of entry->fid are + * 0 since they have num_databases = 64. 
*/ op |= entry->fid & 0x000f; op |= (entry->fid & 0x00f0) << 4; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index de50e8b9e656..ad04660b97b8 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -33,7 +33,6 @@ source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/aquantia/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" -source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" source "drivers/net/ethernet/brocade/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index f8f38dcb5f8a..1e7dc8a7762d 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ -obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/ diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig deleted file mode 100644 index 9ee30ea90bfa..000000000000 --- a/drivers/net/ethernet/aurora/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config NET_VENDOR_AURORA - bool "Aurora VLSI devices" - default y - help - If you have a network (Ethernet) device belonging to this class, - say Y. - - Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - questions about Aurora devices. If you say Y, you will be asked - for your specific device in the following questions. - -if NET_VENDOR_AURORA - -config AURORA_NB8800 - tristate "Aurora AU-NB8800 support" - depends on HAS_DMA - select PHYLIB - help - Support for the AU-NB8800 gigabit Ethernet controller. - -endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile deleted file mode 100644 index f3d599867619..000000000000 --- a/drivers/net/ethernet/aurora/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c deleted file mode 100644 index 5b20185cbd62..000000000000 --- a/drivers/net/ethernet/aurora/nb8800.c +++ /dev/null @@ -1,1520 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Mans Rullgard <mans@mansr.com> - * - * Mostly rewritten, based on driver from Sigma Designs. Original - * copyright notice below. - * - * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. 
- * - * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr> - */ - -#include <linux/module.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/interrupt.h> -#include <linux/platform_device.h> -#include <linux/of_device.h> -#include <linux/of_mdio.h> -#include <linux/of_net.h> -#include <linux/dma-mapping.h> -#include <linux/phy.h> -#include <linux/cache.h> -#include <linux/jiffies.h> -#include <linux/io.h> -#include <linux/iopoll.h> -#include <asm/barrier.h> - -#include "nb8800.h" - -static void nb8800_tx_done(struct net_device *dev); -static int nb8800_dma_stop(struct net_device *dev); - -static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) -{ - return readb_relaxed(priv->base + reg); -} - -static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) -{ - return readl_relaxed(priv->base + reg); -} - -static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) -{ - writeb_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) -{ - writew_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) -{ - writel_relaxed(val, priv->base + reg); -} - -static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readb(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writeb(priv, reg, new); -} - -static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readl(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writel(priv, reg, new); -} - -static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, - bool set) -{ - nb8800_maskb(priv, reg, bits, set ? bits : 0); -} - -static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, bits); -} - -static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, 0); -} - -static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, - bool set) -{ - nb8800_maskl(priv, reg, bits, set ? 
bits : 0); -} - -static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, bits); -} - -static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, 0); -} - -static int nb8800_mdio_wait(struct mii_bus *bus) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - - return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, - val, !(val & MDIO_CMD_GO), 1, 1000); -} - -static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) -{ - struct nb8800_priv *priv = bus->priv; - int err; - - err = nb8800_mdio_wait(bus); - if (err) - return err; - - nb8800_writel(priv, NB8800_MDIO_CMD, cmd); - udelay(10); - nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); - - return nb8800_mdio_wait(bus); -} - -static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - int err; - - err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); - if (err) - return err; - - val = nb8800_readl(priv, NB8800_MDIO_STS); - if (val & MDIO_STS_ERR) - return 0xffff; - - return val & 0xffff; -} - -static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) -{ - u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | - MDIO_CMD_DATA(val) | MDIO_CMD_WR; - - return nb8800_mdio_cmd(bus, cmd); -} - -static void nb8800_mac_tx(struct net_device *dev, bool enable) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) - cpu_relax(); - - nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); -} - -static void nb8800_mac_rx(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); -} - -static void nb8800_mac_af(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); -} - -static void nb8800_start_rx(struct net_device *dev) -{ - nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); -} - -static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; - int size = L1_CACHE_ALIGN(RX_BUF_SIZE); - dma_addr_t dma_addr; - struct page *page; - unsigned long offset; - void *data; - - data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); - if (!data) - return -ENOMEM; - - page = virt_to_head_page(data); - offset = data - page_address(page); - - dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, - DMA_FROM_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - skb_free_frag(data); - return -ENOMEM; - } - - rxb->page = page; - rxb->offset = offset; - rxd->desc.s_addr = dma_addr; - - return 0; -} - -static void nb8800_receive(struct net_device *dev, unsigned int i, - unsigned int len) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct page *page = priv->rx_bufs[i].page; - int offset = priv->rx_bufs[i].offset; - void *data = page_address(page) + offset; - dma_addr_t dma = rxd->desc.s_addr; - struct sk_buff *skb; - unsigned int size; - int err; - - size = len <= RX_COPYBREAK ? 
len : RX_COPYHDR; - - skb = napi_alloc_skb(&priv->napi, size); - if (!skb) { - netdev_err(dev, "rx skb allocation failed\n"); - dev->stats.rx_dropped++; - return; - } - - if (len <= RX_COPYBREAK) { - dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); - skb_put_data(skb, data, len); - dma_sync_single_for_device(&dev->dev, dma, len, - DMA_FROM_DEVICE); - } else { - err = nb8800_alloc_rx(dev, i, true); - if (err) { - netdev_err(dev, "rx buffer allocation failed\n"); - dev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } - - dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); - skb_put_data(skb, data, RX_COPYHDR); - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - offset + RX_COPYHDR, len - RX_COPYHDR, - RX_BUF_SIZE); - } - - skb->protocol = eth_type_trans(skb, dev); - napi_gro_receive(&priv->napi, skb); -} - -static void nb8800_rx_error(struct net_device *dev, u32 report) -{ - if (report & RX_LENGTH_ERR) - dev->stats.rx_length_errors++; - - if (report & RX_FCS_ERR) - dev->stats.rx_crc_errors++; - - if (report & RX_FIFO_OVERRUN) - dev->stats.rx_fifo_errors++; - - if (report & RX_ALIGNMENT_ERROR) - dev->stats.rx_frame_errors++; - - dev->stats.rx_errors++; -} - -static int nb8800_poll(struct napi_struct *napi, int budget) -{ - struct net_device *dev = napi->dev; - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - unsigned int last = priv->rx_eoc; - unsigned int next; - int work = 0; - - nb8800_tx_done(dev); - -again: - do { - unsigned int len; - - next = (last + 1) % RX_DESC_COUNT; - - rxd = &priv->rx_descs[next]; - - if (!rxd->report) - break; - - len = RX_BYTES_TRANSFERRED(rxd->report); - - if (IS_RX_ERROR(rxd->report)) - nb8800_rx_error(dev, rxd->report); - else - nb8800_receive(dev, next, len); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += len; - - if (rxd->report & RX_MULTICAST_PKT) - dev->stats.multicast++; - - rxd->report = 0; - last = next; - work++; - } while (work < budget); - - if (work) { - priv->rx_descs[last].desc.config |= DESC_EOC; - wmb(); /* ensure new EOC is written before clearing old */ - priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; - priv->rx_eoc = last; - nb8800_start_rx(dev); - } - - if (work < budget) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - /* If a packet arrived after we last checked but - * before writing RX_ITR, the interrupt will be - * delayed, so we retrieve it now. 
- */ - if (priv->rx_descs[next].report) - goto again; - - napi_complete_done(napi, work); - } - - return work; -} - -static void __nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb; - u32 txc_cr; - - txb = &priv->tx_bufs[priv->tx_queue]; - if (!txb->ready) - return; - - txc_cr = nb8800_readl(priv, NB8800_TXC_CR); - if (txc_cr & TCR_EN) - return; - - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); /* ensure desc addr is written before starting DMA */ - nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); - - priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; -} - -static void nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock_irq(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock_irq(&priv->tx_lock); -} - -static void nb8800_tx_dma_start_irq(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock(&priv->tx_lock); -} - -static netdev_tx_t nb8800_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_desc *txd; - struct nb8800_tx_buf *txb; - struct nb8800_dma_desc *desc; - dma_addr_t dma_addr; - unsigned int dma_len; - unsigned int align; - unsigned int next; - bool xmit_more; - - if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - - align = (8 - (uintptr_t)skb->data) & 7; - - dma_len = skb->len - align; - dma_addr = dma_map_single(&dev->dev, skb->data + align, - dma_len, DMA_TO_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - netdev_err(dev, "tx dma mapping error\n"); - kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - - xmit_more = netdev_xmit_more(); - if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - xmit_more = false; - } - - next = priv->tx_next; - txb = &priv->tx_bufs[next]; - txd = &priv->tx_descs[next]; - desc = &txd->desc[0]; - - next = (next + 1) % TX_DESC_COUNT; - - if (align) { - memcpy(txd->buf, skb->data, align); - - desc->s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); - desc->config = DESC_BTS(2) | DESC_DS | align; - - desc++; - } - - desc->s_addr = dma_addr; - desc->n_addr = priv->tx_bufs[next].dma_desc; - desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; - - if (!xmit_more) - desc->config |= DESC_EOC; - - txb->skb = skb; - txb->dma_addr = dma_addr; - txb->dma_len = dma_len; - - if (!priv->tx_chain) { - txb->chain_len = 1; - priv->tx_chain = txb; - } else { - priv->tx_chain->chain_len++; - } - - netdev_sent_queue(dev, skb->len); - - priv->tx_next = next; - - if (!xmit_more) { - smp_wmb(); - priv->tx_chain->ready = true; - priv->tx_chain = NULL; - nb8800_tx_dma_start(dev); - } - - return NETDEV_TX_OK; -} - -static void nb8800_tx_error(struct net_device *dev, u32 report) -{ - if (report & TX_LATE_COLLISION) - dev->stats.collisions++; - - if (report & TX_PACKET_DROPPED) - dev->stats.tx_dropped++; - - if (report & TX_FIFO_UNDERRUN) - dev->stats.tx_fifo_errors++; - - dev->stats.tx_errors++; -} - -static void nb8800_tx_done(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int limit = priv->tx_next; - unsigned int done = priv->tx_done; - unsigned int packets = 0; - unsigned int len = 0; - - while (done != limit) { - 
struct nb8800_tx_desc *txd = &priv->tx_descs[done]; - struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; - struct sk_buff *skb; - - if (!txd->report) - break; - - skb = txb->skb; - len += skb->len; - - dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, - DMA_TO_DEVICE); - - if (IS_TX_ERROR(txd->report)) { - nb8800_tx_error(dev, txd->report); - kfree_skb(skb); - } else { - consume_skb(skb); - } - - dev->stats.tx_packets++; - dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); - dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); - - txb->skb = NULL; - txb->ready = false; - txd->report = 0; - - done = (done + 1) % TX_DESC_COUNT; - packets++; - } - - if (packets) { - smp_mb__before_atomic(); - atomic_add(packets, &priv->tx_free); - netdev_completed_queue(dev, packets, len); - netif_wake_queue(dev); - priv->tx_done = done; - } -} - -static irqreturn_t nb8800_irq(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct nb8800_priv *priv = netdev_priv(dev); - irqreturn_t ret = IRQ_NONE; - u32 val; - - /* tx interrupt */ - val = nb8800_readl(priv, NB8800_TXC_SR); - if (val) { - nb8800_writel(priv, NB8800_TXC_SR, val); - - if (val & TSR_DI) - nb8800_tx_dma_start_irq(dev); - - if (val & TSR_TI) - napi_schedule_irqoff(&priv->napi); - - if (unlikely(val & TSR_DE)) - netdev_err(dev, "TX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & TSR_TO)) - netdev_err(dev, "TX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - /* rx interrupt */ - val = nb8800_readl(priv, NB8800_RXC_SR); - if (val) { - nb8800_writel(priv, NB8800_RXC_SR, val); - - if (likely(val & (RSR_RI | RSR_DI))) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); - napi_schedule_irqoff(&priv->napi); - } - - if (unlikely(val & RSR_DE)) - netdev_err(dev, "RX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & RSR_RO)) - netdev_err(dev, "RX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - return ret; -} - -static void nb8800_mac_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - bool gigabit = priv->speed == SPEED_1000; - u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; - u32 mac_mode = 0; - u32 slot_time; - u32 phy_clk; - u32 ict; - - if (!priv->duplex) - mac_mode |= HALF_DUPLEX; - - if (gigabit) { - if (phy_interface_is_rgmii(dev->phydev)) - mac_mode |= RGMII_MODE; - - mac_mode |= GMAC_MODE; - phy_clk = 125000000; - - /* Should be 512 but register is only 8 bits */ - slot_time = 255; - } else { - phy_clk = 25000000; - slot_time = 128; - } - - ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); - - nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); - nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); - nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); -} - -static void nb8800_pause_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - u32 rxcr; - - if (priv->pause_aneg) { - if (!phydev || !phydev->link) - return; - - priv->pause_rx = phydev->pause; - priv->pause_tx = phydev->pause ^ phydev->asym_pause; - } - - nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); - - rxcr = nb8800_readl(priv, NB8800_RXC_CR); - if (!!(rxcr & RCR_FL) == priv->pause_tx) - return; - - if (netif_running(dev)) { - napi_disable(&priv->napi); - netif_tx_lock_bh(dev); - nb8800_dma_stop(dev); - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - nb8800_start_rx(dev); - 
netif_tx_unlock_bh(dev); - napi_enable(&priv->napi); - } else { - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - } -} - -static void nb8800_link_reconfigure(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - int change = 0; - - if (phydev->link) { - if (phydev->speed != priv->speed) { - priv->speed = phydev->speed; - change = 1; - } - - if (phydev->duplex != priv->duplex) { - priv->duplex = phydev->duplex; - change = 1; - } - - if (change) - nb8800_mac_config(dev); - - nb8800_pause_config(dev); - } - - if (phydev->link != priv->link) { - priv->link = phydev->link; - change = 1; - } - - if (change) - phy_print_status(phydev); -} - -static void nb8800_update_mac_addr(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int i; - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); -} - -static int nb8800_set_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sock = addr; - - if (netif_running(dev)) - return -EBUSY; - - ether_addr_copy(dev->dev_addr, sock->sa_data); - nb8800_update_mac_addr(dev); - - return 0; -} - -static void nb8800_mc_init(struct net_device *dev, int val) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_MC_INIT, val); - readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, - 1, 1000); -} - -static void nb8800_set_rx_mode(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct netdev_hw_addr *ha; - int i; - - if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { - nb8800_mac_af(dev, false); - return; - } - - nb8800_mac_af(dev, true); - nb8800_mc_init(dev, 0); - - netdev_for_each_mc_addr(ha, dev) { - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); - - nb8800_mc_init(dev, 0xff); - } -} - -#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) -#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) - -static void nb8800_dma_free(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int i; - - if (priv->rx_bufs) { - for (i = 0; i < RX_DESC_COUNT; i++) - if (priv->rx_bufs[i].page) - put_page(priv->rx_bufs[i].page); - - kfree(priv->rx_bufs); - priv->rx_bufs = NULL; - } - - if (priv->tx_bufs) { - for (i = 0; i < TX_DESC_COUNT; i++) - kfree_skb(priv->tx_bufs[i].skb); - - kfree(priv->tx_bufs); - priv->tx_bufs = NULL; - } - - if (priv->rx_descs) { - dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, - priv->rx_desc_dma); - priv->rx_descs = NULL; - } - - if (priv->tx_descs) { - dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, - priv->tx_desc_dma); - priv->tx_descs = NULL; - } -} - -static void nb8800_dma_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - struct nb8800_tx_desc *txd; - unsigned int i; - - for (i = 0; i < RX_DESC_COUNT; i++) { - dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); - - rxd = &priv->rx_descs[i]; - rxd->desc.n_addr = rx_dma + sizeof(*rxd); - rxd->desc.r_addr = - rx_dma + offsetof(struct nb8800_rx_desc, report); - rxd->desc.config = priv->rx_dma_config; - rxd->report = 0; - } - - rxd->desc.n_addr = priv->rx_desc_dma; - rxd->desc.config |= DESC_EOC; - - priv->rx_eoc = RX_DESC_COUNT - 1; - - for (i = 0; i < TX_DESC_COUNT; i++) { - struct nb8800_tx_buf 
*txb = &priv->tx_bufs[i]; - dma_addr_t r_dma = txb->dma_desc + - offsetof(struct nb8800_tx_desc, report); - - txd = &priv->tx_descs[i]; - txd->desc[0].r_addr = r_dma; - txd->desc[1].r_addr = r_dma; - txd->report = 0; - } - - priv->tx_next = 0; - priv->tx_queue = 0; - priv->tx_done = 0; - atomic_set(&priv->tx_free, TX_DESC_COUNT); - - nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); - - wmb(); /* ensure all setup is written before starting */ -} - -static int nb8800_dma_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int n_rx = RX_DESC_COUNT; - unsigned int n_tx = TX_DESC_COUNT; - unsigned int i; - int err; - - priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, - &priv->rx_desc_dma, GFP_KERNEL); - if (!priv->rx_descs) - goto err_out; - - priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); - if (!priv->rx_bufs) - goto err_out; - - for (i = 0; i < n_rx; i++) { - err = nb8800_alloc_rx(dev, i, false); - if (err) - goto err_out; - } - - priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, - &priv->tx_desc_dma, GFP_KERNEL); - if (!priv->tx_descs) - goto err_out; - - priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); - if (!priv->tx_bufs) - goto err_out; - - for (i = 0; i < n_tx; i++) - priv->tx_bufs[i].dma_desc = - priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); - - nb8800_dma_reset(dev); - - return 0; - -err_out: - nb8800_dma_free(dev); - - return -ENOMEM; -} - -static int nb8800_dma_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; - struct nb8800_tx_desc *txd = &priv->tx_descs[0]; - int retry = 5; - u32 txcr; - u32 rxcr; - int err; - unsigned int i; - - /* wait for tx to finish */ - err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, - !(txcr & TCR_EN) && - priv->tx_done == priv->tx_next, - 1000, 1000000); - if (err) - return err; - - /* The rx DMA only stops if it reaches the end of chain. - * To make this happen, we set the EOC flag on all rx - * descriptors, put the device in loopback mode, and send - * a few dummy frames. The interrupt handler will ignore - * these since NAPI is disabled and no real frames are in - * the tx queue. - */ - - for (i = 0; i < RX_DESC_COUNT; i++) - priv->rx_descs[i].desc.config |= DESC_EOC; - - txd->desc[0].s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; - memset(txd->buf, 0, sizeof(txd->buf)); - - nb8800_mac_af(dev, false); - nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - - do { - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); - nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); - - err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, - rxcr, !(rxcr & RCR_EN), - 1000, 100000); - } while (err && --retry); - - nb8800_mac_af(dev, true); - nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - nb8800_dma_reset(dev); - - return retry ? 
0 : -ETIMEDOUT; -} - -static void nb8800_pause_adv(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - if (!phydev) - return; - - phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx); -} - -static int nb8800_open(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev; - int err; - - /* clear any pending interrupts */ - nb8800_writel(priv, NB8800_RXC_SR, 0xf); - nb8800_writel(priv, NB8800_TXC_SR, 0xf); - - err = nb8800_dma_init(dev); - if (err) - return err; - - err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); - if (err) - goto err_free_dma; - - nb8800_mac_rx(dev, true); - nb8800_mac_tx(dev, true); - - phydev = of_phy_connect(dev, priv->phy_node, - nb8800_link_reconfigure, 0, - priv->phy_mode); - if (!phydev) { - err = -ENODEV; - goto err_free_irq; - } - - nb8800_pause_adv(dev); - - netdev_reset_queue(dev); - napi_enable(&priv->napi); - netif_start_queue(dev); - - nb8800_start_rx(dev); - phy_start(phydev); - - return 0; - -err_free_irq: - free_irq(dev->irq, dev); -err_free_dma: - nb8800_dma_free(dev); - - return err; -} - -static int nb8800_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - phy_stop(phydev); - - netif_stop_queue(dev); - napi_disable(&priv->napi); - - nb8800_dma_stop(dev); - nb8800_mac_rx(dev, false); - nb8800_mac_tx(dev, false); - - phy_disconnect(phydev); - - free_irq(dev->irq, dev); - - nb8800_dma_free(dev); - - return 0; -} - -static const struct net_device_ops nb8800_netdev_ops = { - .ndo_open = nb8800_open, - .ndo_stop = nb8800_stop, - .ndo_start_xmit = nb8800_xmit, - .ndo_set_mac_address = nb8800_set_mac_address, - .ndo_set_rx_mode = nb8800_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl, - .ndo_validate_addr = eth_validate_addr, -}; - -static void nb8800_get_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - pp->autoneg = priv->pause_aneg; - pp->rx_pause = priv->pause_rx; - pp->tx_pause = priv->pause_tx; -} - -static int nb8800_set_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - priv->pause_aneg = pp->autoneg; - priv->pause_rx = pp->rx_pause; - priv->pause_tx = pp->tx_pause; - - nb8800_pause_adv(dev); - - if (!priv->pause_aneg) - nb8800_pause_config(dev); - else if (phydev) - phy_start_aneg(phydev); - - return 0; -} - -static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { - "rx_bytes_ok", - "rx_frames_ok", - "rx_undersize_frames", - "rx_fragment_frames", - "rx_64_byte_frames", - "rx_127_byte_frames", - "rx_255_byte_frames", - "rx_511_byte_frames", - "rx_1023_byte_frames", - "rx_max_size_frames", - "rx_oversize_frames", - "rx_bad_fcs_frames", - "rx_broadcast_frames", - "rx_multicast_frames", - "rx_control_frames", - "rx_pause_frames", - "rx_unsup_control_frames", - "rx_align_error_frames", - "rx_overrun_frames", - "rx_jabber_frames", - "rx_bytes", - "rx_frames", - - "tx_bytes_ok", - "tx_frames_ok", - "tx_64_byte_frames", - "tx_127_byte_frames", - "tx_255_byte_frames", - "tx_511_byte_frames", - "tx_1023_byte_frames", - "tx_max_size_frames", - "tx_oversize_frames", - "tx_broadcast_frames", - "tx_multicast_frames", - "tx_control_frames", - "tx_pause_frames", - "tx_underrun_frames", - "tx_single_collision_frames", - "tx_multi_collision_frames", - 
"tx_deferred_collision_frames", - "tx_late_collision_frames", - "tx_excessive_collision_frames", - "tx_bytes", - "tx_frames", - "tx_collisions", -}; - -#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) - -static int nb8800_get_sset_count(struct net_device *dev, int sset) -{ - if (sset == ETH_SS_STATS) - return NB8800_NUM_STATS; - - return -EOPNOTSUPP; -} - -static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) -{ - if (sset == ETH_SS_STATS) - memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); -} - -static u32 nb8800_read_stat(struct net_device *dev, int index) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_STAT_INDEX, index); - - return nb8800_readl(priv, NB8800_STAT_DATA); -} - -static void nb8800_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *estats, u64 *st) -{ - unsigned int i; - u32 rx, tx; - - for (i = 0; i < NB8800_NUM_STATS / 2; i++) { - rx = nb8800_read_stat(dev, i); - tx = nb8800_read_stat(dev, i | 0x80); - st[i] = rx; - st[i + NB8800_NUM_STATS / 2] = tx; - } -} - -static const struct ethtool_ops nb8800_ethtool_ops = { - .nway_reset = phy_ethtool_nway_reset, - .get_link = ethtool_op_get_link, - .get_pauseparam = nb8800_get_pauseparam, - .set_pauseparam = nb8800_set_pauseparam, - .get_sset_count = nb8800_get_sset_count, - .get_strings = nb8800_get_strings, - .get_ethtool_stats = nb8800_get_ethtool_stats, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, -}; - -static int nb8800_hw_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 val; - - val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; - nb8800_writeb(priv, NB8800_TX_CTL1, val); - - /* Collision retry count */ - nb8800_writeb(priv, NB8800_TX_CTL2, 5); - - val = RX_PAD_STRIP | RX_AF_EN; - nb8800_writeb(priv, NB8800_RX_CTL, val); - - /* Chosen by fair dice roll */ - nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); - - /* TX cycles per deferral period */ - nb8800_writeb(priv, NB8800_TX_SDP, 12); - - /* The following three threshold values have been - * experimentally determined for good results. 
- */ - - /* RX/TX FIFO threshold for partial empty (64-bit entries) */ - nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); - - /* RX/TX FIFO threshold for partial full (64-bit entries) */ - nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); - - /* Buffer size for transmit (64-bit entries) */ - nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); - - /* Configure tx DMA */ - - val = nb8800_readl(priv, NB8800_TXC_CR); - val &= TCR_LE; /* keep endian setting */ - val |= TCR_DM; /* DMA descriptor mode */ - val |= TCR_RS; /* automatically store tx status */ - val |= TCR_DIE; /* interrupt on DMA chain completion */ - val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ - val |= TCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_TXC_CR, val); - - /* TX complete interrupt after 10 ms or 7 frames (see above) */ - val = clk_get_rate(priv->clk) / 100; - nb8800_writel(priv, NB8800_TX_ITR, val); - - /* Configure rx DMA */ - - val = nb8800_readl(priv, NB8800_RXC_CR); - val &= RCR_LE; /* keep endian setting */ - val |= RCR_DM; /* DMA descriptor mode */ - val |= RCR_RS; /* automatically store rx status */ - val |= RCR_DIE; /* interrupt at end of DMA chain */ - val |= RCR_RFI(7); /* interrupt after 7 frames received */ - val |= RCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_RXC_CR, val); - - /* The rx interrupt can fire before the DMA has completed - * unless a small delay is added. 50 us is hopefully enough. - */ - priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; - - /* In NAPI poll mode we want to disable interrupts, but the - * hardware does not permit this. Delay 10 ms instead. - */ - priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; - - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; - - /* Flow control settings */ - - /* Pause time of 0.1 ms */ - val = 100000 / 512; - nb8800_writeb(priv, NB8800_PQ1, val >> 8); - nb8800_writeb(priv, NB8800_PQ2, val & 0xff); - - /* Auto-negotiate by default */ - priv->pause_aneg = true; - priv->pause_rx = true; - priv->pause_tx = true; - - nb8800_mc_init(dev, 0); - - return 0; -} - -static int nb8800_tangox_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 pad_mode = PAD_MODE_MII; - - switch (priv->phy_mode) { - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_GMII: - pad_mode = PAD_MODE_MII; - break; - - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - pad_mode = PAD_MODE_RGMII; - break; - - default: - dev_err(dev->dev.parent, "unsupported phy mode %s\n", - phy_modes(priv->phy_mode)); - return -EINVAL; - } - - nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); - - return 0; -} - -static int nb8800_tangox_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int clk_div; - - nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); - usleep_range(1000, 10000); - nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); - - wmb(); /* ensure reset is cleared before proceeding */ - - clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); - nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); - - return 0; -} - -static const struct nb8800_ops nb8800_tangox_ops = { - .init = nb8800_tangox_init, - .reset = nb8800_tangox_reset, -}; - -static int nb8800_tango4_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int err; - - err = nb8800_tangox_init(dev); - if (err) 
- return err; - - /* On tango4 interrupt on DMA completion per frame works and gives - * better performance despite generating more rx interrupts. - */ - - /* Disable unnecessary interrupt on rx completion */ - nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); - - /* Request interrupt on descriptor DMA completion */ - priv->rx_dma_config |= DESC_ID; - - return 0; -} - -static const struct nb8800_ops nb8800_tango4_ops = { - .init = nb8800_tango4_init, - .reset = nb8800_tangox_reset, -}; - -static const struct of_device_id nb8800_dt_ids[] = { - { - .compatible = "aurora,nb8800", - }, - { - .compatible = "sigma,smp8642-ethernet", - .data = &nb8800_tangox_ops, - }, - { - .compatible = "sigma,smp8734-ethernet", - .data = &nb8800_tango4_ops, - }, - { } -}; -MODULE_DEVICE_TABLE(of, nb8800_dt_ids); - -static int nb8800_probe(struct platform_device *pdev) -{ - const struct of_device_id *match; - const struct nb8800_ops *ops = NULL; - struct nb8800_priv *priv; - struct resource *res; - struct net_device *dev; - struct mii_bus *bus; - const unsigned char *mac; - void __iomem *base; - int irq; - int ret; - - match = of_match_device(nb8800_dt_ids, &pdev->dev); - if (match) - ops = match->data; - - irq = platform_get_irq(pdev, 0); - if (irq <= 0) - return -EINVAL; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); - - dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); - - dev = alloc_etherdev(sizeof(*priv)); - if (!dev) - return -ENOMEM; - - platform_set_drvdata(pdev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); - - priv = netdev_priv(dev); - priv->base = base; - - ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode); - if (ret) - priv->phy_mode = PHY_INTERFACE_MODE_RGMII; - - priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(priv->clk); - goto err_free_dev; - } - - ret = clk_prepare_enable(priv->clk); - if (ret) - goto err_free_dev; - - spin_lock_init(&priv->tx_lock); - - if (ops && ops->reset) { - ret = ops->reset(dev); - if (ret) - goto err_disable_clk; - } - - bus = devm_mdiobus_alloc(&pdev->dev); - if (!bus) { - ret = -ENOMEM; - goto err_disable_clk; - } - - bus->name = "nb8800-mii"; - bus->read = nb8800_mdio_read; - bus->write = nb8800_mdio_write; - bus->parent = &pdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", - (unsigned long)res->start); - bus->priv = priv; - - ret = of_mdiobus_register(bus, pdev->dev.of_node); - if (ret) { - dev_err(&pdev->dev, "failed to register MII bus\n"); - goto err_disable_clk; - } - - if (of_phy_is_fixed_link(pdev->dev.of_node)) { - ret = of_phy_register_fixed_link(pdev->dev.of_node); - if (ret < 0) { - dev_err(&pdev->dev, "bad fixed-link spec\n"); - goto err_free_bus; - } - priv->phy_node = of_node_get(pdev->dev.of_node); - } - - if (!priv->phy_node) - priv->phy_node = of_parse_phandle(pdev->dev.of_node, - "phy-handle", 0); - - if (!priv->phy_node) { - dev_err(&pdev->dev, "no PHY specified\n"); - ret = -ENODEV; - goto err_free_bus; - } - - priv->mii_bus = bus; - - ret = nb8800_hw_init(dev); - if (ret) - goto err_deregister_fixed_link; - - if (ops && ops->init) { - ret = ops->init(dev); - if (ret) - goto err_deregister_fixed_link; - } - - dev->netdev_ops = &nb8800_netdev_ops; - dev->ethtool_ops = &nb8800_ethtool_ops; - dev->flags |= IFF_MULTICAST; - dev->irq = irq; - - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - 
ether_addr_copy(dev->dev_addr, mac); - - if (!is_valid_ether_addr(dev->dev_addr)) - eth_hw_addr_random(dev); - - nb8800_update_mac_addr(dev); - - netif_carrier_off(dev); - - ret = register_netdev(dev); - if (ret) { - netdev_err(dev, "failed to register netdev\n"); - goto err_free_dma; - } - - netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT); - - netdev_info(dev, "MAC address %pM\n", dev->dev_addr); - - return 0; - -err_free_dma: - nb8800_dma_free(dev); -err_deregister_fixed_link: - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); -err_free_bus: - of_node_put(priv->phy_node); - mdiobus_unregister(bus); -err_disable_clk: - clk_disable_unprepare(priv->clk); -err_free_dev: - free_netdev(dev); - - return ret; -} - -static int nb8800_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct nb8800_priv *priv = netdev_priv(ndev); - - unregister_netdev(ndev); - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); - of_node_put(priv->phy_node); - - mdiobus_unregister(priv->mii_bus); - - clk_disable_unprepare(priv->clk); - - nb8800_dma_free(ndev); - free_netdev(ndev); - - return 0; -} - -static struct platform_driver nb8800_driver = { - .driver = { - .name = "nb8800", - .of_match_table = nb8800_dt_ids, - }, - .probe = nb8800_probe, - .remove = nb8800_remove, -}; - -module_platform_driver(nb8800_driver); - -MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); -MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h deleted file mode 100644 index 40941fb6065b..000000000000 --- a/drivers/net/ethernet/aurora/nb8800.h +++ /dev/null @@ -1,316 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NB8800_H_ -#define _NB8800_H_ - -#include <linux/types.h> -#include <linux/skbuff.h> -#include <linux/phy.h> -#include <linux/clk.h> -#include <linux/bitops.h> - -#define RX_DESC_COUNT 256 -#define TX_DESC_COUNT 256 - -#define NB8800_DESC_LOW 4 - -#define RX_BUF_SIZE 1552 - -#define RX_COPYBREAK 256 -#define RX_COPYHDR 128 - -#define MAX_MDC_CLOCK 2500000 - -/* Stargate Solutions SSN8800 core registers */ -#define NB8800_TX_CTL1 0x000 -#define TX_TPD BIT(5) -#define TX_APPEND_FCS BIT(4) -#define TX_PAD_EN BIT(3) -#define TX_RETRY_EN BIT(2) -#define TX_EN BIT(0) - -#define NB8800_TX_CTL2 0x001 - -#define NB8800_RX_CTL 0x004 -#define RX_BC_DISABLE BIT(7) -#define RX_RUNT BIT(6) -#define RX_AF_EN BIT(5) -#define RX_PAUSE_EN BIT(3) -#define RX_SEND_CRC BIT(2) -#define RX_PAD_STRIP BIT(1) -#define RX_EN BIT(0) - -#define NB8800_RANDOM_SEED 0x008 -#define NB8800_TX_SDP 0x14 -#define NB8800_TX_TPDP1 0x18 -#define NB8800_TX_TPDP2 0x19 -#define NB8800_SLOT_TIME 0x1c - -#define NB8800_MDIO_CMD 0x020 -#define MDIO_CMD_GO BIT(31) -#define MDIO_CMD_WR BIT(26) -#define MDIO_CMD_ADDR(x) ((x) << 21) -#define MDIO_CMD_REG(x) ((x) << 16) -#define MDIO_CMD_DATA(x) ((x) << 0) - -#define NB8800_MDIO_STS 0x024 -#define MDIO_STS_ERR BIT(31) - -#define NB8800_MC_ADDR(i) (0x028 + (i)) -#define NB8800_MC_INIT 0x02e -#define NB8800_UC_ADDR(i) (0x03c + (i)) - -#define NB8800_MAC_MODE 0x044 -#define RGMII_MODE BIT(7) -#define HALF_DUPLEX BIT(4) -#define BURST_EN BIT(3) -#define LOOPBACK_EN BIT(2) -#define GMAC_MODE BIT(0) - -#define NB8800_IC_THRESHOLD 0x050 -#define NB8800_PE_THRESHOLD 0x051 -#define NB8800_PF_THRESHOLD 0x052 -#define NB8800_TX_BUFSIZE 0x054 -#define 
NB8800_FIFO_CTL 0x056 -#define NB8800_PQ1 0x060 -#define NB8800_PQ2 0x061 -#define NB8800_SRC_ADDR(i) (0x06a + (i)) -#define NB8800_STAT_DATA 0x078 -#define NB8800_STAT_INDEX 0x07c -#define NB8800_STAT_CLEAR 0x07d - -#define NB8800_SLEEP_MODE 0x07e -#define SLEEP_MODE BIT(0) - -#define NB8800_WAKEUP 0x07f -#define WAKEUP BIT(0) - -/* Aurora NB8800 host interface registers */ -#define NB8800_TXC_CR 0x100 -#define TCR_LK BIT(12) -#define TCR_DS BIT(11) -#define TCR_BTS(x) (((x) & 0x7) << 8) -#define TCR_DIE BIT(7) -#define TCR_TFI(x) (((x) & 0x7) << 4) -#define TCR_LE BIT(3) -#define TCR_RS BIT(2) -#define TCR_DM BIT(1) -#define TCR_EN BIT(0) - -#define NB8800_TXC_SR 0x104 -#define TSR_DE BIT(3) -#define TSR_DI BIT(2) -#define TSR_TO BIT(1) -#define TSR_TI BIT(0) - -#define NB8800_TX_SAR 0x108 -#define NB8800_TX_DESC_ADDR 0x10c - -#define NB8800_TX_REPORT_ADDR 0x110 -#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) -#define TX_FIRST_DEFERRAL BIT(7) -#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) -#define TX_LATE_COLLISION BIT(2) -#define TX_PACKET_DROPPED BIT(1) -#define TX_FIFO_UNDERRUN BIT(0) -#define IS_TX_ERROR(r) ((r) & 0x07) - -#define NB8800_TX_FIFO_SR 0x114 -#define NB8800_TX_ITR 0x118 - -#define NB8800_RXC_CR 0x200 -#define RCR_FL BIT(13) -#define RCR_LK BIT(12) -#define RCR_DS BIT(11) -#define RCR_BTS(x) (((x) & 7) << 8) -#define RCR_DIE BIT(7) -#define RCR_RFI(x) (((x) & 7) << 4) -#define RCR_LE BIT(3) -#define RCR_RS BIT(2) -#define RCR_DM BIT(1) -#define RCR_EN BIT(0) - -#define NB8800_RXC_SR 0x204 -#define RSR_DE BIT(3) -#define RSR_DI BIT(2) -#define RSR_RO BIT(1) -#define RSR_RI BIT(0) - -#define NB8800_RX_SAR 0x208 -#define NB8800_RX_DESC_ADDR 0x20c - -#define NB8800_RX_REPORT_ADDR 0x210 -#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) -#define RX_MULTICAST_PKT BIT(9) -#define RX_BROADCAST_PKT BIT(8) -#define RX_LENGTH_ERR BIT(7) -#define RX_FCS_ERR BIT(6) -#define RX_RUNT_PKT BIT(5) -#define RX_FIFO_OVERRUN BIT(4) -#define RX_LATE_COLLISION BIT(3) -#define RX_ALIGNMENT_ERROR BIT(2) -#define RX_ERROR_MASK 0xfc -#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) - -#define NB8800_RX_FIFO_SR 0x214 -#define NB8800_RX_ITR 0x218 - -/* Sigma Designs SMP86xx additional registers */ -#define NB8800_TANGOX_PAD_MODE 0x400 -#define PAD_MODE_MASK 0x7 -#define PAD_MODE_MII 0x0 -#define PAD_MODE_RGMII 0x1 -#define PAD_MODE_GTX_CLK_INV BIT(3) -#define PAD_MODE_GTX_CLK_DELAY BIT(4) - -#define NB8800_TANGOX_MDIO_CLKDIV 0x420 -#define NB8800_TANGOX_RESET 0x424 - -/* Hardware DMA descriptor */ -struct nb8800_dma_desc { - u32 s_addr; /* start address */ - u32 n_addr; /* next descriptor address */ - u32 r_addr; /* report address */ - u32 config; -} __aligned(8); - -#define DESC_ID BIT(23) -#define DESC_EOC BIT(22) -#define DESC_EOF BIT(21) -#define DESC_LK BIT(20) -#define DESC_DS BIT(19) -#define DESC_BTS(x) (((x) & 0x7) << 16) - -/* DMA descriptor and associated data for rx. - * Allocated from coherent memory. - */ -struct nb8800_rx_desc { - /* DMA descriptor */ - struct nb8800_dma_desc desc; - - /* Status report filled in by hardware */ - u32 report; -}; - -/* Address of buffer on rx ring */ -struct nb8800_rx_buf { - struct page *page; - unsigned long offset; -}; - -/* DMA descriptors and associated data for tx. - * Allocated from coherent memory. - */ -struct nb8800_tx_desc { - /* DMA descriptor. The second descriptor is used if packet - * data is unaligned. 
- */ - struct nb8800_dma_desc desc[2]; - - /* Status report filled in by hardware */ - u32 report; - - /* Bounce buffer for initial unaligned part of packet */ - u8 buf[8] __aligned(8); -}; - -/* Packet in tx queue */ -struct nb8800_tx_buf { - /* Currently queued skb */ - struct sk_buff *skb; - - /* DMA address of the first descriptor */ - dma_addr_t dma_desc; - - /* DMA address of packet data */ - dma_addr_t dma_addr; - - /* Length of DMA mapping, less than skb->len if alignment - * buffer is used. - */ - unsigned int dma_len; - - /* Number of packets in chain starting here */ - unsigned int chain_len; - - /* Packet chain ready to be submitted to hardware */ - bool ready; -}; - -struct nb8800_priv { - struct napi_struct napi; - - void __iomem *base; - - /* RX DMA descriptors */ - struct nb8800_rx_desc *rx_descs; - - /* RX buffers referenced by DMA descriptors */ - struct nb8800_rx_buf *rx_bufs; - - /* Current end of chain */ - u32 rx_eoc; - - /* Value for rx interrupt time register in NAPI interrupt mode */ - u32 rx_itr_irq; - - /* Value for rx interrupt time register in NAPI poll mode */ - u32 rx_itr_poll; - - /* Value for config field of rx DMA descriptors */ - u32 rx_dma_config; - - /* TX DMA descriptors */ - struct nb8800_tx_desc *tx_descs; - - /* TX packet queue */ - struct nb8800_tx_buf *tx_bufs; - - /* Number of free tx queue entries */ - atomic_t tx_free; - - /* First free tx queue entry */ - u32 tx_next; - - /* Next buffer to transmit */ - u32 tx_queue; - - /* Start of current packet chain */ - struct nb8800_tx_buf *tx_chain; - - /* Next buffer to reclaim */ - u32 tx_done; - - /* Lock for DMA activation */ - spinlock_t tx_lock; - - struct mii_bus *mii_bus; - struct device_node *phy_node; - - /* PHY connection type from DT */ - phy_interface_t phy_mode; - - /* Current link status */ - int speed; - int duplex; - int link; - - /* Pause settings */ - bool pause_aneg; - bool pause_rx; - bool pause_tx; - - /* DMA base address of rx descriptors, see rx_descs above */ - dma_addr_t rx_desc_dma; - - /* DMA base address of tx descriptors, see tx_descs above */ - dma_addr_t tx_desc_dma; - - struct clk *clk; -}; - -struct nb8800_ops { - int (*init)(struct net_device *dev); - int (*reset)(struct net_device *dev); -}; - -#endif /* _NB8800_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d31a5ad7522a..f508c5c61a30 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -255,6 +255,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY, + ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION, ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, }; @@ -1265,8 +1266,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, } else { tpa_info->hash_type = PKT_HASH_TYPE_NONE; tpa_info->gso_type = 0; - if (netif_msg_rx_err(bp)) - netdev_warn(bp->dev, "TPA packet without valid hash\n"); + netif_warn(bp, rx_err, bp->dev, "TPA packet without valid hash\n"); } tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2); tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata); @@ -2021,10 +2021,9 @@ static int bnxt_async_event_process(struct bnxt *bp, goto async_event_process_exit; set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event); break; - case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: - if (netif_msg_hw(bp)) - netdev_warn(bp->dev, "Received 
RESET_NOTIFY event, data1: 0x%x, data2: 0x%x\n", - data1, data2); + case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { + char *fatal_str = "non-fatal"; + if (!bp->fw_health) goto async_event_process_exit; @@ -2036,14 +2035,17 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!bp->fw_reset_max_dsecs) bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS; if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { - netdev_warn(bp->dev, "Firmware fatal reset event received\n"); + fatal_str = "fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); - } else { - netdev_warn(bp->dev, "Firmware non-fatal reset event received, max wait time %d msec\n", - bp->fw_reset_max_dsecs * 100); } + netif_warn(bp, hw, bp->dev, + "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + fatal_str, data1, data2, + bp->fw_reset_min_dsecs * 100, + bp->fw_reset_max_dsecs * 100); set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); break; + } case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -2055,13 +2057,11 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!fw_health->enabled) break; - if (netif_msg_drv(bp)) - netdev_info(bp->dev, "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", - fw_health->enabled, fw_health->master, - bnxt_fw_health_readl(bp, - BNXT_FW_RESET_CNT_REG), - bnxt_fw_health_readl(bp, - BNXT_FW_HEALTH_REG)); + netif_info(bp, drv, bp->dev, + "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", + fw_health->enabled, fw_health->master, + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG), + bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG)); fw_health->tmr_multiplier = DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); @@ -2072,6 +2072,11 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION: + netif_notice(bp, hw, bp->dev, + "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n", + data1, data2); + goto async_event_process_exit; case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: { struct bnxt_rx_ring_info *rxr; u16 grp_idx; @@ -2394,6 +2399,10 @@ static int bnxt_poll(struct napi_struct *napi, int budget) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int work_done = 0; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } while (1) { work_done += bnxt_poll_work(bp, cpr, budget - work_done); @@ -2468,6 +2477,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) int work_done = 0; u32 cons; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } if (cpr->has_more_work) { cpr->has_more_work = 0; work_done = __bnxt_poll_cqs(bp, bnapi, budget); @@ -4272,6 +4285,9 @@ static void bnxt_disable_int_sync(struct bnxt *bp) { int i; + if (!bp->irq_tbl) + return; + atomic_inc(&bp->intr_sem); bnxt_disable_int(bp); @@ -4425,6 +4441,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (!timeout) timeout = DFLT_HWRM_CMD_TIMEOUT; + /* Limit timeout to an upper limit */ + timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT); /* convert timeout to usec */ timeout *= 1000; @@ -6845,6 +6863,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) struct hwrm_func_backing_store_cfg_input req = {0}; struct bnxt_ctx_mem_info *ctx = bp->ctx; 
struct bnxt_ctx_pg_info *ctx_pg; + u32 req_len = sizeof(req); __le32 *num_entries; __le64 *pg_dir; u32 flags = 0; @@ -6855,6 +6874,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) if (!ctx) return 0; + if (req_len > bp->hwrm_max_ext_req_len) + req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1); req.enables = cpu_to_le32(enables); @@ -6938,7 +6959,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir); } req.flags = cpu_to_le32(flags); - return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT); } static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, @@ -7438,9 +7459,22 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) sig = readl(hs + offsetof(struct hcomm_status, sig_ver)); if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) { - if (bp->fw_health) - bp->fw_health->status_reliable = false; - return; + if (!bp->chip_num) { + __bnxt_map_fw_health_reg(bp, BNXT_GRC_REG_BASE); + bp->chip_num = readl(bp->bar0 + + BNXT_FW_HEALTH_WIN_BASE + + BNXT_GRC_REG_CHIP_NUM); + } + if (!BNXT_CHIP_P5(bp)) { + if (bp->fw_health) + bp->fw_health->status_reliable = false; + return; + } + status_loc = BNXT_GRC_REG_STATUS_P5 | + BNXT_FW_HEALTH_REG_TYPE_BAR0; + } else { + status_loc = readl(hs + offsetof(struct hcomm_status, + fw_status_loc)); } if (__bnxt_alloc_fw_health(bp)) { @@ -7448,7 +7482,6 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) return; } - status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc)); bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc; reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc); if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) { @@ -8811,7 +8844,8 @@ static void bnxt_disable_napi(struct bnxt *bp) { int i; - if (!bp->bnapi) + if (!bp->bnapi || + test_and_set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state)) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -8828,6 +8862,7 @@ static void bnxt_enable_napi(struct bnxt *bp) { int i; + clear_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr; @@ -9334,13 +9369,60 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) static int bnxt_fw_init_one(struct bnxt *bp); +static int bnxt_fw_reset_via_optee(struct bnxt *bp) +{ +#ifdef CONFIG_TEE_BNXT_FW + int rc = tee_bnxt_fw_load(); + + if (rc) + netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); + + return rc; +#else + netdev_err(bp->dev, "OP-TEE not supported\n"); + return -ENODEV; +#endif +} + +static int bnxt_try_recover_fw(struct bnxt *bp) +{ + if (bp->fw_health && bp->fw_health->status_reliable) { + int retry = 0, rc; + u32 sts; + + mutex_lock(&bp->hwrm_cmd_lock); + do { + rc = __bnxt_hwrm_ver_get(bp, true); + sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (!sts || !BNXT_FW_IS_BOOTING(sts)) + break; + retry++; + } while (rc == -EBUSY && retry < BNXT_FW_RETRY); + mutex_unlock(&bp->hwrm_cmd_lock); + + if (!BNXT_FW_IS_HEALTHY(sts)) { + netdev_err(bp->dev, + "Firmware not responding, status: 0x%x\n", + sts); + rc = -ENODEV; + } + if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { + netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); + return bnxt_fw_reset_via_optee(bp); + } + return rc; + } + + return -ENODEV; +} + static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct 
hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_func_drv_if_change_input req = {0}; bool resc_reinit = false, fw_reset = false; + int rc, retry = 0; u32 flags = 0; - int rc; if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -9349,10 +9431,25 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (up) req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + while (retry < BNXT_FW_IF_RETRY) { + rc = _hwrm_send_message(bp, &req, sizeof(req), + HWRM_CMD_TIMEOUT); + if (rc != -EAGAIN) + break; + + msleep(50); + retry++; + } if (!rc) flags = le32_to_cpu(resp->flags); mutex_unlock(&bp->hwrm_cmd_lock); + + if (rc == -EAGAIN) + return rc; + if (rc && up) { + rc = bnxt_try_recover_fw(bp); + fw_reset = true; + } if (rc) return rc; @@ -9692,6 +9789,25 @@ static void bnxt_preset_reg_win(struct bnxt *bp) static int bnxt_init_dflt_ring_mode(struct bnxt *bp); +static int bnxt_reinit_after_abort(struct bnxt *bp) +{ + int rc; + + if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + return -EBUSY; + + rc = bnxt_fw_init_one(bp); + if (!rc) { + bnxt_clear_int_mode(bp); + rc = bnxt_init_int_mode(bp); + if (!rc) { + clear_bit(BNXT_STATE_ABORT_ERR, &bp->state); + set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); + } + } + return rc; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -9850,8 +9966,14 @@ static int bnxt_open(struct net_device *dev) int rc; if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { - netdev_err(bp->dev, "A previous firmware reset did not complete, aborting\n"); - return -ENODEV; + rc = bnxt_reinit_after_abort(bp); + if (rc) { + if (rc == -EBUSY) + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting\n"); + else + netdev_err(bp->dev, "Failed to reinitialize after aborted firmware reset\n"); + return -ENODEV; + } } rc = bnxt_hwrm_if_change(bp, true); @@ -10788,11 +10910,18 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); - /* When firmware is fatal state, disable PCI device to prevent - * any potential bad DMAs before freeing kernel memory. + /* When firmware is in fatal state, quiesce device and disable + * bus master to prevent any potential bad DMAs before freeing + * kernel memory. 
*/ - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); + } __bnxt_close_nic(bp, true, false); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); @@ -11180,21 +11309,6 @@ static void bnxt_init_dflt_coal(struct bnxt *bp) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; } -static int bnxt_fw_reset_via_optee(struct bnxt *bp) -{ -#ifdef CONFIG_TEE_BNXT_FW - int rc = tee_bnxt_fw_load(); - - if (rc) - netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); - - return rc; -#else - netdev_err(bp->dev, "OP-TEE not supported\n"); - return -ENODEV; -#endif -} - static int bnxt_fw_init_one_p1(struct bnxt *bp) { int rc; @@ -11203,19 +11317,10 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) rc = bnxt_hwrm_ver_get(bp); bnxt_try_map_fw_health_reg(bp); if (rc) { - if (bp->fw_health && bp->fw_health->status_reliable) { - u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - - netdev_err(bp->dev, - "Firmware not responding, status: 0x%x\n", - sts); - if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { - netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); - rc = bnxt_fw_reset_via_optee(bp); - if (!rc) - rc = bnxt_hwrm_ver_get(bp); - } - } + rc = bnxt_try_recover_fw(bp); + if (rc) + return rc; + rc = bnxt_hwrm_ver_get(bp); if (rc) return rc; } @@ -11415,6 +11520,12 @@ static void bnxt_reset_all(struct bnxt *bp) bp->fw_reset_timestamp = jiffies; } +static bool bnxt_fw_reset_timeout(struct bnxt *bp) +{ + return time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10)); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); @@ -11436,8 +11547,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_timestamp)); goto fw_reset_abort; } else if (n > 0) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bp->fw_reset_state = 0; netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n", @@ -11466,8 +11576,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); if (!(val & BNXT_FW_STATUS_SHUTDOWN) && - !time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + !bnxt_fw_reset_timeout(bp)) { bnxt_queue_fw_reset_work(bp, HZ / 5); return; } @@ -11509,8 +11618,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT; rc = __bnxt_hwrm_ver_get(bp, true); if (rc) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted\n"); goto fw_reset_abort_status; } @@ -12542,9 +12650,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &bnxt_ethtool_ops; pci_set_drvdata(pdev, dev); - if (BNXT_PF(bp)) - bnxt_vpd_read_info(bp); - rc = bnxt_alloc_hwrm_resources(bp); if (rc) goto init_err_pci_clean; @@ -12556,6 +12661,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + if (BNXT_PF(bp)) + bnxt_vpd_read_info(bp); + if (BNXT_CHIP_P5(bp)) { bp->flags |= BNXT_FLAG_CHIP_P5; if 
(BNXT_CHIP_SR2(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 51996c85547e..4ef6888acdc6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -656,6 +656,7 @@ struct nqe_cn { #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define DFLT_HWRM_CMD_TIMEOUT 500 +#define HWRM_CMD_MAX_TIMEOUT 40000 #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) @@ -1345,9 +1346,14 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +#define BNXT_GRC_REG_STATUS_P5 0x520 + #define BNXT_GRCPF_REG_KONG_COMM 0xA00 #define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00 +#define BNXT_GRC_REG_CHIP_NUM 0x48 +#define BNXT_GRC_REG_BASE 0x260000 + #define BNXT_GRC_BASE_MASK 0xfffff000 #define BNXT_GRC_OFFSET_MASK 0x00000ffc @@ -1441,6 +1447,8 @@ struct bnxt_ctx_pg_info { #define BNXT_MAX_TQM_RINGS \ (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) +#define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256 + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; @@ -1532,9 +1540,22 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ ((reg) & BNXT_GRC_OFFSET_MASK)) +#define BNXT_FW_STATUS_HEALTH_MSK 0xffff #define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000 +#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_RETRY 5 +#define BNXT_FW_IF_RETRY 10 + struct bnxt { void __iomem *bar0; void __iomem *bar1; @@ -1788,6 +1809,7 @@ struct bnxt { #define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 +#define BNXT_STATE_NAPI_DISABLED 9 #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 6b7b69ed62db..90a31b4a3020 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); - u32 val, health_status; + u32 val; int rc; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return 0; val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - health_status = val & 0xffff; - if (health_status < BNXT_FW_STATUS_HEALTHY) { + if (BNXT_FW_IS_BOOTING(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Not yet completed initialization"); if (rc) return rc; - } else if (health_status > BNXT_FW_STATUS_HEALTHY) { + } else if (BNXT_FW_IS_ERR(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Encountered fatal error and cannot recover"); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 2d3e962bdac3..d5c6e6a3d22d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 
Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2020 Broadcom Inc. + * Copyright (c) 2018-2021 Broadcom Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,6 +164,7 @@ struct cmd_nums { #define HWRM_VNIC_PLCMODES_CFG 0x48UL #define HWRM_VNIC_PLCMODES_QCFG 0x49UL #define HWRM_VNIC_QCAPS 0x4aUL + #define HWRM_VNIC_UPDATE 0x4bUL #define HWRM_RING_ALLOC 0x50UL #define HWRM_RING_FREE 0x51UL #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS 0x52UL @@ -184,6 +185,9 @@ struct cmd_nums { #define HWRM_QUEUE_MPLS_QCAPS 0x80UL #define HWRM_QUEUE_MPLSTC2PRI_QCFG 0x81UL #define HWRM_QUEUE_MPLSTC2PRI_CFG 0x82UL + #define HWRM_QUEUE_VLANPRI_QCAPS 0x83UL + #define HWRM_QUEUE_VLANPRI2PRI_QCFG 0x84UL + #define HWRM_QUEUE_VLANPRI2PRI_CFG 0x85UL #define HWRM_CFA_L2_FILTER_ALLOC 0x90UL #define HWRM_CFA_L2_FILTER_FREE 0x91UL #define HWRM_CFA_L2_FILTER_CFG 0x92UL @@ -217,6 +221,8 @@ struct cmd_nums { #define HWRM_PORT_TX_FIR_CFG 0xbbUL #define HWRM_PORT_TX_FIR_QCFG 0xbcUL #define HWRM_PORT_ECN_QSTATS 0xbdUL + #define HWRM_FW_LIVEPATCH_QUERY 0xbeUL + #define HWRM_FW_LIVEPATCH 0xbfUL #define HWRM_FW_RESET 0xc0UL #define HWRM_FW_QSTATUS 0xc1UL #define HWRM_FW_HEALTH_CHECK 0xc2UL @@ -347,6 +353,8 @@ struct cmd_nums { #define HWRM_FUNC_HOST_PF_IDS_QUERY 0x197UL #define HWRM_FUNC_QSTATS_EXT 0x198UL #define HWRM_STAT_EXT_CTX_QUERY 0x199UL + #define HWRM_FUNC_SPD_CFG 0x19aUL + #define HWRM_FUNC_SPD_QCFG 0x19bUL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -359,6 +367,11 @@ struct cmd_nums { #define HWRM_MFG_HDMA_TEST 0x209UL #define HWRM_MFG_FRU_EEPROM_WRITE 0x20aUL #define HWRM_MFG_FRU_EEPROM_READ 0x20bUL + #define HWRM_MFG_SOC_IMAGE 0x20cUL + #define HWRM_MFG_SOC_QSTATUS 0x20dUL + #define HWRM_MFG_PARAM_SEEPROM_SYNC 0x20eUL + #define HWRM_MFG_PARAM_SEEPROM_READ 0x20fUL + #define HWRM_MFG_PARAM_SEEPROM_HEALTH 0x210UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -384,6 +397,7 @@ struct cmd_nums { #define HWRM_TF_EXT_EM_QCFG 0x2e9UL #define HWRM_TF_EM_INSERT 0x2eaUL #define HWRM_TF_EM_DELETE 0x2ebUL + #define HWRM_TF_EM_HASH_INSERT 0x2ecUL #define HWRM_TF_TCAM_SET 0x2f8UL #define HWRM_TF_TCAM_GET 0x2f9UL #define HWRM_TF_TCAM_MOVE 0x2faUL @@ -486,9 +500,9 @@ struct hwrm_err_output { #define HWRM_TARGET_ID_TOOLS 0xFFFD #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 -#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_RSVD 68 -#define HWRM_VERSION_STR "1.10.1.68" +#define HWRM_VERSION_UPDATE 2 +#define HWRM_VERSION_RSVD 11 +#define HWRM_VERSION_STR "1.10.2.11" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -563,8 +577,9 @@ struct hwrm_ver_get_output { __le16 max_resp_len; __le16 def_req_timeout; u8 flags; - #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL - #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL + #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY_BACKING_STORE 0x4UL u8 unused_0[2]; u8 always_1; __le16 hwrm_intf_major; @@ -708,6 +723,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE 0x3fUL #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x42UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 
0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -815,6 +831,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY __le32 event_data2; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_SFT 0 u8 opaque_v; #define ASYNC_EVENT_CMPL_RESET_NOTIFY_V 0x1UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL @@ -832,7 +850,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST (0x1UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) - #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; @@ -1271,6 +1290,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_PROXY_SRC_INTF_OVERRIDE_SUPPORT 0x20UL #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED 0x40UL #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EVB_MODE_CFG_NOT_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_SOC_SPD_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FAST_RESET_CAPABLE 0x800UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -1315,6 +1338,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED 0x200UL #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED 0x400UL #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED 0x800UL + #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED 0x1000UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1731,6 +1755,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT 0x10UL #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT 0x20UL #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -1993,7 +2018,7 @@ struct hwrm_func_backing_store_qcaps_input { __le64 resp_addr; }; -/* hwrm_func_backing_store_qcaps_output (size:640b/80B) */ +/* hwrm_func_backing_store_qcaps_output (size:704b/88B) */ struct hwrm_func_backing_store_qcaps_output { __le16 error_code; __le16 req_type; @@ -2024,13 +2049,25 @@ struct hwrm_func_backing_store_qcaps_output { __le16 mrav_num_entries_units; u8 tqm_entries_multiple; u8 ctx_kind_initializer; - __le32 rsvd; - __le16 rsvd1; + __le16 ctx_init_mask; + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_QP 0x1UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_SRQ 0x2UL + #define 
FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_CQ 0x4UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC 0x8UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT 0x10UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV 0x20UL + u8 qp_init_offset; + u8 srq_init_offset; + u8 cq_init_offset; + u8 vnic_init_offset; u8 tqm_fp_rings_count; + u8 stat_init_offset; + u8 mrav_init_offset; + u8 rsvd[6]; u8 valid; }; -/* hwrm_func_backing_store_cfg_input (size:2048b/256B) */ +/* hwrm_func_backing_store_cfg_input (size:2432b/304B) */ struct hwrm_func_backing_store_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -2041,22 +2078,25 @@ struct hwrm_func_backing_store_cfg_input { #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE 0x1UL #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT 0x2UL __le32 enables; - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8 0x10000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9 0x20000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10 0x40000UL u8 qpc_pg_size_qpc_lvl; #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK 0xfUL #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT 0 @@ -2358,6 +2398,63 @@ struct hwrm_func_backing_store_cfg_input { __le16 tqm_entry_size; __le16 mrav_entry_size; __le16 tim_entry_size; + u8 tqm_ring8_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_1 0x1UL + #define 
FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G + u8 ring8_unused[3]; + __le32 tqm_ring8_num_entries; + __le64 tqm_ring8_page_dir; + u8 tqm_ring9_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G + u8 ring9_unused[3]; + __le32 tqm_ring9_num_entries; + __le64 tqm_ring9_page_dir; + u8 tqm_ring10_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G + u8 ring10_unused[3]; + __le32 
tqm_ring10_num_entries; + __le64 tqm_ring10_page_dir; }; /* hwrm_func_backing_store_cfg_output (size:128b/16B) */ @@ -2930,6 +3027,7 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_LAST PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL u8 option_flags; #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -3528,8 +3626,8 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED 0x8UL #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET 0x10UL #define PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x20UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xc0UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 6 + #define PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN 0x40UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1 0x80UL u8 port_cnt; #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL @@ -4119,7 +4217,10 @@ struct hwrm_queue_qportcfg_output { #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN - u8 unused_0; + u8 queue_id0_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_CNP 0x4UL char qid0_name[16]; char qid1_name[16]; char qid2_name[16]; @@ -4128,7 +4229,34 @@ struct hwrm_queue_qportcfg_output { char qid5_name[16]; char qid6_name[16]; char qid7_name[16]; - u8 unused_1[7]; + u8 queue_id1_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id2_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id3_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id4_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id5_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id6_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id7_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define 
QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_CNP 0x4UL u8 valid; }; @@ -5142,8 +5270,10 @@ struct hwrm_vnic_alloc_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL - u8 unused_0[4]; + #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL + #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID 0x2UL + __le16 virtio_net_fid; + u8 unused_0[2]; }; /* hwrm_vnic_alloc_output (size:128b/16B) */ @@ -5260,6 +5390,8 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP 0x80UL #define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP 0x100UL #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V2_CAP 0x200UL + #define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP 0x400UL + #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP 0x800UL __le16 max_aggs_supported; u8 unused_1[5]; u8 valid; @@ -5585,7 +5717,11 @@ struct hwrm_ring_alloc_output { __le16 resp_len; __le16 ring_id; __le16 logical_ring_id; - u8 unused_0[3]; + u8 push_buffer_index; + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_LAST RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[2]; u8 valid; }; @@ -5644,7 +5780,11 @@ struct hwrm_ring_reset_output { __le16 req_type; __le16 seq_id; __le16 resp_len; - u8 unused_0[4]; + u8 push_buffer_index; + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_LAST RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[3]; u8 consumer_idx[3]; u8 valid; }; @@ -6988,21 +7128,23 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { __le16 seq_id; __le16 resp_len; __le32 flags; - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL + #define 
CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE 0x8000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED 0x10000UL u8 unused_0[3]; u8 valid; }; @@ -7472,7 +7614,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_RSS_V2 + #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_MSIX_PER_VF __le16 len; u8 version; u8 count; @@ -8000,6 +8143,9 @@ struct hwrm_dbg_coredump_initiate_output { struct coredump_data_hdr { __le32 address; __le32 flags_length; + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_MASK 0xffffffUL + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_SFT 0 + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_INDIRECT_ACCESS 0x1000000UL __le32 instance; __le32 next_offset; }; @@ -8669,7 +8815,6 @@ struct hcomm_status { #define HCOMM_STATUS_TRUE_OFFSET_MASK 0xfffffffcUL #define HCOMM_STATUS_TRUE_OFFSET_SFT 2 }; - #define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL #endif /* _BNXT_HSI_H_ */ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5143cdd0eeca..8936c2bc6286 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12826,11 +12826,13 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) offset = tg3_nvram_logical_addr(tp, offset); } - } - if (!offset || !len) { - offset = TG3_NVM_VPD_OFF; - len = TG3_NVM_VPD_LEN; + if (!offset || !len) { + offset = TG3_NVM_VPD_OFF; + len = TG3_NVM_VPD_LEN; + } + } else { + len = TG3_NVM_PCI_VPD_MAX_LEN; } buf = kmalloc(len, GFP_KERNEL); @@ -12846,26 +12848,16 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) if (tg3_nvram_read_be32(tp, offset + i, &buf[i/4])) goto error; } + *vpdlen = len; } else { - u8 *ptr; ssize_t cnt; - unsigned int pos = 0; - - ptr = (u8 *)&buf[0]; - for (i = 0; pos < len && i < 3; i++, pos += cnt, ptr += cnt) { - cnt = pci_read_vpd(tp->pdev, pos, - len - pos, ptr); - if (cnt == -ETIMEDOUT || cnt == -EINTR) - cnt = 0; - else if (cnt < 0) - goto error; - } - if (pos != len) + + cnt = pci_read_vpd(tp->pdev, 0, len, (u8 *)buf); + if (cnt < 0) goto error; + *vpdlen = cnt; } - *vpdlen = len; - return buf; error: diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f..46ec4fdfd16a 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2101,6 +2101,7 @@ /* Hardware Legacy NVRAM layout */ #define TG3_NVM_VPD_OFF 0x100 #define TG3_NVM_VPD_LEN 256 +#define TG3_NVM_PCI_VPD_MAX_LEN 512 /* Hardware Selfboot NVRAM layout */ #define TG3_NVM_HWSB_CFG1 0x00000004 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c 
index 814a5b10141d..472bf8f220bc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -470,6 +470,10 @@ static void macb_set_tx_clk(struct macb *bp, int speed) if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG)) return; + /* In case of MII the PHY is the clock master */ + if (bp->phy_interface == PHY_INTERFACE_MODE_MII) + return; + switch (speed) { case SPEED_10: rate = 2500000; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 15542661e3d2..9f1965c80fb1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5137,7 +5137,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* See if FW supports FW_FILTER2 work request */ if (is_t4(adap->params.chip)) { - adap->params.filter2_wr_support = 0; + adap->params.filter2_wr_support = false; } else { params[0] = FW_PARAM_DEV(FILTER2_WR); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 98d01a7497ec..98829e482bfa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2689,7 +2689,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 -#define CHELSIO_VPD_UNIQUE_ID 0x82 /** * t4_eeprom_ptov - translate a physical EEPROM address to virtual @@ -2745,7 +2744,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) { int i, ret = 0, addr; int ec, sn, pn, na; - u8 *vpd, csum; + u8 *vpd, csum, base_val = 0; unsigned int vpdr_len, kw_offset, id_len; vpd = vmalloc(VPD_LEN); @@ -2755,17 +2754,11 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) /* Card information normally starts at VPD_BASE but early cards had * it at 0. */ - ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); if (ret < 0) goto out; - /* The VPD shall have a unique identifier specified by the PCI SIG. - * For chelsio adapters, the identifier is 0x82. The first byte of a VPD - * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software - * is expected to automatically put this entry at the - * beginning of the VPD. - */ - addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? 
VPD_BASE : VPD_BASE_OLD; ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 6d853f018d53..ef4e2febeb5b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -70,9 +70,32 @@ static struct { module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); -static struct ucc_geth_info ugeth_primary_info = { +static int ucc_geth_thread_count(enum ucc_geth_num_of_threads idx) +{ + static const u8 count[] = { + [UCC_GETH_NUM_OF_THREADS_1] = 1, + [UCC_GETH_NUM_OF_THREADS_2] = 2, + [UCC_GETH_NUM_OF_THREADS_4] = 4, + [UCC_GETH_NUM_OF_THREADS_6] = 6, + [UCC_GETH_NUM_OF_THREADS_8] = 8, + }; + if (idx >= ARRAY_SIZE(count)) + return 0; + return count[idx]; +} + +static inline int ucc_geth_tx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + +static inline int ucc_geth_rx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + +static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { - .bd_mem_part = MEM_PART_SYSTEM, .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, .max_rx_buf_length = 1536, /* adjusted at startup if max-speed 1000 */ @@ -90,8 +113,6 @@ static struct ucc_geth_info ugeth_primary_info = { .tcrc = UCC_FAST_16_BIT_CRC, .synl = UCC_FAST_SYNC_LEN_NOT_USED, }, - .numQueuesTx = 1, - .numQueuesRx = 1, .extendedFilteringChainPointer = ((uint32_t) NULL), .typeorlen = 3072 /*1536 */ , .nonBackToBackIfgPart1 = 0x40, @@ -157,8 +178,6 @@ static struct ucc_geth_info ugeth_primary_info = { .riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2, }; -static struct ucc_geth_info ugeth_info[8]; - #ifdef DEBUG static void mem_disp(u8 *addr, int size) { @@ -558,7 +577,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) int i; int length; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { if (ugeth->p_tx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenTx[i] * @@ -567,7 +586,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) mem_disp(ugeth->p_tx_bd_ring[i], length); } } - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenRx[i] * @@ -671,32 +690,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) in_be32(&ugeth->ug_regs->scam)); if (ugeth->p_thread_data_tx) { - int numThreadsTxNumerical; - switch (ugeth->ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: - numThreadsTxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsTx); pr_info("Thread data TXs:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_tx); - for (i = 0; i < numThreadsTxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data TX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_tx[i]); @@ -705,32 +704,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) } } if (ugeth->p_thread_data_rx) { - int numThreadsRxNumerical; - switch (ugeth->ug_info->numThreadsRx) { 
- case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: - numThreadsRxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsRx); pr_info("Thread data RX:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_rx); - for (i = 0; i < numThreadsRxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data RX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_rx[i]); @@ -905,7 +884,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_send_q_mem_reg) { pr_info("Send Q memory registers:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_send_q_mem_reg); - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { pr_info("SQQD[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_send_q_mem_reg->sqqd[i]); @@ -937,7 +916,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) pr_info("RX IRQ coalescing tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_irq_coalescing_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX IRQ coalescing table entry[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_irq_coalescing_tbl-> @@ -959,7 +938,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_rx_bd_qs_tbl) { pr_info("RX BD QS tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_bd_qs_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX BD QS table[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_bd_qs_tbl[i]); @@ -1835,7 +1814,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { /* Return existing data buffers in ring */ bd = ugeth->p_rx_bd_ring[i]; @@ -1856,12 +1835,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) kfree(ugeth->rx_skbuff[i]); - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->rx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->rx_bd_ring_offset[i]); + kfree(ugeth->p_rx_bd_ring[i]); ugeth->p_rx_bd_ring[i] = NULL; } } @@ -1880,7 +1854,7 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { bd = ugeth->p_tx_bd_ring[i]; if (!bd) continue; @@ -1898,15 +1872,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) kfree(ugeth->tx_skbuff[i]); - if (ugeth->p_tx_bd_ring[i]) { - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->tx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->tx_bd_ring_offset[i]); - ugeth->p_tx_bd_ring[i] = NULL; - } + kfree(ugeth->p_tx_bd_ring[i]); + ugeth->p_tx_bd_ring[i] = 
NULL; } } @@ -1921,50 +1888,39 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) ugeth->uccf = NULL; } - if (ugeth->p_thread_data_tx) { - qe_muram_free(ugeth->thread_dat_tx_offset); - ugeth->p_thread_data_tx = NULL; - } - if (ugeth->p_thread_data_rx) { - qe_muram_free(ugeth->thread_dat_rx_offset); - ugeth->p_thread_data_rx = NULL; - } - if (ugeth->p_exf_glbl_param) { - qe_muram_free(ugeth->exf_glbl_param_offset); - ugeth->p_exf_glbl_param = NULL; - } - if (ugeth->p_rx_glbl_pram) { - qe_muram_free(ugeth->rx_glbl_pram_offset); - ugeth->p_rx_glbl_pram = NULL; - } - if (ugeth->p_tx_glbl_pram) { - qe_muram_free(ugeth->tx_glbl_pram_offset); - ugeth->p_tx_glbl_pram = NULL; - } - if (ugeth->p_send_q_mem_reg) { - qe_muram_free(ugeth->send_q_mem_reg_offset); - ugeth->p_send_q_mem_reg = NULL; - } - if (ugeth->p_scheduler) { - qe_muram_free(ugeth->scheduler_offset); - ugeth->p_scheduler = NULL; - } - if (ugeth->p_tx_fw_statistics_pram) { - qe_muram_free(ugeth->tx_fw_statistics_pram_offset); - ugeth->p_tx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_fw_statistics_pram) { - qe_muram_free(ugeth->rx_fw_statistics_pram_offset); - ugeth->p_rx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_irq_coalescing_tbl) { - qe_muram_free(ugeth->rx_irq_coalescing_tbl_offset); - ugeth->p_rx_irq_coalescing_tbl = NULL; - } - if (ugeth->p_rx_bd_qs_tbl) { - qe_muram_free(ugeth->rx_bd_qs_tbl_offset); - ugeth->p_rx_bd_qs_tbl = NULL; - } + qe_muram_free_addr(ugeth->p_thread_data_tx); + ugeth->p_thread_data_tx = NULL; + + qe_muram_free_addr(ugeth->p_thread_data_rx); + ugeth->p_thread_data_rx = NULL; + + qe_muram_free_addr(ugeth->p_exf_glbl_param); + ugeth->p_exf_glbl_param = NULL; + + qe_muram_free_addr(ugeth->p_rx_glbl_pram); + ugeth->p_rx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_tx_glbl_pram); + ugeth->p_tx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_send_q_mem_reg); + ugeth->p_send_q_mem_reg = NULL; + + qe_muram_free_addr(ugeth->p_scheduler); + ugeth->p_scheduler = NULL; + + qe_muram_free_addr(ugeth->p_tx_fw_statistics_pram); + ugeth->p_tx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_fw_statistics_pram); + ugeth->p_rx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_irq_coalescing_tbl); + ugeth->p_rx_irq_coalescing_tbl = NULL; + + qe_muram_free_addr(ugeth->p_rx_bd_qs_tbl); + ugeth->p_rx_bd_qs_tbl = NULL; + if (ugeth->p_init_enet_param_shadow) { return_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> @@ -2073,15 +2029,8 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) || - (uf_info->bd_mem_part == MEM_PART_MURAM))) { - if (netif_msg_probe(ugeth)) - pr_err("Bad memory partition value\n"); - return -EINVAL; - } - /* Rx BD lengths */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) || (ug_info->bdRingLenRx[i] % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) { @@ -2092,7 +2041,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* Tx BD lengths */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) { if (netif_msg_probe(ugeth)) pr_err("Tx BD ring length must be no smaller than 2\n"); @@ -2109,14 +2058,14 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* num Tx queues */ - if (ug_info->numQueuesTx > 
NUM_TX_QUEUES) { + if (ucc_geth_tx_queues(ug_info) > NUM_TX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of tx queues too large\n"); return -EINVAL; } /* num Rx queues */ - if (ug_info->numQueuesRx > NUM_RX_QUEUES) { + if (ucc_geth_rx_queues(ug_info) > NUM_RX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of rx queues too large\n"); return -EINVAL; @@ -2124,7 +2073,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l2qt */ for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) { - if (ug_info->l2qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l2qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("VLAN priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2133,7 +2082,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l3qt */ for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) { - if (ug_info->l3qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l3qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("IP priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2156,10 +2105,10 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* Generate uccm_mask for receive */ uf_info->uccm_mask = ug_info->eventRegMask & UCCE_OTHER;/* Errors */ - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_RXF0 << i); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_TXB0 << i); /* Initialize the general fast UCC block. */ if (ucc_fast_init(uf_info, &ugeth->uccf)) { @@ -2198,53 +2147,32 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { - /* Allocate in multiple of - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT, - according to spec */ - length = ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) - / UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - * UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) % - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_TX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_TX_BD_RING_ALIGNMENT; - ugeth->tx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - - if (ugeth->tx_bd_ring_offset[j] != 0) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->tx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_TX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j])) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - tx_bd_ring_offset[j]); - } + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { + u32 align = max(UCC_GETH_TX_BD_RING_ALIGNMENT, + UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT); + u32 alloc; + + length = ug_info->bdRingLenTx[j] * sizeof(struct qe_bd); + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + + ugeth->p_tx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); + if (!ugeth->p_tx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Tx bd rings\n"); return -ENOMEM; } /* Zero unused end of bd ring, according to spec */ - memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] + - 
ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0, - length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)); + memset(ugeth->p_tx_bd_ring[j] + length, 0, alloc - length); } /* Init Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->tx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenTx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenTx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->tx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2252,9 +2180,6 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenTx[j]; i++) - ugeth->tx_skbuff[j][i] = NULL; - ugeth->skb_curtx[j] = ugeth->skb_dirtytx[j] = 0; bd = ugeth->confBd[j] = ugeth->txBd[j] = ugeth->p_tx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenTx[j]; i++) { @@ -2284,27 +2209,15 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { + u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; + u32 alloc; + length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_RX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_RX_BD_RING_ALIGNMENT; - ugeth->rx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - if (ugeth->rx_bd_ring_offset[j] != 0) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->rx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_RX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j])) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - rx_bd_ring_offset[j]); - } + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + + ugeth->p_rx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); if (!ugeth->p_rx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Rx bd rings\n"); @@ -2313,11 +2226,11 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) } /* Init Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->rx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenRx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenRx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->rx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2325,9 +2238,6 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenRx[j]; i++) - ugeth->rx_skbuff[j][i] = NULL; - ugeth->skb_currx[j] = 0; bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenRx[j]; i++) { @@ -2359,10 +2269,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) u32 init_enet_pram_offset, cecr_subblock, command; u32 ifstat, i, j, size, l2qt, l3qt; u16 temoder = UCC_GETH_TEMODER_INIT; - u16 test; u8 function_code = 0; u8 __iomem *endOfRing; u8 numThreadsRxNumerical, numThreadsTxNumerical; + s32 rx_glbl_pram_offset, tx_glbl_pram_offset; ugeth_vdbg("%s: IN", __func__); uccf = ugeth->uccf; @@ -2371,45 +2281,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) uf_regs = uccf->uf_regs; ug_regs = ugeth->ug_regs; - switch 
(ug_info->numThreadsRx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: + numThreadsRxNumerical = ucc_geth_thread_count(ug_info->numThreadsRx); + if (!numThreadsRxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Rx threads value\n"); return -EINVAL; } - switch (ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: + numThreadsTxNumerical = ucc_geth_thread_count(ug_info->numThreadsTx); + if (!numThreadsTxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Tx threads value\n"); return -EINVAL; @@ -2507,20 +2387,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) */ /* Tx global PRAM */ /* Allocate global tx parameter RAM page */ - ugeth->tx_glbl_pram_offset = + tx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram), UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { + if (tx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_tx_glbl_pram = - (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth-> - tx_glbl_pram_offset); - /* Zero out p_tx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram)); - + ugeth->p_tx_glbl_pram = qe_muram_addr(tx_glbl_pram_offset); /* Fill global PRAM */ /* TQPTR */ @@ -2554,7 +2429,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* SQPTR */ /* Size varies with number of Tx queues */ ugeth->send_q_mem_reg_offset = - qe_muram_alloc(ug_info->numQueuesTx * + qe_muram_alloc(ucc_geth_tx_queues(ug_info) * sizeof(struct ucc_geth_send_queue_qd), UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { @@ -2570,29 +2445,20 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { endOfRing = ugeth->p_tx_bd_ring[i] + (ug_info->bdRingLenTx[i] - 1) * sizeof(struct qe_bd); - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32) virt_to_phys(endOfRing)); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32)qe_muram_dma(endOfRing)); - } + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, + (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. 
+ last_bd_completed_address, + (u32) virt_to_phys(endOfRing)); } /* schedulerbasepointer */ - if (ug_info->numQueuesTx > 1) { + if (ucc_geth_tx_queues(ug_info) > 1) { /* scheduler exists only if more than 1 tx queue */ ugeth->scheduler_offset = qe_muram_alloc(sizeof(struct ucc_geth_scheduler), @@ -2608,8 +2474,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) scheduler_offset); out_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer, ugeth->scheduler_offset); - /* Zero out p_scheduler */ - memset_io((void __iomem *)ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler)); /* Set values in scheduler */ out_be32(&ugeth->p_scheduler->mblinterval, @@ -2652,23 +2516,18 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_tx_fw_statistics_pram = (struct ucc_geth_tx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->tx_fw_statistics_pram_offset); - /* Zero out p_tx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_tx_fw_statistics_pram, - 0, sizeof(struct ucc_geth_tx_firmware_statistics_pram)); } /* temoder */ /* Already has speed set */ - if (ug_info->numQueuesTx > 1) + if (ucc_geth_tx_queues(ug_info) > 1) temoder |= TEMODER_SCHEDULER_ENABLE; if (ug_info->ipCheckSumGenerate) temoder |= TEMODER_IP_CHECKSUM_GENERATE; - temoder |= ((ug_info->numQueuesTx - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); + temoder |= ((ucc_geth_tx_queues(ug_info) - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); out_be16(&ugeth->p_tx_glbl_pram->temoder, temoder); - test = in_be16(&ugeth->p_tx_glbl_pram->temoder); - /* Function code register value to be used later */ function_code = UCC_BMR_BO_BE | UCC_BMR_GBL; /* Required for QE */ @@ -2678,20 +2537,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Rx global PRAM */ /* Allocate global rx parameter RAM page */ - ugeth->rx_glbl_pram_offset = + rx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram), UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) { + if (rx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_rx_glbl_pram = - (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth-> - rx_glbl_pram_offset); - /* Zero out p_rx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram)); - + ugeth->p_rx_glbl_pram = qe_muram_addr(rx_glbl_pram_offset); /* Fill global PRAM */ /* RQPTR */ @@ -2729,16 +2583,13 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_rx_fw_statistics_pram = (struct ucc_geth_rx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->rx_fw_statistics_pram_offset); - /* Zero out p_rx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_rx_fw_statistics_pram, 0, - sizeof(struct ucc_geth_rx_firmware_statistics_pram)); } /* intCoalescingPtr */ /* Size varies with number of Rx queues */ ugeth->rx_irq_coalescing_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * sizeof(struct ucc_geth_rx_interrupt_coalescing_entry) + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) { @@ -2754,7 +2605,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->rx_irq_coalescing_tbl_offset); /* Fill interrupt coalescing table */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { out_be32(&ugeth->p_rx_irq_coalescing_tbl->coalescingentry[i]. 
interruptcoalescingmaxvalue, ug_info->interruptcoalescingmaxvalue[i]); @@ -2803,7 +2654,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* RBDQPTR */ /* Size varies with number of Rx queues */ ugeth->rx_bd_qs_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * (sizeof(struct ucc_geth_rx_bd_queues_entry) + sizeof(struct ucc_geth_rx_prefetched_bds)), UCC_GETH_RX_BD_QUEUES_ALIGNMENT); @@ -2817,23 +2668,12 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) (struct ucc_geth_rx_bd_queues_entry __iomem *) qe_muram_addr(ugeth-> rx_bd_qs_tbl_offset); out_be32(&ugeth->p_rx_glbl_pram->rbdqptr, ugeth->rx_bd_qs_tbl_offset); - /* Zero out p_rx_bd_qs_tbl */ - memset_io((void __iomem *)ugeth->p_rx_bd_qs_tbl, - 0, - ug_info->numQueuesRx * (sizeof(struct ucc_geth_rx_bd_queues_entry) + - sizeof(struct ucc_geth_rx_prefetched_bds))); /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesRx; i++) { - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i])); - } + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { + out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, + (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); /* rest of fields handled by QE */ } @@ -2854,7 +2694,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ug_info-> vlanOperationNonTagged << REMODER_VLAN_OPERATION_NON_TAGGED_SHIFT; remoder |= ug_info->rxQoSMode << REMODER_RX_QOS_MODE_SHIFT; - remoder |= ((ug_info->numQueuesRx - 1) << REMODER_NUM_OF_QUEUES_SHIFT); + remoder |= ((ucc_geth_rx_queues(ug_info) - 1) << REMODER_NUM_OF_QUEUES_SHIFT); if (ug_info->ipCheckSumCheck) remoder |= REMODER_IP_CHECKSUM_CHECK; if (ug_info->ipAddressAlignment) @@ -2937,14 +2777,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) * allocated resources can be released when the channel is freed. 
*/ if (!(ugeth->p_init_enet_param_shadow = - kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { + kzalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for p_UccInitEnetParamShadows\n"); return -ENOMEM; } - /* Zero out *p_init_enet_param_shadow */ - memset((char *)ugeth->p_init_enet_param_shadow, - 0, sizeof(struct ucc_geth_init_pram)); /* Fill shadow InitEnet command parameter structure */ @@ -2964,7 +2801,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ((u32) ug_info->numThreadsTx) << ENET_INIT_PARAM_TGF_SHIFT; ugeth->p_init_enet_param_shadow->rgftgfrxglobal |= - ugeth->rx_glbl_pram_offset | ug_info->riscRx; + rx_glbl_pram_offset | ug_info->riscRx; if ((ug_info->largestexternallookupkeysize != QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE) && (ug_info->largestexternallookupkeysize != @@ -3002,7 +2839,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } ugeth->p_init_enet_param_shadow->txglobal = - ugeth->tx_glbl_pram_offset | ug_info->riscTx; + tx_glbl_pram_offset | ug_info->riscTx; if ((ret_val = fill_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> @@ -3016,7 +2853,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } /* Load Rx bds with buffers */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not fill Rx bds with buffers\n"); @@ -3287,12 +3124,12 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget) /* Tx event processing */ spin_lock(&ugeth->lock); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) ucc_geth_tx(ugeth->ndev, i); spin_unlock(&ugeth->lock); howmany = 0; - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) howmany += ucc_geth_rx(ugeth, i, budget - howmany); if (howmany < budget) { @@ -3685,6 +3522,36 @@ static const struct net_device_ops ucc_geth_netdev_ops = { #endif }; +static int ucc_geth_parse_clock(struct device_node *np, const char *which, + enum qe_clock *out) +{ + const char *sprop; + char buf[24]; + + snprintf(buf, sizeof(buf), "%s-clock-name", which); + sprop = of_get_property(np, buf, NULL); + if (sprop) { + *out = qe_clock_source(sprop); + } else { + u32 val; + + snprintf(buf, sizeof(buf), "%s-clock", which); + if (of_property_read_u32(np, buf, &val)) { + /* If both *-clock-name and *-clock are missing, + * we want to tell people to use *-clock-name. 
+ */ + pr_err("missing %s-clock-name property\n", buf); + return -EINVAL; + } + *out = val; + } + if (*out < QE_CLK_NONE || *out > QE_CLK24) { + pr_err("invalid %s property\n", buf); + return -EINVAL; + } + return 0; +} + static int ucc_geth_probe(struct platform_device* ofdev) { struct device *device = &ofdev->dev; @@ -3695,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct resource res; int err, ucc_num, max_speed = 0; const unsigned int *prop; - const char *sprop; const void *mac_addr; phy_interface_t phy_interface; static const int enet_to_speed[] = { @@ -3725,62 +3591,23 @@ static int ucc_geth_probe(struct platform_device* ofdev) if ((ucc_num < 0) || (ucc_num > 7)) return -ENODEV; - ug_info = &ugeth_info[ucc_num]; - if (ug_info == NULL) { - if (netif_msg_probe(&debug)) - pr_err("[%d] Missing additional data!\n", ucc_num); - return -ENODEV; - } + ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL); + if (ug_info == NULL) + return -ENOMEM; + memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info)); ug_info->uf_info.ucc_num = ucc_num; - sprop = of_get_property(np, "rx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.rx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.rx_clock < QE_CLK_NONE) || - (ug_info->uf_info.rx_clock > QE_CLK24)) { - pr_err("invalid rx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "rx-clock", NULL); - if (!prop) { - /* If both rx-clock-name and rx-clock are missing, - we want to tell people to use rx-clock-name. */ - pr_err("missing rx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid rx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.rx_clock = *prop; - } - - sprop = of_get_property(np, "tx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.tx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.tx_clock < QE_CLK_NONE) || - (ug_info->uf_info.tx_clock > QE_CLK24)) { - pr_err("invalid tx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "tx-clock", NULL); - if (!prop) { - pr_err("missing tx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid tx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.tx_clock = *prop; - } + err = ucc_geth_parse_clock(np, "rx", &ug_info->uf_info.rx_clock); + if (err) + goto err_free_info; + err = ucc_geth_parse_clock(np, "tx", &ug_info->uf_info.tx_clock); + if (err) + goto err_free_info; err = of_address_to_resource(np, 0, &res); if (err) - return -EINVAL; + goto err_free_info; ug_info->uf_info.regs = res.start; ug_info->uf_info.irq = irq_of_parse_and_map(np, 0); @@ -3793,7 +3620,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) */ err = of_phy_register_fixed_link(np); if (err) - return err; + goto err_free_info; ug_info->phy_node = of_node_get(np); } @@ -3924,6 +3751,8 @@ err_deregister_fixed_link: of_phy_deregister_fixed_link(np); of_node_put(ug_info->tbi_node); of_node_put(ug_info->phy_node); +err_free_info: + kfree(ug_info); return err; } @@ -3940,6 +3769,7 @@ static int ucc_geth_remove(struct platform_device* ofdev) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + kfree(ugeth->ug_info); free_netdev(dev); return 0; @@ -3968,17 +3798,10 @@ static struct platform_driver ucc_geth_driver = { static int __init ucc_geth_init(void) { - int i, ret; - if (netif_msg_drv(&debug)) pr_info(DRV_DESC "\n"); 
- for (i = 0; i < 8; i++) - memcpy(&(ugeth_info[i]), &ugeth_primary_info, - sizeof(ugeth_primary_info)); - - ret = platform_driver_register(&ucc_geth_driver); - return ret; + return platform_driver_register(&ucc_geth_driver); } static void __exit ucc_geth_exit(void) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 11d4bf5dc21f..4294ed096ebb 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1076,8 +1076,6 @@ struct ucc_geth_tad_params { /* GETH protocol initialization structure */ struct ucc_geth_info { struct ucc_fast_info uf_info; - u8 numQueuesTx; - u8 numQueuesRx; int ipCheckSumCheck; int ipCheckSumGenerate; int rxExtendedFiltering; @@ -1165,9 +1163,7 @@ struct ucc_geth_private { struct ucc_geth_exf_global_pram __iomem *p_exf_glbl_param; u32 exf_glbl_param_offset; struct ucc_geth_rx_global_pram __iomem *p_rx_glbl_pram; - u32 rx_glbl_pram_offset; struct ucc_geth_tx_global_pram __iomem *p_tx_glbl_pram; - u32 tx_glbl_pram_offset; struct ucc_geth_send_queue_mem_region __iomem *p_send_q_mem_reg; u32 send_q_mem_reg_offset; struct ucc_geth_thread_data_tx __iomem *p_thread_data_tx; @@ -1185,9 +1181,7 @@ struct ucc_geth_private { struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl; u32 rx_bd_qs_tbl_offset; u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES]; - u32 tx_bd_ring_offset[NUM_TX_QUEUES]; u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES]; - u32 rx_bd_ring_offset[NUM_RX_QUEUES]; u8 __iomem *confBd[NUM_TX_QUEUES]; u8 __iomem *txBd[NUM_TX_QUEUES]; u8 __iomem *rxBd[NUM_RX_QUEUES]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 405e49033417..512080640cbc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1070,7 +1070,7 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) * HW checksum of the non-IP packets and GSO packets is handled at * different place in the following code */ - if (skb->csum_not_inet || skb_is_gso(skb) || + if (skb_csum_is_sctp(skb) || skb_is_gso(skb) || !test_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state)) return false; diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 41815b609569..7fe15a3286f4 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -94,7 +94,6 @@ config MVPP2 config MVPP2_PTP bool "Marvell Armada 8K Enable PTP support" - depends on NETWORK_PHY_TIMESTAMPING depends on (PTP_1588_CLOCK = y && MVPP2 = y) || \ (PTP_1588_CLOCK && MVPP2 = m) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index f919283ddc34..89e93eb46ab7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -717,6 +717,8 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17) #define NIX_FLOW_KEY_TYPE_VLAN BIT(20) #define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21) +#define NIX_FLOW_KEY_TYPE_AH BIT(22) +#define NIX_FLOW_KEY_TYPE_ESP BIT(23) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index bc0e4113370e..10a98bcb7c54 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -52,6 
+52,650 @@ static bool rvu_common_request_irq(struct rvu *rvu, int offset, return rvu->irq_allocated[offset]; } +static void rvu_nix_intr_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, intr_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_intr_reporter, + "NIX_AF_RVU Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_intr_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_RVU_INT); + nix_event_context->nix_af_rvu_int = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->intr_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_gen_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, gen_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_gen_reporter, + "NIX_AF_GEN Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_gen_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_GEN_INT); + nix_event_context->nix_af_rvu_gen = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->gen_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_err_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, err_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_err_reporter, + "NIX_AF_ERR Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_err_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_err = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->err_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_ras_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct 
rvu_nix_health_reporters, ras_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_ras_reporter, + "NIX_AF_RAS Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_ras_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_ras = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RAS, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->ras_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_unregister_interrupts(struct rvu *rvu) +{ + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + int offs, i, blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + offs = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!offs) + return; + + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + + if (rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU]) { + free_irq(pci_irq_vector(rvu->pdev, offs + NIX_AF_INT_VEC_RVU), + rvu_dl); + rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU] = false; + } + + for (i = NIX_AF_INT_VEC_AF_ERR; i < NIX_AF_INT_VEC_CNT; i++) + if (rvu->irq_allocated[offs + i]) { + free_irq(pci_irq_vector(rvu->pdev, offs + i), rvu_dl); + rvu->irq_allocated[offs + i] = false; + } +} + +static int rvu_nix_register_interrupts(struct rvu *rvu) +{ + int blkaddr, base; + bool rc; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + /* Get NIX AF MSIX vectors offset. 
*/ + base = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!base) { + dev_warn(rvu->dev, + "Failed to get NIX%d NIX_AF_INT vector offsets\n", + blkaddr - BLKADDR_NIX0); + return 0; + } + /* Register and enable NIX_AF_RVU_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_RVU, + "NIX_AF_RVU_INT", + rvu_nix_af_rvu_intr_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_GEN_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_GEN, + "NIX_AF_GEN_INT", + rvu_nix_af_rvu_gen_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_ERR_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_AF_ERR, + "NIX_AF_ERR_INT", + rvu_nix_af_rvu_err_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_RAS interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_POISON, + "NIX_AF_RAS", + rvu_nix_af_rvu_ras_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +err: + rvu_nix_unregister_interrupts(rvu); + return rc; +} + +static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx, + enum nix_af_rvu_health health_reporter) +{ + struct rvu_nix_event_ctx *nix_event_context; + u64 intr_val; + int err; + + nix_event_context = ctx; + switch (health_reporter) { + case NIX_AF_RVU_INTR: + intr_val = nix_event_context->nix_af_rvu_int; + err = rvu_report_pair_start(fmsg, "NIX_AF_RVU"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RVU Interrupt Reg ", + nix_event_context->nix_af_rvu_int); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tUnmap Slot Error"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_GEN: + intr_val = nix_event_context->nix_af_rvu_gen; + err = rvu_report_pair_start(fmsg, "NIX_AF_GENERAL"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX General Interrupt Reg ", + nix_event_context->nix_af_rvu_gen); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx mirror pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tSMQ flush done"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_ERR: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_ERR"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX Error Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + if (intr_val & BIT_ULL(14)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_INST_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(13)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_RES_S write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(12)) { + err = devlink_fmsg_string_put(fmsg, "\n\tAQ Doorbell Error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(6)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx on unmapped PF_FUNC"); + if (err) + return err; + } + if (intr_val & BIT_ULL(5)) { 
+ err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast replication error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_RX_MCE_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror pkt write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast pkt write"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_RAS: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_RAS"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + err = devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:"); + if (err) + return err; + if (intr_val & BIT_ULL(34)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(33)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_RES_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(32)) { + err = devlink_fmsg_string_put(fmsg, "\n\tHW ctx"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from multicast buffer"); + + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from multicast buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_RX_MCE_S read"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int rvu_hw_nix_intr_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? 
rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_INTR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_INTR); +} + +static int rvu_hw_nix_intr_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_gen_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_GEN) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_GEN); +} + +static int rvu_hw_nix_gen_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_gen) + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_err_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_ERR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_ERR); +} + +static int rvu_hw_nix_err_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_err) + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_ras_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? 
rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_RAS) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_RAS); +} + +static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +} + +RVU_REPORTERS(hw_nix_intr); +RVU_REPORTERS(hw_nix_gen); +RVU_REPORTERS(hw_nix_err); +RVU_REPORTERS(hw_nix_ras); + +static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl); + +static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *rvu_reporters; + struct rvu_nix_event_ctx *nix_event_context; + struct rvu *rvu = rvu_dl->rvu; + + rvu_reporters = kzalloc(sizeof(*rvu_reporters), GFP_KERNEL); + if (!rvu_reporters) + return -ENOMEM; + + rvu_dl->rvu_nix_health_reporter = rvu_reporters; + nix_event_context = kzalloc(sizeof(*nix_event_context), GFP_KERNEL); + if (!nix_event_context) + return -ENOMEM; + + rvu_reporters->nix_event_ctx = nix_event_context; + rvu_reporters->rvu_hw_nix_intr_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter); + } + + rvu_reporters->rvu_hw_nix_gen_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter); + } + + rvu_reporters->rvu_hw_nix_err_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter); + } + + rvu_reporters->rvu_hw_nix_ras_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter); + } + + rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); + if (!rvu_dl->devlink_wq) + goto err; + + INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); + INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); + INIT_WORK(&rvu_reporters->err_work, rvu_nix_err_work); + INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); + + return 0; +err: + rvu_nix_health_reporters_destroy(rvu_dl); + return -ENOMEM; +} + +static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) +{ + struct rvu *rvu = rvu_dl->rvu; + int err; + + err = rvu_nix_register_reporters(rvu_dl); + if (err) { + dev_warn(rvu->dev, "Failed to create nix reporter, err =%d\n", + err); + return err; + } + rvu_nix_register_interrupts(rvu); + + return 0; +} + +static void 
rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *nix_reporters; + struct rvu *rvu = rvu_dl->rvu; + + nix_reporters = rvu_dl->rvu_nix_health_reporter; + + if (!nix_reporters->rvu_hw_nix_ras_reporter) + return; + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_intr_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_intr_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_gen_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_gen_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_err_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_err_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_ras_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_ras_reporter); + + rvu_nix_unregister_interrupts(rvu); + kfree(rvu_dl->rvu_nix_health_reporter->nix_event_ctx); + kfree(rvu_dl->rvu_nix_health_reporter); +} + static void rvu_npa_intr_work(struct work_struct *work) { struct rvu_npa_health_reporters *rvu_npa_health_reporter; @@ -698,9 +1342,14 @@ static void rvu_npa_health_reporters_destroy(struct rvu_devlink *rvu_dl) static int rvu_health_reporters_create(struct rvu *rvu) { struct rvu_devlink *rvu_dl; + int err; rvu_dl = rvu->rvu_dl; - return rvu_npa_health_reporters_create(rvu_dl); + err = rvu_npa_health_reporters_create(rvu_dl); + if (err) + return err; + + return rvu_nix_health_reporters_create(rvu_dl); } static void rvu_health_reporters_destroy(struct rvu *rvu) @@ -712,6 +1361,7 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_dl = rvu->rvu_dl; rvu_npa_health_reporters_destroy(rvu_dl); + rvu_nix_health_reporters_destroy(rvu_dl); } static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h index d7578fa92ac1..471e57dedb20 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h @@ -41,11 +41,38 @@ struct rvu_npa_health_reporters { struct work_struct ras_work; }; +enum nix_af_rvu_health { + NIX_AF_RVU_INTR, + NIX_AF_RVU_GEN, + NIX_AF_RVU_ERR, + NIX_AF_RVU_RAS, +}; + +struct rvu_nix_event_ctx { + u64 nix_af_rvu_int; + u64 nix_af_rvu_gen; + u64 nix_af_rvu_err; + u64 nix_af_rvu_ras; +}; + +struct rvu_nix_health_reporters { + struct rvu_nix_event_ctx *nix_event_ctx; + struct devlink_health_reporter *rvu_hw_nix_intr_reporter; + struct work_struct intr_work; + struct devlink_health_reporter *rvu_hw_nix_gen_reporter; + struct work_struct gen_work; + struct devlink_health_reporter *rvu_hw_nix_err_reporter; + struct work_struct err_work; + struct devlink_health_reporter *rvu_hw_nix_ras_reporter; + struct work_struct ras_work; +}; + struct rvu_devlink { struct devlink *dl; struct rvu *rvu; struct workqueue_struct *devlink_wq; struct rvu_npa_health_reporters *rvu_npa_health_reporter; + struct rvu_nix_health_reporters *rvu_nix_health_reporter; }; /* Devlink APIs */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a8dfbb6d1774..b54753ef7d94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2580,6 +2580,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + 
int l4_key_offset; if (!alg) return -EINVAL; @@ -2712,6 +2713,12 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field_marker = false; keyoff_marker = false; } + + /* TCP/UDP/SCTP and ESP/AH falls at same offset so + * remember the TCP key offset of 40 byte hash key. + */ + if (key_type == NIX_FLOW_KEY_TYPE_TCP) + l4_key_offset = key_off; break; case NIX_FLOW_KEY_TYPE_NVGRE: field->lid = NPC_LID_LD; @@ -2783,11 +2790,31 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->ltype_mask = 0xF; field->fn_mask = 1; /* Mask out the first nibble */ break; + case NIX_FLOW_KEY_TYPE_AH: + case NIX_FLOW_KEY_TYPE_ESP: + field->hdr_offset = 0; + field->bytesm1 = 7; /* SPI + sequence number */ + field->ltype_mask = 0xF; + field->lid = NPC_LID_LE; + field->ltype_match = NPC_LT_LE_ESP; + if (key_type == NIX_FLOW_KEY_TYPE_AH) { + field->lid = NPC_LID_LD; + field->ltype_match = NPC_LT_LD_AH; + field->hdr_offset = 4; + keyoff_marker = false; + } + break; } field->ena = 1; /* Found a valid flow key type */ if (valid_key) { + /* Use the key offset of TCP/UDP/SCTP fields + * for ESP/AH fields. + */ + if (key_type == NIX_FLOW_KEY_TYPE_ESP || + key_type == NIX_FLOW_KEY_TYPE_AH) + key_off = l4_key_offset; field->key_offset = key_off; memcpy(&alg[nr_field], field, sizeof(*field)); max_key_off = max(max_key_off, field->bytesm1 + 1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index e2153d47c373..5e15f4fc11e3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -74,6 +74,16 @@ enum npa_af_int_vec_e { NPA_AF_INT_VEC_CNT = 0x5, }; +/* NIX Admin function Interrupt Vector Enumeration */ +enum nix_af_int_vec_e { + NIX_AF_INT_VEC_RVU = 0x0, + NIX_AF_INT_VEC_GEN = 0x1, + NIX_AF_INT_VEC_AQ_DONE = 0x2, + NIX_AF_INT_VEC_AF_ERR = 0x3, + NIX_AF_INT_VEC_POISON = 0x4, + NIX_AF_INT_VEC_CNT = 0x5, +}; + /** * RVU PF Interrupt Vector Enumeration */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index aaba0454d188..e0199f0e4a6c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -448,10 +448,14 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_ESP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; case AH_V4_FLOW: case ESP_V4_FLOW: case IPV4_FLOW: - case AH_ESP_V6_FLOW: + break; case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: @@ -459,6 +463,7 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, default: return -EINVAL; } + return 0; } @@ -527,6 +532,36 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, return -EINVAL; } break; + case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~(NIX_FLOW_KEY_TYPE_ESP | + NIX_FLOW_KEY_TYPE_AH); + rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN | + NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + /* If VLAN hashing is also requested for ESP then do not + * allow because of hardware 40 bytes flow key limit. 
+ */ + if (rss_cfg & NIX_FLOW_KEY_TYPE_VLAN) { + netdev_err(pfvf->netdev, + "RSS hash of ESP or AH with VLAN is not supported\n"); + return -EOPNOTSUPP; + } + + rss_cfg |= NIX_FLOW_KEY_TYPE_ESP | NIX_FLOW_KEY_TYPE_AH; + /* Disable IPv4 proto hashing since IPv6 SA+DA(32 bytes) + * and ESP SPI+sequence(8 bytes) uses hardware maximum + * limit of 40 byte flow key. + */ + rss_cfg &= ~NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + default: + return -EINVAL; + } + break; case IPV4_FLOW: case IPV6_FLOW: rss_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6d2d60675ffd..01d3ee4b5829 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -353,7 +353,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, /* Setup gmac */ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; - mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | + mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; /* Only update control register when needed! */ @@ -759,8 +759,8 @@ static void mtk_get_stats64(struct net_device *dev, static inline int mtk_max_frag_size(int mtu) { /* make sure buf_size will be at least MTK_MAX_RX_LENGTH */ - if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH) - mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K) + mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -771,7 +771,7 @@ static inline int mtk_max_buf_size(int frag_size) int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - WARN_ON(buf_size < MTK_MAX_RX_LENGTH); + WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K); return buf_size; } @@ -2499,6 +2499,35 @@ static void mtk_uninit(struct net_device *dev) mtk_rx_irq_disable(eth, ~0); } +static int mtk_change_mtu(struct net_device *dev, int new_mtu) +{ + int length = new_mtu + MTK_RX_ETH_HLEN; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + u32 mcr_cur, mcr_new; + + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); + mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK; + + if (length <= 1518) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518); + else if (length <= 1536) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536); + else if (length <= 1552) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552); + else + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048); + + if (mcr_new != mcr_cur) + mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id)); + } + + dev->mtu = new_mtu; + + return 0; +} + static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mtk_mac *mac = netdev_priv(dev); @@ -2795,6 +2824,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = mtk_do_ioctl, + .ndo_change_mtu = mtk_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, .ndo_fix_features = mtk_fix_features, @@ -2896,7 +2926,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->irq = eth->irq[0]; eth->netdev[id]->dev.of_node = np; - eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (MTK_HAS_CAPS(eth->soc->caps, 
MTK_SOC_MT7628)) + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + else + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 454cfcd465fd..fd3cec8f06ba 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -17,12 +17,13 @@ #include <linux/phylink.h> #define MTK_QDMA_PAGE_SIZE 2048 -#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH_2K 2048 #define MTK_TX_DMA_BUF_LEN 0x3fff #define MTK_DMA_SIZE 256 #define MTK_NAPI_WEIGHT 64 #define MTK_MAC_COUNT 2 -#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) +#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN) #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN) #define MTK_DMA_DUMMY_DESC 0xffffffff #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \ @@ -320,7 +321,12 @@ /* Mac control registers */ #define MTK_MAC_MCR(x) (0x10100 + (x * 0x100)) -#define MAC_MCR_MAX_RX_1536 BIT(24) +#define MAC_MCR_MAX_RX_MASK GENMASK(25, 24) +#define MAC_MCR_MAX_RX(_x) (MAC_MCR_MAX_RX_MASK & ((_x) << 24)) +#define MAC_MCR_MAX_RX_1518 0x0 +#define MAC_MCR_MAX_RX_1536 0x1 +#define MAC_MCR_MAX_RX_1552 0x2 +#define MAC_MCR_MAX_RX_2048 0x3 #define MAC_MCR_IPG_CFG (BIT(18) | BIT(16)) #define MAC_MCR_FORCE_MODE BIT(15) #define MAC_MCR_TX_EN BIT(14) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 134bd038ae8a..fcfc0b114985 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + fw_reset.o qos.o # # Netdev basic @@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 055baf3b6cb1..26e578a973e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -55,6 +55,7 @@ #include "en_stats.h" #include "en/dcbnl.h" #include "en/fs.h" +#include "en/qos.h" #include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; @@ -161,6 +162,9 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + enum mlx5e_rq_group { MLX5E_RQ_GROUP_REGULAR, MLX5E_RQ_GROUP_XSK, @@ -663,11 +667,13 @@ struct mlx5e_channel { struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; __be32 
mkey_be; + u16 qos_sqs_size; u8 num_tc; u8 lag_port; @@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param { int next_state; int rl_update; int rl_index; + bool qos_update; + u16 qos_queue_group_id; }; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) @@ -788,10 +796,20 @@ struct mlx5e_scratchpad { cpumask_var_t cpumask; }; +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5e_sq_stats **qos_sq_stats; + u16 max_qos_sqs; + u16 maj_id; + u16 defcls; +}; + struct mlx5e_priv { /* priv data path fields - start */ /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + + MLX5E_QOS_MAX_LEAF_NODES]; int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -859,6 +877,7 @@ struct mlx5e_priv { struct mlx5e_hv_vhca_stats_agent stats_agent; #endif struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb htb; }; struct mlx5e_rx_handlers { @@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, void *context); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); @@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); @@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param; int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn); void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 807147d97a0f..ea2cfb04b31a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param); void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param); void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 2a2bac30daaa..eeddd1137dda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, csp.min_inline_mode = 
txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; - err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) goto err_free_txqsq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 000000000000..12d7ad061237 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,984 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en.h" +#include "params.h" +#include "../qos.h" + +#define BYTES_IN_MBIT 125000 + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv) +{ + int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev)); + + return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1; +} + +/* Software representation of the QoS tree (internal to this file) */ + +static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) +{ + int size = mlx5e_qos_max_leaf_nodes(priv->mdev); + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(priv->htb.qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct rcu_head rcu; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +static struct mlx5e_qos_node * +mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, priv->htb.qos_used_qids); + + node->classid = classid; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, priv->htb.qos_used_qids); + mlx5e_update_tx_netdev_queues(priv); + } + kfree_rcu(node, rcu); +} + +/* TX datapath API */ + +static u16 mlx5e_qid_from_qos(struct 
mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking priv->htb.maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid; +} + +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_sw_node_find_rcu(priv, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +/* SQ lifecycle */ + +static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + struct mlx5e_qos_node *node) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node->qid); + + WARN_ON(node->qid > priv->htb.max_qos_sqs); + if (node->qid == priv->htb.max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb.max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb.qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats); + /* Order max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. 
+ */ + smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1); + } + + ix = node->qid % params->num_channels; + qid = node->qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(&param_sq, 0, sizeof(param_sq)); + memset(&param_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv, params, &param_sq); + mlx5e_build_tx_cq_param(priv, params, &param_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + &param_sq, sq, 0, node->hw_id, node->qid); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, node->qid); + + WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid); + mlx5e_activate_txqsq(sq); +} + +static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; +} + +static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. */ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. 
*/ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + if (!priv->htb.maj_id) + return 0; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = mlx5e_open_qos_sq(priv, chs, node); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + mlx5e_activate_qos_sq(priv, node); + } +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +/* HTB API */ + +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "Missing QoS capabilities. 
Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + return err; + } + + root = mlx5e_sw_node_create_root(priv); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + WRITE_ONCE(priv->htb.defcls, htb_defcls); + /* Order maj_id after defcls - pairs with + * mlx5e_select_queue/mlx5e_select_htb_queues. + */ + smp_store_release(&priv->htb.maj_id, htb_maj_id); + + return 0; + +err_sw_node_delete: + mlx5e_sw_node_delete(priv, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); + return err; +} + +int mlx5e_htb_root_del(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *root; + int err; + + qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + + WRITE_ONCE(priv->htb.maj_id, 0); + synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ + + root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(priv->mdev, root->hw_id); + if (err) + qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_sw_node_delete(priv, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 
0 : share; + + qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) +{ + *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + + qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + int qid; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_find_unused_qos_qid(priv); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_sw_node_find(priv, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_sw_node_delete(priv, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. 
*/ + + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_sw_node_delete(priv, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid); + + *old_qid = *new_qid = 0; + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + mlx5e_sw_node_delete(priv, node); + + moved_qid = mlx5e_qos_cur_leaf_nodes(priv); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. 
*/ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_sw_node_find_by_qid(priv, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb.qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, moved_qid); + + __set_bit(qid, priv->htb.qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid); + *new_qid = mlx5e_qid_from_qos(&priv->channels, qid); + return 0; +} + +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, qid); + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_sw_node_delete(priv, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. 
+ */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + err = mlx5_qos_destroy_node(priv->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw); + + err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_qos_update_children(priv, node, extack); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 000000000000..5af7991fcd19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include <linux/mlx5/driver.h> + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_priv; +struct mlx5e_channels; +struct mlx5e_channel; + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); + +/* TX datapath API */ +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); +struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); + +/* SQ lifecycle */ +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); + +/* HTB API */ +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack); +int mlx5e_htb_root_del(struct mlx5e_priv *priv); +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack); +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d37742a888c..2e5a0696374a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (priv->htb.maj_id) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; @@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) return -EOPNOTSUPP; + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. 
+ */ + if (priv->htb.maj_id) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f33c38629886..b9a175982801 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -65,6 +65,7 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "qos.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -1143,7 +1144,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -1233,6 +1233,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p) { + u64 bitmask = 0; void *in; void *sqc; int inlen; @@ -1248,9 +1249,14 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); MLX5_SET(sqc, sqc, state, p->next_state); if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { - MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); err = mlx5_core_modify_sq(mdev, sqn, in); @@ -1267,6 +1273,7 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn) { struct mlx5e_modify_sq_param msp = {0}; @@ -1278,6 +1285,10 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, msp.curr_state = MLX5_SQC_STATE_RST; msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) mlx5e_destroy_sq(mdev, *sqn); @@ -1288,13 +1299,9 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, static int mlx5e_set_sq_maxrate(struct net_device *dev, struct mlx5e_txqsq *sq, u32 rate); -static int mlx5e_open_txqsq(struct mlx5e_channel *c, - u32 tisn, - int txq_ix, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, - int tc) +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid) { struct mlx5e_create_sq_param csp = {}; u32 tx_rate; @@ -1304,12 +1311,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (err) return err; + if (qos_queue_group_id) + sq->stats = c->priv->htb.qos_sq_stats[qos_qid]; + else + sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + csp.tisn = tisn; 
csp.tis_lst_sz = 1; csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -1366,7 +1378,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) } } -static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; @@ -1403,7 +1415,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -1452,7 +1464,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; @@ -1703,7 +1715,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc); + params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0); if (err) goto err_close_sqs; } @@ -2044,6 +2056,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); + + mlx5e_qos_deactivate_queues(c); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2051,6 +2065,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_close_xsk(c); mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); netif_napi_del(&c->napi); kvfree(c); @@ -2198,9 +2213,8 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2379,10 +2393,18 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; +err_close_ptp: + if (chs->port_ptp) + mlx5e_port_ptp_close(chs->port_ptp); + err_close_channels: for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); @@ -2915,11 +2937,31 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc) netdev_set_tc_queue(netdev, tc, nch, 0); } +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int qos_queues, nch, ntc, num_txqs, err; + + qos_queues = mlx5e_qos_cur_leaf_nodes(priv); + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.num_tc; + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + err = 
netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) + netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; - int num_txqs, num_rxqs, nch, ntc; int old_num_txqs, old_ntc; + int num_rxqs, nch, ntc; int err; old_num_txqs = netdev->real_num_tx_queues; @@ -2927,18 +2969,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; - num_txqs = nch * ntc; - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) - num_txqs += ntc; num_rxqs = nch * priv->profile->rq_groups; mlx5e_netdev_set_tcs(netdev, nch, ntc); - err = netif_set_real_num_tx_queues(netdev, num_txqs); - if (err) { - netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + err = mlx5e_update_tx_netdev_queues(priv); + if (err) goto err_tcs; - } err = netif_set_real_num_rx_queues(netdev, num_rxqs); if (err) { netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); @@ -3042,6 +3079,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); @@ -3608,6 +3646,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(priv->htb.maj_id)) { + err = -EINVAL; + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_tc = tc ? 
tc : 1; @@ -3628,12 +3674,55 @@ out: return err; } +static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) +{ + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid, + htb->extack); + case TC_HTB_DESTROY: + return mlx5e_htb_root_del(priv); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid, + htb->rate, htb->ceil, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, + htb->rate, htb->ceil, htb->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid, + htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(priv, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_get_txq_by_classid(priv, htb->classid); + if (res < 0) + return res; + htb->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + int err; switch (type) { case TC_SETUP_BLOCK: { @@ -3647,6 +3736,11 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_htb(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; default: return -EOPNOTSUPP; } @@ -3811,20 +3905,25 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) -static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +static int set_feature_hw_tc(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } +#endif + + if (!enable && priv->htb.maj_id) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + return -EINVAL; + } return 0; } -#endif static int set_feature_rx_all(struct net_device *netdev, bool enable) { @@ -3922,9 +4021,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); -#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); @@ -5028,6 +5125,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } + if (mlx5_qos_is_supported(mdev)) + netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; 
netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; @@ -5333,6 +5432,7 @@ int mlx5e_netdev_init(struct net_device *netdev, return -ENOMEM; mutex_init(&priv->state_lock); + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); @@ -5355,8 +5455,14 @@ err_free_cpumask: void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { + int i; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb.max_qos_sqs; i++) + kfree(priv->htb.qos_sq_stats[i]); + kvfree(priv->htb.qos_sq_stats); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5366,13 +5472,17 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, { struct net_device *netdev; unsigned int ptp_txqs = 0; + int qos_sqs = 0; int err; if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) ptp_txqs = profile->max_tc; + if (mlx5_qos_is_supported(mdev)) + qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * profile->max_tc + ptp_txqs, + nch * profile->max_tc + ptp_txqs + qos_sqs, nch * profile->rq_groups); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2cf2042b37c7..92c5b81427b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, } } +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. 
*/ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) } } mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); } static const struct counter_desc q_stats_desc[] = { @@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) @@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. 
*/ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { @@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); static MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { @@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e41fc11f2ce7..93c41312fb03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -55,6 +55,8 @@ #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + struct counter_desc { char format[ETH_GSTRING_LEN]; size_t offset; /* Byte offset */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 74f233eece54..da6a358a8a10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -106,28 +106,53 @@ return_txq: return priv->port_ptp_tc2realtxq[up]; } +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct mlx5e_priv *priv = netdev_priv(dev); + int num_tc_x_num_ch; int txq_ix; int up = 0; int ch_ix; - if (unlikely(priv->channels.port_ptp)) { - int num_tc_x_num_ch; + /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ + num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. 
*/ + u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + if (unlikely(htb_maj_id)) { + txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); + if (txq_ix > 0) + return txq_ix; + } - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(priv->channels.port_ptp)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel txqs. + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq(). + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). */ if (unlikely(txq_ix >= num_tc_x_num_ch)) txq_ix %= num_tc_x_num_ch; @@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a3cfe06d5116..d54da3797c30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) napi); struct mlx5e_ch_stats *ch_stats = c->stats; struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + u16 qos_sqs_size; bool xsk_open; int i; rcu_read_lock(); + qos_sqs = rcu_dereference(c->qos_sqs); + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); ch_stats->poll++; @@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) @@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } mlx5e_handle_rx_dim(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 000000000000..0777be24a307 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 000000000000..125e4e47e6f7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) 
\ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 685037e052af..52fdc34251ba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -84,6 +84,7 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; + atomic_t active_ports_count; bool fw_flash_in_progress; struct { struct devlink_health_reporter *fw_fatal; @@ -96,8 +97,36 @@ struct mlxsw_core { #define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 -static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) +static u64 mlxsw_ports_occ_get(void *priv) { + struct mlxsw_core *mlxsw_core = priv; + + return atomic_read(&mlxsw_core->active_ports_count); +} + +static int mlxsw_core_resources_ports_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params ports_num_params; + u32 max_ports; + + max_ports = mlxsw_core->max_ports - 1; + devlink_resource_size_params_init(&ports_num_params, max_ports, + max_ports, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, + DEVLINK_RESOURCE_GENERIC_NAME_PORTS, + max_ports, MLXSW_CORE_RESOURCE_PORTS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &ports_num_params); +} + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core, bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + int err; + /* Switch ports are numbered from 1 to queried value */ if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, @@ -110,11 +139,30 @@ static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) if (!mlxsw_core->ports) return -ENOMEM; + if (!reload) { + err = mlxsw_core_resources_ports_register(mlxsw_core); + if (err) + goto err_resources_ports_register; + } + atomic_set(&mlxsw_core->active_ports_count, 0); + devlink_resource_occ_get_register(devlink, MLXSW_CORE_RESOURCE_PORTS, + mlxsw_ports_occ_get, mlxsw_core); + return 0; + +err_resources_ports_register: + kfree(mlxsw_core->ports); + return err; } -static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core) +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core, bool reload) { + struct devlink *devlink = priv_to_devlink(mlxsw_core); + + devlink_resource_occ_get_unregister(devlink, MLXSW_CORE_RESOURCE_PORTS); + if (!reload) + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + kfree(mlxsw_core->ports); } @@ -1897,7 +1945,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_register_resources; } - err = mlxsw_ports_init(mlxsw_core); + err = mlxsw_ports_init(mlxsw_core, reload); if (err) goto err_ports_init; @@ -1986,7 +2034,7 @@ err_devlink_register: err_emad_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: - 
mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); err_ports_init: if (!reload) devlink_resources_unregister(devlink, NULL); @@ -2056,7 +2104,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); kfree(mlxsw_core->lag.mapping); - mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); @@ -2755,16 +2803,25 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, const unsigned char *switch_id, unsigned char switch_id_len) { - return __mlxsw_core_port_init(mlxsw_core, local_port, - DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber, - splittable, lanes, - switch_id, switch_id_len); + int err; + + err = __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber, + splittable, lanes, + switch_id, switch_id_len); + if (err) + return err; + + atomic_inc(&mlxsw_core->active_ports_count); + return 0; } EXPORT_SYMBOL(mlxsw_core_port_init); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { + atomic_dec(&mlxsw_core->active_ports_count); + __mlxsw_core_port_fini(mlxsw_core, local_port); } EXPORT_SYMBOL(mlxsw_core_port_fini); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6b3ccbf6b238..8af7d9d03475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -19,6 +19,11 @@ #include "cmd.h" #include "resources.h" +enum mlxsw_core_resource_id { + MLXSW_CORE_RESOURCE_PORTS = 1, + MLXSW_CORE_RESOURCE_MAX, +}; + struct mlxsw_core; struct mlxsw_core_port; struct mlxsw_driver; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a6956cfc9cb1..a3769f95a182 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -52,7 +52,7 @@ #define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" enum mlxsw_sp_resource_id { - MLXSW_SP_RESOURCE_KVD = 1, + MLXSW_SP_RESOURCE_KVD = MLXSW_CORE_RESOURCE_MAX, MLXSW_SP_RESOURCE_KVD_LINEAR, MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fb67d8f797ec..475e6f01ea10 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4850,10 +4850,8 @@ static void rtl_shutdown(struct pci_dev *pdev) rtl_rar_set(tp, tp->dev->perm_addr); if (system_state == SYSTEM_POWER_OFF) { - if (tp->saved_wolopts) { - rtl_wol_enable_rx(tp); + if (tp->saved_wolopts) rtl_wol_shutdown_quirk(tp); - } pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index d067da1ef070..967a634ee9ac 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -923,7 +923,7 @@ static int ofdpa_flow_tbl_bridge(struct ofdpa_port *ofdpa_port, struct ofdpa_flow_tbl_entry *entry; u32 priority; bool vlan_bridging = !!vlan_id; - bool dflt = !eth_dst || (eth_dst && eth_dst_mask); + bool dflt = !eth_dst || eth_dst_mask; bool wild = false; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); diff --git a/drivers/net/ethernet/sfc/efx_channels.c 
b/drivers/net/ethernet/sfc/efx_channels.c index a4a626e9cd9a..1bfeee283ea9 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -17,6 +17,7 @@ #include "rx_common.h" #include "nic.h" #include "sriov.h" +#include "workarounds.h" /* This is the first interrupt mode to try out of: * 0 => MSI-X @@ -137,6 +138,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, { unsigned int n_channels = parallelism; int vec_count; + int tx_per_ev; int n_xdp_tx; int n_xdp_ev; @@ -149,9 +151,9 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * multiple tx queues, assuming tx and ev queues are both * maximum size. */ - + tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx); n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev); vec_count = pci_msix_vec_count(efx->pci_dev); if (vec_count < 0) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 5b29f7d9d6ac..f79cf3c327c1 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -272,7 +272,7 @@ static void gsi_irq_ch_ctrl_disable(struct gsi *gsi) iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); } -static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id) { bool enable_ieob = !gsi->ieob_enabled_bitmap; u32 val; @@ -286,11 +286,11 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) gsi_irq_type_enable(gsi, GSI_IEOB); } -static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask) { u32 val; - gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id); + gsi->ieob_enabled_bitmap &= ~event_mask; /* Disable the interrupt type if this was the last enabled channel */ if (!gsi->ieob_enabled_bitmap) @@ -300,6 +300,11 @@ static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); } +static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id) +{ + gsi_irq_ieob_disable(gsi, BIT(evt_ring_id)); +} + /* Enable all GSI_interrupt types */ static void gsi_irq_enable(struct gsi *gsi) { @@ -766,13 +771,13 @@ static void gsi_channel_freeze(struct gsi_channel *channel) napi_disable(&channel->napi); - gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_disable_one(channel->gsi, channel->evt_ring_id); } /* Allow transactions to be used on the channel again. 
*/ static void gsi_channel_thaw(struct gsi_channel *channel) { - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); napi_enable(&channel->napi); } @@ -1200,6 +1205,7 @@ static void gsi_isr_ieob(struct gsi *gsi) u32 event_mask; event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET); + gsi_irq_ieob_disable(gsi, event_mask); iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET); while (event_mask) { @@ -1207,7 +1213,6 @@ static void gsi_isr_ieob(struct gsi *gsi) event_mask ^= BIT(evt_ring_id); - gsi_irq_ieob_disable(gsi, evt_ring_id); napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi); } } @@ -1452,7 +1457,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel) } /* Consult hardware, move any newly completed transactions to completed list */ -static void gsi_channel_update(struct gsi_channel *channel) +static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel) { u32 evt_ring_id = channel->evt_ring_id; struct gsi *gsi = channel->gsi; @@ -1471,7 +1476,7 @@ static void gsi_channel_update(struct gsi_channel *channel) offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id); index = gsi_ring_index(ring, ioread32(gsi->virt + offset)); if (index == ring->index % ring->count) - return; + return NULL; /* Get the transaction for the latest completed event. Take a * reference to keep it from completing before we give the events @@ -1496,6 +1501,8 @@ static void gsi_channel_update(struct gsi_channel *channel) gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index); gsi_trans_free(trans); + + return gsi_channel_trans_complete(channel); } /** @@ -1516,11 +1523,8 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) /* Get the first transaction from the completed list */ trans = gsi_channel_trans_complete(channel); - if (!trans) { - /* List is empty; see if there's more to do */ - gsi_channel_update(channel); - trans = gsi_channel_trans_complete(channel); - } + if (!trans) /* List is empty; see if there's more to do */ + trans = gsi_channel_update(channel); if (trans) gsi_trans_move_polled(trans); @@ -1543,23 +1547,20 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) static int gsi_channel_poll(struct napi_struct *napi, int budget) { struct gsi_channel *channel; - int count = 0; + int count; channel = container_of(napi, struct gsi_channel, napi); - while (count < budget) { + for (count = 0; count < budget; count++) { struct gsi_trans *trans; - count++; trans = gsi_channel_poll_one(channel); if (!trans) break; gsi_trans_complete(trans); } - if (count < budget) { - napi_complete(&channel->napi); - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); - } + if (count < budget && napi_complete(napi)) + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); return count; } diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 6c2371084c55..c6c6a7f6909c 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -43,7 +43,6 @@ enum ipa_flag { * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device - * @modem_rproc: Remoteproc handle for modem subsystem * @smp2p: SMP2P information * @clock: IPA clocking information * @table_addr: DMA address of filter/route table content @@ -83,7 +82,6 @@ struct ipa { DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; - struct rproc *modem_rproc; struct notifier_block nb; void *notifier; struct ipa_smp2p *smp2p; 
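For reference, the gsi_channel_poll() rework above follows the standard NAPI completion idiom: consume at most `budget` completions, then re-arm the event interrupt only when napi_complete() confirms polling has really stopped. A minimal sketch of that idiom follows; it is not part of the patch, and all example_* names are hypothetical stand-ins.

struct example_channel {
	struct napi_struct napi;
	/* device-specific state would live here */
};

static bool example_process_one(struct example_channel *ch);	/* hypothetical */
static void example_irq_enable(struct example_channel *ch);	/* hypothetical */

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_channel *ch = container_of(napi, struct example_channel, napi);
	int count;

	/* Consume at most 'budget' completions. */
	for (count = 0; count < budget; count++) {
		if (!example_process_one(ch))
			break;	/* ring is drained */
	}

	/* Re-arm the interrupt only if NAPI really stopped polling. */
	if (count < budget && napi_complete(napi))
		example_irq_enable(ch);

	return count;
}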
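The mlx5_qos_* helpers added earlier in this diff (drivers/net/ethernet/mellanox/mlx5/core/qos.c) are thin wrappers around the NIC scheduling-element commands. Below is a hedged sketch, not part of the patch, of how a caller could mirror a small HTB tree with them: error unwinding is trimmed, identifiers are illustrative, and it assumes the usual mlx5 convention that max_avg_bw == 0 means "no ceiling".

static int example_build_qos_tree(struct mlx5_core_dev *mdev)
{
	u32 root_id, inner_id, leaf_id;
	int err;

	/* Root of the NIC scheduling hierarchy (a DWRR TSAR). */
	err = mlx5_qos_create_root_node(mdev, &root_id);
	if (err)
		return err;

	/* Inner node under the root: bw_share 1, no rate ceiling. */
	err = mlx5_qos_create_inner_node(mdev, root_id, 1, 0, &inner_id);
	if (err)
		goto err_destroy_root;

	/* Leaf queue group that SQs can later be attached to. */
	err = mlx5_qos_create_leaf_node(mdev, inner_id, 1, 0, &leaf_id);
	if (err)
		goto err_destroy_inner;

	return 0;

err_destroy_inner:
	mlx5_qos_destroy_node(mdev, inner_id);
err_destroy_root:
	mlx5_qos_destroy_node(mdev, root_id);
	return err;
}

Presumably the id returned for such a leaf is what the driver then feeds into the SQ context as qos_queue_group_id via mlx5e_create_sq_rdy(), tying a send queue to its HTB class.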
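The "Pairs with smp_store_release in mlx5e_open_qos_sq" comments in the en_stats.c hunks earlier describe a publish/consume scheme: the writer fills a new stats slot first and only then releases the increased count, so a reader that acquires the count is guaranteed to see fully initialized slots. A small illustrative sketch of that pattern, with made-up names that are not the driver's:

struct example_stat;					/* hypothetical payload */
static void example_consume(struct example_stat *s);	/* hypothetical */

struct example_stats_table {
	u16 count;			/* published with smp_store_release() */
	struct example_stat **slots;	/* slots[0 .. count-1] are valid */
};

static void example_publish(struct example_stats_table *t,
			    struct example_stat *s, u16 idx)
{
	WRITE_ONCE(t->slots[idx], s);		/* 1: fill the slot */
	smp_store_release(&t->count, idx + 1);	/* 2: then publish the count */
}

static void example_read_all(struct example_stats_table *t)
{
	u16 count = smp_load_acquire(&t->count);	/* pairs with the release above */
	u16 i;

	for (i = 0; i < count; i++)
		example_consume(READ_ONCE(t->slots[i]));
}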
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 84bb8ae92725..ab0fd5cb4927 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -15,7 +15,6 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_address.h> -#include <linux/remoteproc.h> #include <linux/qcom_scm.h> #include <linux/soc/qcom/mdt_loader.h> @@ -729,19 +728,6 @@ static const struct of_device_id ipa_match[] = { }; MODULE_DEVICE_TABLE(of, ipa_match); -static phandle of_property_read_phandle(const struct device_node *np, - const char *name) -{ - struct property *prop; - int len = 0; - - prop = of_find_property(np, name, &len); - if (!prop || len != sizeof(__be32)) - return 0; - - return be32_to_cpup(prop->value); -} - /* Check things that can be validated at build time. This just * groups these things BUILD_BUG_ON() calls don't clutter the rest * of the code. @@ -807,10 +793,8 @@ static int ipa_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct ipa_data *data; struct ipa_clock *clock; - struct rproc *rproc; bool modem_init; struct ipa *ipa; - phandle ph; int ret; ipa_validate_build(); @@ -829,25 +813,12 @@ static int ipa_probe(struct platform_device *pdev) if (!qcom_scm_is_available()) return -EPROBE_DEFER; - /* We rely on remoteproc to tell us about modem state changes */ - ph = of_property_read_phandle(dev->of_node, "modem-remoteproc"); - if (!ph) { - dev_err(dev, "DT missing \"modem-remoteproc\" property\n"); - return -EINVAL; - } - - rproc = rproc_get_by_phandle(ph); - if (!rproc) - return -EPROBE_DEFER; - /* The clock and interconnects might not be ready when we're * probed, so might return -EPROBE_DEFER. */ clock = ipa_clock_init(dev, data->clock_data); - if (IS_ERR(clock)) { - ret = PTR_ERR(clock); - goto err_rproc_put; - } + if (IS_ERR(clock)) + return PTR_ERR(clock); /* No more EPROBE_DEFER. 
Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); @@ -858,7 +829,6 @@ static int ipa_probe(struct platform_device *pdev) ipa->pdev = pdev; dev_set_drvdata(dev, ipa); - ipa->modem_rproc = rproc; ipa->clock = clock; ipa->version = data->version; @@ -935,8 +905,6 @@ err_kfree_ipa: kfree(ipa); err_clock_exit: ipa_clock_exit(clock); -err_rproc_put: - rproc_put(rproc); return ret; } @@ -944,7 +912,6 @@ err_rproc_put: static int ipa_remove(struct platform_device *pdev) { struct ipa *ipa = dev_get_drvdata(&pdev->dev); - struct rproc *rproc = ipa->modem_rproc; struct ipa_clock *clock = ipa->clock; int ret; @@ -970,7 +937,6 @@ static int ipa_remove(struct platform_device *pdev) ipa_reg_exit(ipa); kfree(ipa); ipa_clock_exit(clock); - rproc_put(rproc); return 0; } diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 5f3a4cc92a88..a5a214d29849 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -248,6 +248,10 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } +static struct device_type wwan_type = { + .name = "wwan", +}; + static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { @@ -267,6 +271,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; SET_NETDEV_DEV(ndev, &mhi_dev->dev); + SET_NETDEV_DEVTYPE(ndev, &wwan_type); /* All MHI net channels have 128 ring elements (at least for now) */ mhi_netdev->rx_queue_sz = 128; diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 99ecd6c4c15a..821e85a97367 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -60,6 +60,9 @@ #define RTL_LPADV_5000FULL BIT(6) #define RTL_LPADV_2500FULL BIT(5) +#define RTL9000A_GINMR 0x14 +#define RTL9000A_GINMR_LINK_STATUS BIT(4) + #define RTLGEN_SPEED_MASK 0x0630 #define RTL_GENERIC_PHYID 0x001cc800 @@ -655,6 +658,122 @@ static int rtlgen_resume(struct phy_device *phydev) return ret; } +static int rtl9000a_config_init(struct phy_device *phydev) +{ + phydev->autoneg = AUTONEG_DISABLE; + phydev->speed = SPEED_100; + phydev->duplex = DUPLEX_FULL; + + return 0; +} + +static int rtl9000a_config_aneg(struct phy_device *phydev) +{ + int ret; + u16 ctl = 0; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + ctl |= CTL1000_AS_MASTER; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + break; + case MASTER_SLAVE_CFG_UNKNOWN: + case MASTER_SLAVE_CFG_UNSUPPORTED: + return 0; + default: + phydev_warn(phydev, "Unsupported Master/Slave mode\n"); + return -EOPNOTSUPP; + } + + ret = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); + if (ret == 1) + ret = genphy_soft_reset(phydev); + + return ret; +} + +static int rtl9000a_read_status(struct phy_device *phydev) +{ + int ret; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + ret = phy_read(phydev, MII_CTRL1000); + if (ret < 0) + return ret; + if (ret & CTL1000_AS_MASTER) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE; + + ret = phy_read(phydev, MII_STAT1000); + if (ret < 0) + return ret; + if (ret & LPA_1000MSRES) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + + return 0; +} + +static int rtl9000a_ack_interrupt(struct 
phy_device *phydev) +{ + int err; + + err = phy_read(phydev, RTL8211F_INSR); + + return (err < 0) ? err : 0; +} + +static int rtl9000a_config_intr(struct phy_device *phydev) +{ + u16 val; + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = rtl9000a_ack_interrupt(phydev); + if (err) + return err; + + val = (u16)~RTL9000A_GINMR_LINK_STATUS; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + } else { + val = ~0; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + if (err) + return err; + + err = rtl9000a_ack_interrupt(phydev); + } + + return phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); +} + +static irqreturn_t rtl9000a_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, RTL8211F_INSR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & RTL8211F_INER_LINK_STATUS)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static struct phy_driver realtek_drvs[] = { { PHY_ID_MATCH_EXACT(0x00008201), @@ -823,6 +942,19 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001ccb00), + .name = "RTL9000AA_RTL9000AN Ethernet", + .features = PHY_BASIC_T1_FEATURES, + .config_init = rtl9000a_config_init, + .config_aneg = rtl9000a_config_aneg, + .read_status = rtl9000a_read_status, + .config_intr = rtl9000a_config_intr, + .handle_interrupt = rtl9000a_handle_interrupt, + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1447da1d5729..b4c8080e6f87 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1539,11 +1539,11 @@ static void usbnet_bh (struct timer_list *t) } } -static void usbnet_bh_tasklet(unsigned long data) +static void usbnet_bh_tasklet(struct tasklet_struct *t) { - struct timer_list *t = (struct timer_list *)data; + struct usbnet *dev = from_tasklet(dev, t, bh); - usbnet_bh(t); + usbnet_bh(&dev->delay); } @@ -1673,8 +1673,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - dev->bh.func = usbnet_bh_tasklet; - dev->bh.data = (unsigned long)&dev->delay; + tasklet_setup(&dev->bh, usbnet_bh_tasklet); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); timer_setup(&dev->delay, usbnet_bh, 0); @@ -1964,12 +1963,12 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, buf, size, USB_CTRL_GET_TIMEOUT); if (err > 0 && err <= size) { - if (data) - memcpy(data, buf, err); - else - netdev_dbg(dev->net, - "Huh? Data requested but thrown away.\n"); - } + if (data) + memcpy(data, buf, err); + else + netdev_dbg(dev->net, + "Huh? Data requested but thrown away.\n"); + } kfree(buf); out: return err; diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index ad0abb1f0bae..adaa1a7147f9 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -155,7 +155,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb) /* * LRC check failed. This may due to transmission error or - * desynchronization between driver and FDP. Drop the paquet + * desynchronization between driver and FDP. 
Drop the packet * and force resynchronization */ if (lrc) { diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index c70f62fe321e..33978022ae47 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -169,7 +169,7 @@ /* Bits determining whether its a direct command or register R/W, * whether to use a continuous SPI transaction or not, and the actual - * direct cmd opcode or regster address. + * direct cmd opcode or register address. */ #define TRF7970A_CMD_BIT_CTRL BIT(7) #define TRF7970A_CMD_BIT_RW BIT(6) diff --git a/drivers/soc/fsl/qe/qe_common.c b/drivers/soc/fsl/qe/qe_common.c index 497a7e0fd027..654e9246ce6b 100644 --- a/drivers/soc/fsl/qe/qe_common.c +++ b/drivers/soc/fsl/qe/qe_common.c @@ -27,7 +27,7 @@ static struct gen_pool *muram_pool; static spinlock_t cpm_muram_lock; -static u8 __iomem *muram_vbase; +static void __iomem *muram_vbase; static phys_addr_t muram_pbase; struct muram_block { @@ -223,9 +223,9 @@ void __iomem *cpm_muram_addr(unsigned long offset) } EXPORT_SYMBOL(cpm_muram_addr); -unsigned long cpm_muram_offset(void __iomem *addr) +unsigned long cpm_muram_offset(const void __iomem *addr) { - return addr - (void __iomem *)muram_vbase; + return addr - muram_vbase; } EXPORT_SYMBOL(cpm_muram_offset); @@ -235,6 +235,18 @@ EXPORT_SYMBOL(cpm_muram_offset); */ dma_addr_t cpm_muram_dma(void __iomem *addr) { - return muram_pbase + ((u8 __iomem *)addr - muram_vbase); + return muram_pbase + (addr - muram_vbase); } EXPORT_SYMBOL(cpm_muram_dma); + +/* + * As cpm_muram_free, but takes the virtual address rather than the + * muram offset. + */ +void cpm_muram_free_addr(const void __iomem *addr) +{ + if (!addr) + return; + cpm_muram_free(cpm_muram_offset(addr)); +} +EXPORT_SYMBOL(cpm_muram_free_addr); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7faf6a37d5b2..ac4d83a1ab81 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -123,6 +123,7 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +const char *can_get_state_str(const enum can_state state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dda61d150a13..9d1f29f0c512 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -31,6 +31,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 823411e288c0..71ae6aac3410 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x4]; + u8 reserved_at_7[0x1]; + u8 nic_sq_scheduling[0x1]; + u8 nic_bw_share[0x1]; + u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; u8 reserved_at_c[0x14]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0xb]; + u8 log_max_qos_nic_queue_group[0x5]; + u8 reserved_at_30[0x10]; u8 packet_pacing_max_rate[0x20]; @@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; - u8 reserved_at_110[0x10]; + u8 qos_queue_group_id[0x10]; u8 reserved_at_120[0x40]; @@ -3362,6 +3367,7 @@ enum { 
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, }; enum { @@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { enum { SCHEDULING_HIERARCHY_E_SWITCH = 0x2, + SCHEDULING_HIERARCHY_NIC = 0x3, }; struct mlx5_ifc_query_scheduling_element_in_bits { diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 934de56644e7..c06d6aaba9df 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -84,6 +84,7 @@ enum { NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ /* * Add your fresh new feature above and remember to update @@ -157,6 +158,7 @@ enum { #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) +#define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) /* Finds the next feature with the highest number of the range of start till 0. */ @@ -234,7 +236,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) /* Changeable features with no special hardware requirements that defaults to off. */ -#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST +#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD) #define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_CTAG_RX | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d..9e8572533d8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,6 +858,7 @@ enum tc_setup_type { TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, + TC_SETUP_QDISC_HTB, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 186dad231e30..9313b5aaf45b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); -void __skb_tstamp_tx(struct sk_buff *orig_skb, +void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af7..48d8a363319e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb); + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { diff --git a/include/net/devlink.h b/include/net/devlink.h index f466819cc477..d12ed2854c34 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -380,6 +380,8 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 +#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" + #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 111d7771b208..c11f80f328f1 100644 
--- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -141,7 +141,6 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ @@ -227,8 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || - what == ICSK_TIME_REO_TIMEOUT) { + what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2a5277758379..f51a118bfce8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, - struct net_device *dev, unsigned int pref); + struct net_device *dev, unsigned int pref, + u32 defrtr_usr_metric); void rt6_purge_dflt_routers(struct net *net); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f2a9c44171c..255e4f4b521f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -783,6 +783,42 @@ struct tc_mq_qopt_offload { }; }; +enum tc_htb_command { + /* Root */ + TC_HTB_CREATE, /* Initialize HTB offload. */ + TC_HTB_DESTROY, /* Destroy HTB offload. */ + + /* Classes */ + /* Allocate qid and create leaf. */ + TC_HTB_LEAF_ALLOC_QUEUE, + /* Convert leaf to inner, preserve and return qid, create new leaf. */ + TC_HTB_LEAF_TO_INNER, + /* Delete leaf, while siblings remain. */ + TC_HTB_LEAF_DEL, + /* Delete leaf, convert parent to leaf, preserving qid. */ + TC_HTB_LEAF_DEL_LAST, + /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */ + TC_HTB_LEAF_DEL_LAST_FORCE, + /* Modify parameters of a node. */ + TC_HTB_NODE_MODIFY, + + /* Class qdisc */ + TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. 
*/ +}; + +struct tc_htb_qopt_offload { + struct netlink_ext_ack *extack; + enum tc_htb_command command; + u16 classid; + u32 parent_classid; + u16 qid; + u16 moved_qid; + u64 rate; + u64 ceil; +}; + +#define TC_HTB_CLASSID_ROOT U32_MAX + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7bee99aebce..070f01bf17eb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -210,7 +210,8 @@ struct Qdisc_class_ops { int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); - int (*delete)(struct Qdisc *, unsigned long); + int (*delete)(struct Qdisc *, unsigned long, + struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ @@ -552,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) return qdisc->dev_queue->dev; } -static inline void sch_tree_lock(const struct Qdisc *q) +static inline void sch_tree_lock(struct Qdisc *q) { - spin_lock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_lock_bh(qdisc_lock(q)); + else + spin_lock_bh(qdisc_root_sleeping_lock(q)); } -static inline void sch_tree_unlock(const struct Qdisc *q) +static inline void sch_tree_unlock(struct Qdisc *q) { - spin_unlock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_unlock_bh(qdisc_lock(q)); + else + spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 3feddfec9f87..4925a1b59dc9 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -27,12 +27,6 @@ #define QE_NUM_OF_BRGS 16 #define QE_NUM_OF_PORTS 1024 -/* Memory partitions -*/ -#define MEM_PART_SYSTEM 0 -#define MEM_PART_SECONDARY 1 -#define MEM_PART_MURAM 2 - /* Clocks and BRGs */ enum qe_clock { QE_CLK_NONE = 0, @@ -102,8 +96,9 @@ s32 cpm_muram_alloc(unsigned long size, unsigned long align); void cpm_muram_free(s32 offset); s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); -unsigned long cpm_muram_offset(void __iomem *addr); +unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); +void cpm_muram_free_addr(const void __iomem *addr); #else static inline s32 cpm_muram_alloc(unsigned long size, unsigned long align) @@ -126,7 +121,7 @@ static inline void __iomem *cpm_muram_addr(unsigned long offset) return NULL; } -static inline unsigned long cpm_muram_offset(void __iomem *addr) +static inline unsigned long cpm_muram_offset(const void __iomem *addr) { return -ENOSYS; } @@ -135,6 +130,9 @@ static inline dma_addr_t cpm_muram_dma(void __iomem *addr) { return 0; } +static inline void cpm_muram_free_addr(const void __iomem *addr) +{ +} #endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */ /* QE PIO */ @@ -239,6 +237,7 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset #define qe_muram_dma cpm_muram_dma +#define qe_muram_free_addr cpm_muram_free_addr #ifdef CONFIG_PPC32 #define qe_iowrite8(val, addr) out_8(addr, val) diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index dc4e79468094..9696a5b9b5d1 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -146,7 +146,6 @@ struct ucc_fast_info { resource_size_t regs; int irq; u32 uccm_mask; - int bd_mem_part; int 
brkpt_support; int grant_support; int tsa; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2bd0d8bbcdb2..eb8018c3a737 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -525,6 +525,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 13e8751bf24a..70603775fe91 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -189,6 +189,7 @@ enum { DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, + DEVCONF_RA_DEFRTR_METRIC, DEVCONF_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9e7c2c607845..79a699f106b1 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -434,6 +434,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 458179df9b27..1e05d3caa712 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -571,6 +571,7 @@ enum { NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, + NET_IPV6_RA_DEFRTR_METRIC=28, __NET_IPV6_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 768e93bd5b51..42fc5a640df4 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -314,6 +314,7 @@ enum { TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ + TCP_NLA_TTL, /* TTL or hop limit of a packet received */ }; /* for TCP_MD5SIG socket option */ @@ -353,5 +354,9 @@ struct tcp_zerocopy_receive { __u64 copybuf_address; /* in: copybuf address (small reads) */ __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ __u32 flags; /* in: flags */ + __u64 msg_control; /* ancillary data */ + __u64 msg_controllen; + __u32 msg_flags; + /* __u32 hole; Next we must add >1 u32 otherwise length checks fail. */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 3d11fec3a8dc..64468c49791f 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_9P - depends on NET tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for diff --git a/net/Makefile b/net/Makefile index d96b0aa8f39f..9ca9572188fe 100644 --- a/net/Makefile +++ b/net/Makefile @@ -6,20 +6,19 @@ # Rewritten to use lists instead of if-statements. 
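The three fields appended to struct tcp_zerocopy_receive above let the TCP_ZEROCOPY_RECEIVE getsockopt() hand back ancillary data alongside the received payload. The userspace sketch below is not part of the patch; it assumes uapi headers that already carry the new fields and omits error handling and the usual receive-region setup.

#include <linux/tcp.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

/* Illustrative only: point the new msg_control/msg_controllen fields at a
 * user buffer that the kernel can fill much like recvmsg() control data.
 */
static int zc_receive_with_cmsg(int fd, void *copybuf, int copybuf_len,
				void *cmsg_buf, __u64 cmsg_len)
{
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);

	memset(&zc, 0, sizeof(zc));
	zc.copybuf_address = (__u64)(unsigned long)copybuf;
	zc.copybuf_len = copybuf_len;
	zc.msg_control = (__u64)(unsigned long)cmsg_buf;	/* new field */
	zc.msg_controllen = cmsg_len;				/* new field */

	/* zc.msg_flags is filled in by the kernel on return */
	return getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
}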
# -obj-$(CONFIG_NET) := devres.o socket.o core/ +obj-y := devres.o socket.o core/ -tmp-$(CONFIG_COMPAT) := compat.o -obj-$(CONFIG_NET) += $(tmp-y) +obj-$(CONFIG_COMPAT) += compat.o # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ +obj-y += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ -obj-$(CONFIG_NET) += ipv6/ +obj-y += ipv6/ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ @@ -56,16 +55,12 @@ obj-$(CONFIG_SMC) += smc/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ obj-$(CONFIG_CAIF) += caif/ -ifneq ($(CONFIG_DCB),) -obj-y += dcb/ -endif +obj-$(CONFIG_DCB) += dcb/ obj-$(CONFIG_6LOWPAN) += 6lowpan/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ -ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o -endif obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ @@ -77,12 +72,8 @@ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_NET_NSH) += nsh/ obj-$(CONFIG_HSR) += hsr/ -ifneq ($(CONFIG_NET_SWITCHDEV),) -obj-y += switchdev/ -endif -ifneq ($(CONFIG_NET_L3_MASTER_DEV),) -obj-y += l3mdev/ -endif +obj-$(CONFIG_NET_SWITCHDEV) += switchdev/ +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev/ obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 993afd5ff7bb..43ae3dcbbbeb 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -9,7 +9,6 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" - depends on NET select LIBCRC32C help B.A.T.M.A.N. 
(better approach to mobile ad-hoc networking) is diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 64e669acd42f..400c5130dc0a 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -5,7 +5,7 @@ menuconfig BT tristate "Bluetooth subsystem support" - depends on NET && !S390 + depends on !S390 depends on RFKILL || !RFKILL select CRC16 select CRYPTO diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index 8ad0233ce497..3d4a21462458 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig BPFILTER bool "BPF based packet filtering framework (BPFILTER)" - depends on NET && BPF && INET + depends on BPF && INET select USERMODE_DRIVER help This builds experimental bpfilter framework that is aiming to diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 4702702a74d3..7fb9a021873b 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -18,7 +18,7 @@ br_netfilter-y := br_netfilter_hooks.o br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 257ac4e25f6d..6f672eb7ff33 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,6 +33,7 @@ #endif #include "br_private.h" +#include "br_private_mcast_eht.h" static const struct rhashtable_params br_mdb_rht_params = { .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode), @@ -441,7 +442,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) br_multicast_sg_add_exclude_ports(star_mp, sg); } -static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) +static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge_port_group *p, *pg = src->pg; struct net_bridge_port_group __rcu **pp; @@ -466,6 +468,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) (p->flags & MDB_PG_FLAGS_PERMANENT)) break; + if (fastleave) + p->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_del_pg(mp, p, pp); break; } @@ -559,11 +563,12 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) kfree_rcu(src, rcu); } -static void br_multicast_del_group_src(struct net_bridge_group_src *src) +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge *br = src->pg->key.port->br; - br_multicast_fwd_src_remove(src); + br_multicast_fwd_src_remove(src, fastleave); hlist_del_init_rcu(&src->node); src->pg->src_ents--; hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); @@ -593,8 +598,9 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, rcu_assign_pointer(*pp, pg->next); hlist_del_init(&pg->mglist); + br_multicast_eht_clean_sets(pg); hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); if (!br_multicast_is_star_g(&mp->addr)) { rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode, @@ -651,7 +657,7 @@ static void br_multicast_port_group_expired(struct timer_list *t) pg->filter_mode = MCAST_INCLUDE; hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { if 
(!timer_pending(&src_ent->timer)) { - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, false); changed = true; } } @@ -1078,7 +1084,7 @@ static void br_multicast_group_src_expired(struct timer_list *t) pg = src->pg; if (pg->filter_mode == MCAST_INCLUDE) { - br_multicast_del_group_src(src); + br_multicast_del_group_src(src, false); if (!hlist_empty(&pg->src_list)) goto out; br_multicast_find_del_pg(br, pg); @@ -1090,7 +1096,7 @@ out: spin_unlock(&br->multicast_lock); } -static struct net_bridge_group_src * +struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) { struct net_bridge_group_src *ent; @@ -1172,6 +1178,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->flags = flags; p->filter_mode = filter_mode; p->rt_protocol = rt_protocol; + p->eht_host_tree = RB_ROOT; + p->eht_set_tree = RB_ROOT; p->mcast_gc.destroy = br_multicast_destroy_port_group; INIT_HLIST_HEAD(&p->src_list); @@ -1292,7 +1300,7 @@ static int br_multicast_add_group(struct net_bridge *br, pg = __br_multicast_add_group(br, port, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ - err = IS_ERR(pg) ? PTR_ERR(pg) : 0; + err = PTR_ERR_OR_ZERO(pg); spin_unlock(&br->multicast_lock); return err; @@ -1600,6 +1608,7 @@ static void br_mc_disabled_update(struct net_device *dev, bool value) int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; timer_setup(&port->multicast_router_timer, br_multicast_router_expired, 0); @@ -1700,7 +1709,7 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg) hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) if (ent->flags & BR_SGRP_F_DELETE) { - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); deleted++; } @@ -1799,8 +1808,9 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ -static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1812,7 +1822,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -1822,9 +1832,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + return changed; } @@ -1833,8 +1845,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, * Delete (A-B) * Group Timer=GMI */ -static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t 
addr_size, + int grec_type) { struct net_bridge_group_src *ent; struct br_ip src_ip; @@ -1846,7 +1859,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) ent->flags &= ~BR_SGRP_F_DELETE; @@ -1854,9 +1867,10 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) br_multicast_fwd_src_handle(ent); - srcs += src_size; } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); } @@ -1866,8 +1880,9 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, * Delete (Y-A) * Group Timer=GMI */ -static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1882,7 +1897,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -1894,29 +1909,34 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, changed = true; } } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; return changed; } -static bool br_multicast_isexc(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, srcs, nsrcs, src_size); + __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -1930,8 +1950,9 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ -static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1946,7 +1967,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= 
~BR_SGRP_F_SEND; @@ -1958,9 +1979,11 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -1972,8 +1995,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, * Send Q(G,X-A) * Send Q(G) */ -static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1989,7 +2013,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { if (timer_pending(&ent->timer)) { @@ -2003,9 +2027,11 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2014,20 +2040,32 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toin(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } + if (br_multicast_eht_should_del_pg(pg)) { + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } @@ -2037,8 +2075,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, * Send Q(G,A*B) * Group Timer=GMI */ -static void __grp_src_toex_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2050,7 +2089,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | @@ -2061,9 +2100,10 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, } if (ent) br_multicast_fwd_src_handle(ent); - 
srcs += src_size; } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2076,8 +2116,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, * Send Q(G,A-Y) * Group Timer=GMI */ -static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2090,7 +2131,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -2105,9 +2146,11 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; if (to_send) @@ -2116,20 +2159,23 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toex(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, srcs, nsrcs, src_size); + __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -2142,11 +2188,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static void __grp_src_block_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; + bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) @@ -2155,28 +2202,29 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); - if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) - br_multicast_find_del_pg(pg->key.port->br, pg); + return changed; } /* State Msg type New state Actions * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) 
(A-X-Y)=Group Timer * Send Q(G,A-Y) */ -static bool __grp_src_block_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2189,7 +2237,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -2202,29 +2250,44 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); return changed; } -static bool br_multicast_block(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_block_incl(pg, srcs, nsrcs, src_size); + changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } + if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) || + br_multicast_eht_should_del_pg(pg)) { + if (br_multicast_eht_should_del_pg(pg)) + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } @@ -2257,8 +2320,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct igmpv3_report *ih; struct igmpv3_grec *grec; int i, len, num, type; + __be32 group, *h_addr; bool changed = false; - __be32 group; int err = 0; u16 nsrcs; @@ -2318,32 +2381,33 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; - /* reload grec */ + /* reload grec and host addr */ grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4)); + h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, - nsrcs, sizeof(__be32)); + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isexc(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toin(pg, h_addr, 
grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toex(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_block(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; } if (changed) @@ -2367,6 +2431,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; + struct in6_addr *h_addr; struct mld2_grec *grec; unsigned int grec_len; bool changed = false; @@ -2445,31 +2510,43 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; + h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case MLD2_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, - nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isexc(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_toin(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_toex(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_block(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; } if (changed) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c new file mode 100644 index 000000000000..fea38b9a7268 --- /dev/null +++ b/net/bridge/br_multicast_eht.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/if_ether.h> +#include <linux/igmp.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/netdevice.h> +#include <linux/netfilter_bridge.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/inetdevice.h> +#include <linux/mroute.h> +#include <net/ip.h> +#include <net/switchdev.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/ip6_checksum.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" +#include "br_private_mcast_eht.h" + +static bool 
br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr); +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src); + +static struct net_bridge_group_eht_host * +br_multicast_eht_host_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = pg->eht_host_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_host *eht_host; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return MCAST_INCLUDE; + + return eht_host->filter_mode; +} + +static struct net_bridge_group_eht_set_entry * +br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = eht_set->entry_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static struct net_bridge_group_eht_set * +br_multicast_eht_set_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node *node = pg->eht_set_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) +{ + WARN_ON(!hlist_empty(&eht_host->set_entries)); + + br_multicast_eht_hosts_dec(eht_host->pg); + + rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree); + RB_CLEAR_NODE(&eht_host->rb_node); + kfree(eht_host); +} + +static void br_multicast_destroy_eht_set_entry(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set_entry *set_h; + + set_h = container_of(gc, struct net_bridge_group_eht_set_entry, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&set_h->rb_node)); + + del_timer_sync(&set_h->timer); + kfree(set_h); +} + +static void br_multicast_destroy_eht_set(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set *eht_set; + + eht_set = container_of(gc, struct net_bridge_group_eht_set, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&eht_set->rb_node)); + WARN_ON(!RB_EMPTY_ROOT(&eht_set->entry_tree)); + + del_timer_sync(&eht_set->timer); + kfree(eht_set); +} + +static void __eht_del_set_entry(struct net_bridge_group_eht_set_entry *set_h) +{ + struct net_bridge_group_eht_host *eht_host = set_h->h_parent; + union net_bridge_eht_addr zero_addr; + + rb_erase(&set_h->rb_node, &set_h->eht_set->entry_tree); + RB_CLEAR_NODE(&set_h->rb_node); + 
hlist_del_init(&set_h->host_list); + memset(&zero_addr, 0, sizeof(zero_addr)); + if (memcmp(&set_h->h_addr, &zero_addr, sizeof(zero_addr))) + eht_host->num_entries--; + hlist_add_head(&set_h->mcast_gc.gc_node, &set_h->br->mcast_gc_list); + queue_work(system_long_wq, &set_h->br->mcast_gc_work); + + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +} + +static void br_multicast_del_eht_set(struct net_bridge_group_eht_set *eht_set) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct rb_node *node; + + while ((node = rb_first(&eht_set->entry_tree))) { + set_h = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + __eht_del_set_entry(set_h); + } + + rb_erase(&eht_set->rb_node, &eht_set->pg->eht_set_tree); + RB_CLEAR_NODE(&eht_set->rb_node); + hlist_add_head(&eht_set->mcast_gc.gc_node, &eht_set->br->mcast_gc_list); + queue_work(system_long_wq, &eht_set->br->mcast_gc_work); +} + +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg) +{ + struct net_bridge_group_eht_set *eht_set; + struct rb_node *node; + + while ((node = rb_first(&pg->eht_set_tree))) { + eht_set = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + br_multicast_del_eht_set(eht_set); + } +} + +static void br_multicast_eht_set_entry_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set_entry *set_h = from_timer(set_h, t, timer); + struct net_bridge *br = set_h->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&set_h->rb_node) || timer_pending(&set_h->timer)) + goto out; + + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_eht_set_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set *eht_set = from_timer(eht_set, t, + timer); + struct net_bridge *br = eht_set->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&eht_set->rb_node) || timer_pending(&eht_set->timer)) + goto out; + + br_multicast_del_eht_set(eht_set); +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_group_eht_host * +__eht_lookup_create_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + unsigned char filter_mode) +{ + struct rb_node **link = &pg->eht_host_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_host *eht_host; + + while (*link) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + if (br_multicast_eht_hosts_over_limit(pg)) + return NULL; + + eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC); + if (!eht_host) + return NULL; + + memcpy(&eht_host->h_addr, h_addr, sizeof(*h_addr)); + INIT_HLIST_HEAD(&eht_host->set_entries); + eht_host->pg = pg; + eht_host->filter_mode = filter_mode; + + rb_link_node(&eht_host->rb_node, parent, link); + rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree); + + br_multicast_eht_hosts_inc(pg); + + return eht_host; +} + +static struct net_bridge_group_eht_set_entry * +__eht_lookup_create_set_entry(struct net_bridge *br, + struct net_bridge_group_eht_set *eht_set, + struct net_bridge_group_eht_host *eht_host, + bool allow_zero_src) +{ + struct rb_node **link = &eht_set->entry_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set_entry *set_h; + + 
while (*link) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(&eht_host->h_addr, &this->h_addr, + sizeof(union net_bridge_eht_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + /* always allow auto-created zero entry */ + if (!allow_zero_src && eht_host->num_entries >= PG_SRC_ENT_LIMIT) + return NULL; + + set_h = kzalloc(sizeof(*set_h), GFP_ATOMIC); + if (!set_h) + return NULL; + + memcpy(&set_h->h_addr, &eht_host->h_addr, + sizeof(union net_bridge_eht_addr)); + set_h->mcast_gc.destroy = br_multicast_destroy_eht_set_entry; + set_h->eht_set = eht_set; + set_h->h_parent = eht_host; + set_h->br = br; + timer_setup(&set_h->timer, br_multicast_eht_set_entry_expired, 0); + + hlist_add_head(&set_h->host_list, &eht_host->set_entries); + rb_link_node(&set_h->rb_node, parent, link); + rb_insert_color(&set_h->rb_node, &eht_set->entry_tree); + /* we must not count the auto-created zero entry otherwise we won't be + * able to track the full list of PG_SRC_ENT_LIMIT entries + */ + if (!allow_zero_src) + eht_host->num_entries++; + + return set_h; +} + +static struct net_bridge_group_eht_set * +__eht_lookup_create_set(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node **link = &pg->eht_set_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set *eht_set; + + while (*link) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + eht_set = kzalloc(sizeof(*eht_set), GFP_ATOMIC); + if (!eht_set) + return NULL; + + memcpy(&eht_set->src_addr, src_addr, sizeof(*src_addr)); + eht_set->mcast_gc.destroy = br_multicast_destroy_eht_set; + eht_set->pg = pg; + eht_set->br = pg->key.port->br; + eht_set->entry_tree = RB_ROOT; + timer_setup(&eht_set->timer, br_multicast_eht_set_expired, 0); + + rb_link_node(&eht_set->rb_node, parent, link); + rb_insert_color(&eht_set->rb_node, &pg->eht_set_tree); + + return eht_set; +} + +static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, + union net_bridge_eht_addr *dest) +{ + switch (src->proto) { + case htons(ETH_P_IP): + dest->ip4 = src->src.ip4; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + memcpy(&dest->ip6, &src->src.ip6, sizeof(struct in6_addr)); + break; +#endif + } +} + +static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + int filter_mode) +{ + struct net_bridge_group_eht_host *eht_host; + union net_bridge_eht_addr zero_addr; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (eht_host) + eht_host->filter_mode = filter_mode; + + memset(&zero_addr, 0, sizeof(zero_addr)); + switch (filter_mode) { + case MCAST_INCLUDE: + br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr); + break; + case MCAST_EXCLUDE: + br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr, + MCAST_EXCLUDE, + true); + break; + } +} + +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src) +{ + struct 
net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; + union net_bridge_eht_addr zero_addr; + + memset(&zero_addr, 0, sizeof(zero_addr)); + if (!allow_zero_src && !memcmp(src_addr, &zero_addr, sizeof(zero_addr))) + return; + + eht_set = __eht_lookup_create_set(pg, src_addr); + if (!eht_set) + return; + + eht_host = __eht_lookup_create_host(pg, h_addr, filter_mode); + if (!eht_host) + goto fail_host; + + set_h = __eht_lookup_create_set_entry(br, eht_set, eht_host, + allow_zero_src); + if (!set_h) + goto fail_set_entry; + + mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br)); + + return; + +fail_set_entry: + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +fail_host: + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) + br_multicast_del_eht_set(eht_set); +} + +static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_set *eht_set; + bool set_deleted = false; + + eht_set = br_multicast_eht_set_lookup(pg, src_addr); + if (!eht_set) + goto out; + + set_h = br_multicast_eht_set_entry_lookup(eht_set, h_addr); + if (!set_h) + goto out; + + __eht_del_set_entry(set_h); + + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) { + br_multicast_del_eht_set(eht_set); + set_deleted = true; + } + +out: + return set_deleted; +} + +static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct hlist_node *tmp; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return; + + hlist_for_each_entry_safe(set_h, tmp, &eht_host->set_entries, host_list) + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +} + +static void __eht_allow_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } +} + +static bool __eht_allow_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + } + 
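Throughout these handlers the per-report source list arrives as a packed array, and the rewritten loops address entry i as srcs + (i * addr_size) instead of bumping the srcs pointer. A small standalone illustration of that access pattern follows; it is plain userspace C with made-up sample addresses, not bridge code.

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* three packed IPv4 sources, laid out as in an IGMPv3 group record */
	unsigned char srcs[] = {
		192, 0, 2, 1,
		192, 0, 2, 2,
		192, 0, 2, 3,
	};
	const size_t addr_size = 4;
	const unsigned int nsrcs = sizeof(srcs) / addr_size;
	char buf[INET_ADDRSTRLEN];

	for (unsigned int i = 0; i < nsrcs; i++) {
		struct in_addr a;

		/* entry i starts at srcs + (i * addr_size) */
		memcpy(&a, srcs + (i * addr_size), addr_size);
		printf("src[%u] = %s\n", i,
		       inet_ntop(AF_INET, &a, buf, sizeof(buf)));
	}
	return 0;
}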
+ return changed; +} + +static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + __eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +static bool __eht_block_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + + return changed; +} + +static bool __eht_block_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_EXCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + } + + return changed; +} + +static bool br_multicast_eht_block(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +/* flush_entries is true when changing mode */ +static bool __eht_inc_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + unsigned char filter_mode, + bool to_report) +{ + bool changed = false, flush_entries = to_report; + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode) + flush_entries = true; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + /* if we're changing mode del host and its entries */ + if (flush_entries) + br_multicast_del_eht_host(pg, h_addr); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + 
memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + filter_mode, false); + } + /* we can be missing sets only if we've deleted some entries */ + if (flush_entries) { + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; + struct net_bridge_group_src *src_ent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { + br_multicast_ip_src_to_eht_addr(&src_ent->addr, + &eht_src_addr); + if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) { + br_multicast_del_group_src(src_ent, true); + changed = true; + continue; + } + /* this is an optimization for TO_INCLUDE where we lower + * the set's timeout to LMQT to catch timeout hosts: + * - host A (timing out): set entries X, Y + * - host B: set entry Z (new from current TO_INCLUDE) + * sends BLOCK Z after LMQT but host A's EHT + * entries still exist (unless lowered to LMQT + * so they can timeout with the S,Gs) + * => we wait another LMQT, when we can just delete the + * group immediately + */ + if (!(src_ent->flags & BR_SGRP_F_SEND) || + filter_mode != MCAST_INCLUDE || + !to_report) + continue; + eht_set = br_multicast_eht_set_lookup(pg, + &eht_src_addr); + if (!eht_set) + continue; + mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br)); + } + } + + return changed; +} + +static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_INCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE); + + return changed; +} + +static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_EXCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE); + + return changed; +} + +static bool __eht_ip4_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false, to_report = false; + + switch (grec_type) { + case IGMPV3_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32)); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; + } + + return changed; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool __eht_ip6_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false, to_report = false; + + switch (grec_type) { + case MLD2_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + 
case MLD2_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; + case MLD2_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case MLD2_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; + } + + return changed; +} +#endif + +/* true means an entry was deleted */ +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type) +{ + bool eht_enabled = !!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE); + union net_bridge_eht_addr eht_host_addr; + bool changed = false; + + if (!eht_enabled) + goto out; + + memset(&eht_host_addr, 0, sizeof(eht_host_addr)); + memcpy(&eht_host_addr, h_addr, addr_size); + if (addr_size == sizeof(__be32)) + changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#if IS_ENABLED(CONFIG_IPV6) + else + changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#endif + +out: + return changed; +} + +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit) +{ + struct net_bridge *br = p->br; + + if (!eht_hosts_limit) + return -EINVAL; + + spin_lock_bh(&br->multicast_lock); + p->multicast_eht_hosts_limit = eht_hosts_limit; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 762f273802cd..bd3962da345a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -18,6 +18,7 @@ #include "br_private_stp.h" #include "br_private_cfm.h" #include "br_private_tunnel.h" +#include "br_private_mcast_eht.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -199,6 +200,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */ + 0; } @@ -283,7 +286,11 @@ static int br_port_fill_attrs(struct sk_buff *skb, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, - p->multicast_router)) + p->multicast_router) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + p->multicast_eht_hosts_limit) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + p->multicast_eht_hosts_cnt)) return -EMSGSIZE; #endif @@ -820,6 +827,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, + [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, }; /* Change the state of the port and notify spanning tree */ @@ -955,6 +963,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) if (err) return err; } + + if (tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]) { + u32 hlimit; + + hlimit = nla_get_u32(tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]); + err = br_multicast_eht_set_hosts_limit(p, hlimit); + if (err) + return err; + } #endif if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d62c6e1af64a..d242ba668e47 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -252,6 +252,8 @@ struct net_bridge_port_group { struct 
timer_list timer; struct timer_list rexmit_timer; struct hlist_node mglist; + struct rb_root eht_set_tree; + struct rb_root eht_host_tree; struct rhash_head rhnode; struct net_bridge_mcast_gc mcast_gc; @@ -308,6 +310,8 @@ struct net_bridge_port { #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ + u32 multicast_eht_hosts_limit; + u32 multicast_eht_hosts_cnt; unsigned char multicast_router; struct bridge_mcast_stats __percpu *mcast_stats; struct timer_list multicast_router_timer; @@ -846,6 +850,10 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode); void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); +struct net_bridge_group_src * +br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h new file mode 100644 index 000000000000..f89049f4892c --- /dev/null +++ b/net/bridge/br_private_mcast_eht.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com> + */ +#ifndef _BR_PRIVATE_MCAST_EHT_H_ +#define _BR_PRIVATE_MCAST_EHT_H_ + +#define BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT 512 + +union net_bridge_eht_addr { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; +#endif +}; + +/* single host's list of set entries and filter_mode */ +struct net_bridge_group_eht_host { + struct rb_node rb_node; + + union net_bridge_eht_addr h_addr; + struct hlist_head set_entries; + unsigned int num_entries; + unsigned char filter_mode; + struct net_bridge_port_group *pg; +}; + +/* (host, src entry) added to a per-src set and host's list */ +struct net_bridge_group_eht_set_entry { + struct rb_node rb_node; + struct hlist_node host_list; + + union net_bridge_eht_addr h_addr; + struct timer_list timer; + struct net_bridge *br; + struct net_bridge_group_eht_set *eht_set; + struct net_bridge_group_eht_host *h_parent; + struct net_bridge_mcast_gc mcast_gc; +}; + +/* per-src set */ +struct net_bridge_group_eht_set { + struct rb_node rb_node; + + union net_bridge_eht_addr src_addr; + struct rb_root entry_tree; + struct timer_list timer; + struct net_bridge_port_group *pg; + struct net_bridge *br; + struct net_bridge_mcast_gc mcast_gc; +}; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type); +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit); + +static inline bool +br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg) +{ + return !!((pg->key.port->flags & BR_MULTICAST_FAST_LEAVE) && + RB_EMPTY_ROOT(&pg->eht_host_tree)); +} + +static inline bool +br_multicast_eht_hosts_over_limit(const struct net_bridge_port_group *pg) +{ + const struct net_bridge_port *p = pg->key.port; + + return !!(p->multicast_eht_hosts_cnt >= p->multicast_eht_hosts_limit); +} + +static inline void br_multicast_eht_hosts_inc(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt++; +} + +static inline void br_multicast_eht_hosts_dec(struct net_bridge_port_group *pg) 
+{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt--; +} +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ + +#endif /* _BR_PRIVATE_MCAST_EHT_H_ */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7a59cdddd3ce..b66305fae26b 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -16,6 +16,7 @@ #include <linux/sched/signal.h> #include "br_private.h" +#include "br_private_mcast_eht.h" struct brport_attribute { struct attribute attr; @@ -245,6 +246,29 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); +static ssize_t show_multicast_eht_hosts_limit(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_limit); +} + +static int store_multicast_eht_hosts_limit(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_eht_set_hosts_limit(p, v); +} +static BRPORT_ATTR(multicast_eht_hosts_limit, 0644, + show_multicast_eht_hosts_limit, + store_multicast_eht_hosts_limit); + +static ssize_t show_multicast_eht_hosts_cnt(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_cnt); +} +static BRPORT_ATTR(multicast_eht_hosts_cnt, 0444, show_multicast_eht_hosts_cnt, + NULL); + BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); #endif @@ -274,6 +298,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, &brport_attr_multicast_to_unicast, + &brport_attr_multicast_eht_hosts_limit, + &brport_attr_multicast_eht_hosts_cnt, #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, diff --git a/net/can/Kconfig b/net/can/Kconfig index 7c9958df91d3..a9ac5ffab286 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -4,7 +4,6 @@ # menuconfig CAN - depends on NET tristate "CAN bus subsystem support" help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial diff --git a/net/can/gw.c b/net/can/gw.c index 8598d9da0e5f..ba4124805602 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -225,7 +225,7 @@ static void mod_store_ccdlc(struct canfd_frame *cf) if (ccf->len <= CAN_MAX_DLEN) return; - /* potentially broken values are catched in can_can_gw_rcv() */ + /* potentially broken values are caught in can_can_gw_rcv() */ if (ccf->len > CAN_MAX_RAW_DLC) return; diff --git a/net/core/dev.c b/net/core/dev.c index d9ce02e95992..6df3f1bcdc68 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4084,7 +4084,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_reset_mac_header(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) - __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. diff --git a/net/core/devlink.c b/net/core/devlink.c index 738d4344d679..72ea79879762 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8617,6 +8617,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); * @resource_id: resource's id * @parent_resource_id: resource's parent id * @size_params: size parameters + * + * Generic resources should reuse the same names across drivers. 
+ * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 105978604ffd..3fba429f1f57 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg) struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - BUG_ON(smp_processor_id() != cpu); + WARN_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3d6ab194d0f5..c313aaf2bce1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -55,7 +55,7 @@ #include <net/net_namespace.h> #define RTNL_MAX_TYPE 50 -#define RTNL_SLAVE_MAX_TYPE 36 +#define RTNL_SLAVE_MAX_TYPE 40 struct rtnl_link { rtnl_doit_func doit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 145503d3f06b..2af12f7e170c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4721,6 +4721,7 @@ err: EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, + const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { @@ -4743,7 +4744,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk, orig_skb); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb, + ack_skb); opt_stats = true; } else #endif @@ -4772,7 +4774,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { - return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d86d8d11cfe4..4567de519603 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -309,7 +309,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif static struct ctl_table net_core_table[] = { -#ifdef CONFIG_NET { .procname = "wmem_max", .data = &sysctl_wmem_max, @@ -507,7 +506,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = set_default_qdisc }, #endif -#endif /* CONFIG_NET */ { .procname = "netdev_budget", .data = &netdev_budget, diff --git a/net/dcb/Makefile b/net/dcb/Makefile index 3016e5a7716a..2c0fa16ee2a9 100644 --- a/net/dcb/Makefile +++ b/net/dcb/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o +obj-y += dcbnl.o dcbevent.o diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig index 255df9b6e9e8..155b06163409 100644 --- a/net/dns_resolver/Kconfig +++ b/net/dns_resolver/Kconfig @@ -4,7 +4,7 @@ # config DNS_RESOLVER tristate "DNS Resolver support" - depends on NET && KEYS + depends on KEYS help Saying Y here will include support for the DNS Resolver key type which can be used to make upcalls to perform DNS lookups in diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 24036e3055a1..181220101a6e 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -68,6 +68,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list", [NETIF_F_HW_MACSEC_BIT] = 
"macsec-hw-offload", + [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding", }; const char diff --git a/net/ife/Kconfig b/net/ife/Kconfig index bcf650564db4..de36a5b91e50 100644 --- a/net/ife/Kconfig +++ b/net/ife/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_IFE - depends on NET tristate "Inter-FE based on IETF ForCES InterFE LFB" default n help diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 856ae516ac18..e1a17c6b473c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,6 +280,12 @@ #include <asm/ioctls.h> #include <net/busy_poll.h> +/* Track pending CMSGs. */ +enum { + TCP_CMSG_INQ = 1, + TCP_CMSG_TS = 2 +}; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -1739,6 +1745,20 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_set_rcvlowat); +static void tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) +{ + if (skb->tstamp) + tss->ts[0] = ktime_to_timespec64(skb->tstamp); + else + tss->ts[0] = (struct timespec64) {0}; + + if (skb_hwtstamps(skb)->hwtstamp) + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); + else + tss->ts[2] = (struct timespec64) {0}; +} + #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; @@ -1842,13 +1862,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, - struct tcp_zerocopy_receive *zc, int inq) + struct tcp_zerocopy_receive *zc, int inq, + struct scm_timestamping_internal *tss) { unsigned long copy_address = (unsigned long)zc->copybuf_address; - struct scm_timestamping_internal tss_unused; - int err, cmsg_flags_unused; struct msghdr msg = {}; struct iovec iov; + int err; zc->length = 0; zc->recv_skip_hint = 0; @@ -1862,7 +1882,7 @@ static int receive_fallback_to_copy(struct sock *sk, return err; err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, - &tss_unused, &cmsg_flags_unused); + tss, &zc->msg_flags); if (err < 0) return err; @@ -1903,21 +1923,27 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, return (__s32)copylen; } -static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc, - struct sock *sk, - struct sk_buff *skb, - u32 *seq, - s32 copybuf_len) +static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc, + struct sock *sk, + struct sk_buff *skb, + u32 *seq, + s32 copybuf_len, + struct scm_timestamping_internal *tss) { u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); if (!copylen) return 0; /* skb is null if inq < PAGE_SIZE. 
*/ - if (skb) + if (skb) { offset = *seq - TCP_SKB_CB(skb)->seq; - else + } else { skb = tcp_recv_skb(sk, *seq, &offset); + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } + } zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, seq); @@ -2004,9 +2030,37 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, err); } +static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping_internal *tss); +static void tcp_zc_finalize_rx_tstamp(struct sock *sk, + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) +{ + unsigned long msg_control_addr; + struct msghdr cmsg_dummy; + + msg_control_addr = (unsigned long)zc->msg_control; + cmsg_dummy.msg_control = (void *)msg_control_addr; + cmsg_dummy.msg_controllen = + (__kernel_size_t)zc->msg_controllen; + cmsg_dummy.msg_flags = in_compat_syscall() + ? MSG_CMSG_COMPAT : 0; + zc->msg_flags = 0; + if (zc->msg_control == msg_control_addr && + zc->msg_controllen == cmsg_dummy.msg_controllen) { + tcp_recv_timestamp(&cmsg_dummy, sk, tss); + zc->msg_control = (__u64) + ((uintptr_t)cmsg_dummy.msg_control); + zc->msg_controllen = + (__u64)cmsg_dummy.msg_controllen; + zc->msg_flags = (__u32)cmsg_dummy.msg_flags; + } +} + #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, - struct tcp_zerocopy_receive *zc) + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) { u32 length = 0, offset, vma_len, avail_len, copylen = 0; unsigned long address = (unsigned long)zc->address; @@ -2023,6 +2077,7 @@ static int tcp_zerocopy_receive(struct sock *sk, int ret; zc->copybuf_len = 0; + zc->msg_flags = 0; if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; @@ -2033,7 +2088,7 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); if (inq && inq <= copybuf_len) - return receive_fallback_to_copy(sk, zc, inq); + return receive_fallback_to_copy(sk, zc, inq, tss); if (inq < PAGE_SIZE) { zc->length = 0; @@ -2078,6 +2133,11 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } + + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } zc->recv_skip_hint = skb->len - offset; frags = skb_advance_to_frag(skb, offset, &offset_frag); if (!frags || offset_frag) @@ -2120,8 +2180,7 @@ out: mmap_read_unlock(current->mm); /* Try to copy straggler data. 
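	 * Timestamps gathered into tss, either in the main loop above or while
	 * handling the leftover below, are only turned into control messages
	 * later, by tcp_zc_finalize_rx_tstamp(), once
	 * getsockopt(TCP_ZEROCOPY_RECEIVE) sees TCP_CMSG_TS in zc->msg_flags.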
*/ if (!ret) - copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq, - copybuf_len); + copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss); if (length + copylen) { WRITE_ONCE(tp->copied_seq, seq); @@ -2142,20 +2201,6 @@ out: } #endif -static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) -{ - if (skb->tstamp) - tss->ts[0] = ktime_to_timespec64(skb->tstamp); - else - tss->ts[0] = (struct timespec64) {0}; - - if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); - else - tss->ts[2] = (struct timespec64) {0}; -} - /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss) @@ -2272,7 +2317,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (tp->recvmsg_inq) - *cmsg_flags = 1; + *cmsg_flags = TCP_CMSG_INQ; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2453,7 +2498,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, tss); - *cmsg_flags |= 2; + *cmsg_flags |= TCP_CMSG_TS; } if (used + offset < skb->len) @@ -2513,9 +2558,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, release_sock(sk); if (cmsg_flags && ret >= 0) { - if (cmsg_flags & 2) + if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & 1) { + if (cmsg_flags & TCP_CMSG_INQ) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } @@ -3767,11 +3812,24 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ + nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */ 0; } +/* Returns TTL or hop limit of an incoming packet from skb. 
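 * For example, an ACK received over IPv4 with ip_hdr(skb)->ttl == 57 yields
 * 57, an IPv6 ACK yields ipv6_hdr(skb)->hop_limit, and any other protocol
 * yields 0.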
*/ +static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return ip_hdr(skb)->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_hdr(skb)->hop_limit; + else + return 0; +} + struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb) + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; @@ -3827,6 +3885,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, max_t(int, 0, tp->write_seq - tp->snd_nxt)); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); + if (ack_skb) + nla_put_u8(stats, TCP_NLA_TTL, + tcp_skb_ttl_or_hop_limit(ack_skb)); return stats; } @@ -4083,6 +4144,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { + struct scm_timestamping_internal tss; struct tcp_zerocopy_receive zc = {}; int err; @@ -4098,11 +4160,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); - err = tcp_zerocopy_receive(sk, &zc); + err = tcp_zerocopy_receive(sk, &zc, &tss); release_sock(sk); - if (len >= offsetofend(struct tcp_zerocopy_receive, err)) - goto zerocopy_rcv_sk_err; + if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) + goto zerocopy_rcv_cmsg; switch (len) { + case offsetofend(struct tcp_zerocopy_receive, msg_flags): + goto zerocopy_rcv_cmsg; + case offsetofend(struct tcp_zerocopy_receive, msg_controllen): + case offsetofend(struct tcp_zerocopy_receive, msg_control): + case offsetofend(struct tcp_zerocopy_receive, flags): + case offsetofend(struct tcp_zerocopy_receive, copybuf_len): + case offsetofend(struct tcp_zerocopy_receive, copybuf_address): case offsetofend(struct tcp_zerocopy_receive, err): goto zerocopy_rcv_sk_err; case offsetofend(struct tcp_zerocopy_receive, inq): @@ -4111,6 +4180,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, default: goto zerocopy_rcv_out; } +zerocopy_rcv_cmsg: + if (zc.msg_flags & TCP_CMSG_TS) + tcp_zc_finalize_rx_tstamp(sk, &zc, &tss); + else + zc.msg_flags = 0; zerocopy_rcv_sk_err: if (!err) zc.err = sock_error(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7dfca0a38cd..d4f66aba9fd8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3145,7 +3145,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) } static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, - u32 prior_snd_una) + const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct skb_shared_info *shinfo; @@ -3157,7 +3157,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) { tcp_skb_tsorted_save(skb) { - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK); } tcp_skb_tsorted_restore(skb); } } @@ -3166,8 +3166,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. 
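 * The ACK skb is now passed down as well, so that tcp_ack_tstamp() can
 * forward it to __skb_tstamp_tx(); with SOF_TIMESTAMPING_OPT_STATS this lets
 * tcp_get_timestamping_opt_stats() report the ACK's TTL or hop limit as
 * TCP_NLA_TTL.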
*/ -static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, - u32 prior_snd_una, +static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, + u32 prior_fack, u32 prior_snd_una, struct tcp_sacktag_state *sack, bool ece_ack) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3256,7 +3256,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (!fully_acked) break; - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) @@ -3274,7 +3274,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->snd_up = tp->snd_una; if (skb) { - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) flag |= FLAG_SACK_RENEGING; } @@ -3809,8 +3809,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state, - flag & FLAG_ECE); + flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una, + &sack_state, flag & FLAG_ECE); tcp_rack_update_reo_wnd(sk, &rs); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1168d186cc43..41249705d9e9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -460,7 +460,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; - if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { pp = call_gro_receive(udp_gro_receive_segment, head, skb); return pp; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9edc5bb2d531..f2337fb756ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -205,6 +205,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -5476,6 +5478,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric; array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF @@ -6669,6 +6672,14 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ra_defrtr_metric", + .data = &ipv6_devconf.ra_defrtr_metric, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + }, + { .procname = "accept_ra_min_hop_limit", .data = &ipv6_devconf.accept_ra_min_hop_limit, .maxlen = sizeof(int), diff 
--git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76717478f173..c467c6419893 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1173,6 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; struct fib6_info *rt = NULL; + u32 defrtr_usr_metric; struct net *net; int lifetime; struct ndisc_options ndopts; @@ -1303,18 +1304,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } } - if (rt && lifetime == 0) { + /* Set default route metric as specified by user */ + defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; + /* delete the route if lifetime is 0 or if metric needs change */ + if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", - rt, lifetime, skb->dev->name); + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", + rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, - skb->dev, pref); + skb->dev, pref, defrtr_usr_metric); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 188e114b29b4..41d8f801b75f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4252,11 +4252,12 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, - unsigned int pref) + unsigned int pref, + u32 defrtr_usr_metric) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, - .fc_metric = IP6_RT_PRIO_USER, + .fc_metric = defrtr_usr_metric, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile index 59755a9e2f9b..9e7da0acc58c 100644 --- a/net/l3mdev/Makefile +++ b/net/l3mdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the L3 device API # -obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o +obj-y += l3mdev.o diff --git a/net/llc/Kconfig b/net/llc/Kconfig index b0e646ac47eb..7f79f5e134f9 100644 --- a/net/llc/Kconfig +++ b/net/llc/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config LLC tristate - depends on NET config LLC2 tristate "ANSI/IEEE 802.2 LLC type 2 Support" diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9b1f6298bbdb..83976b9ee99b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -60,15 +60,20 @@ static bool addresses_equal(const struct mptcp_addr_info *a, { bool addr_equals = false; - if (a->family != b->family) - return false; - - if (a->family == AF_INET) - addr_equals = a->addr.s_addr == b->addr.s_addr; + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + else + addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + } else if (a->family == AF_INET) { + if (ipv6_addr_v4mapped(&b->addr6)) + addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; + } else if (b->family == AF_INET) { + if (ipv6_addr_v4mapped(&a->addr6)) + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; #endif + } if (!addr_equals) return false; @@ -137,6 +142,7 @@ select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) { struct 
mptcp_pm_addr_entry *entry, *ret = NULL; + struct sock *sk = (struct sock *)msk; rcu_read_lock(); __mptcp_flush_join_list(msk); @@ -144,11 +150,20 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + /* avoid any address already in use by subflows and * pending join */ - if (entry->addr.family == ((struct sock *)msk)->sk_family && - !lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { + if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { ret = entry; break; } @@ -310,7 +325,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; struct pm_nl_pernet *pernet; @@ -344,13 +358,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check if should create a new subflow */ if (msk->pm.local_addr_used < msk->pm.local_addr_max && msk->pm.subflows < msk->pm.subflows_max) { - remote_address((struct sock_common *)sk, &remote); - local = select_local_address(pernet, msk); if (local) { + struct mptcp_addr_info remote = { 0 }; + msk->pm.local_addr_used++; msk->pm.subflows++; check_work_pending(msk); + remote_address((struct sock_common *)sk, &remote); spin_unlock_bh(&msk->pm.lock); __mptcp_subflow_connect(sk, &local->addr, &remote); spin_lock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f998a077c7dd..a033bf9c26ee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -45,6 +45,9 @@ static struct percpu_counter mptcp_sockets_allocated; static void __mptcp_destroy_sock(struct sock *sk); static void __mptcp_check_send_data_fin(struct sock *sk); +DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); +static struct net_device mptcp_napi_dev; + /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not * completed yet or has failed, return the subflow socket. * Otherwise return NULL. 
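The per-CPU mptcp_delegated_actions together with the dummy mptcp_napi_dev declared just above form a small deferred-work mechanism: a subflow action that cannot run right away is queued on the local CPU's list and a NAPI instance is scheduled to drain that list in softirq context. A minimal sketch of the same queue-and-poll pattern, using hypothetical deferred_* names rather than the MPTCP structures, might look like this:

#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>

/* Sketch only: hypothetical names, not the MPTCP implementation. */
struct deferred_actions {
	struct napi_struct napi;
	struct list_head head;		/* protected by local BH disable */
};

static DEFINE_PER_CPU(struct deferred_actions, deferred_actions);
static struct net_device deferred_napi_dev;

struct deferred_item {
	struct list_head node;
};

/* Caller runs with BH disabled, so the per-CPU list needs no extra lock. */
static void deferred_schedule(struct deferred_item *item)
{
	struct deferred_actions *da = this_cpu_ptr(&deferred_actions);
	bool kick = list_empty(&da->head);

	list_add_tail(&item->node, &da->head);
	if (kick)
		napi_schedule(&da->napi);
}

static int deferred_poll(struct napi_struct *napi, int budget)
{
	struct deferred_actions *da =
		container_of(napi, struct deferred_actions, napi);
	int done = 0;

	while (!list_empty(&da->head)) {
		struct deferred_item *item;

		item = list_first_entry(&da->head, struct deferred_item, node);
		list_del_init(&item->node);
		/* process the deferred action for 'item' here */
		if (++done == budget)
			return budget;
	}

	/* report 0 work, as mptcp_napi_poll() does, before going idle */
	napi_complete_done(napi, 0);
	return done;
}

static void __init deferred_init(void)
{
	int cpu;

	init_dummy_netdev(&deferred_napi_dev);
	for_each_possible_cpu(cpu) {
		struct deferred_actions *da = per_cpu_ptr(&deferred_actions, cpu);

		INIT_LIST_HEAD(&da->head);
		netif_tx_napi_add(&deferred_napi_dev, &da->napi, deferred_poll,
				  NAPI_POLL_WEIGHT);
		napi_enable(&da->napi);
	}
}

The dummy net_device exists only so the per-CPU NAPI instances have a device to attach to; scheduling and budget handling then come from the ordinary softirq machinery.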
@@ -114,11 +117,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; - - /* accept() will wait on first subflow sk_wq, and we always wakes up - * via msk->sk_socket - */ - RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + mptcp_sock_graft(msk->first, sk->sk_socket); return 0; } @@ -734,10 +733,14 @@ wake: void __mptcp_flush_join_list(struct mptcp_sock *msk) { + struct mptcp_subflow_context *subflow; + if (likely(list_empty(&msk->join_list))) return; spin_lock_bh(&msk->join_list_lock); + list_for_each_entry(subflow, &msk->join_list, node) + mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow)); list_splice_tail_init(&msk->join_list, &msk->conn_list); spin_unlock_bh(&msk->join_list_lock); } @@ -1037,13 +1040,6 @@ out: __mptcp_update_wmem(sk); sk_mem_reclaim_partial(sk); } - - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags)) - sk_stream_write_space(sk); - } } if (snd_una == READ_ONCE(msk->snd_nxt)) { @@ -1362,8 +1358,7 @@ struct subflow_send_info { u64 ratio; }; -static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, - u32 *sndbuf) +static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; @@ -1374,24 +1369,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, sock_owned_by_me((struct sock *)msk); - *sndbuf = 0; if (__mptcp_check_fallback(msk)) { if (!msk->first) return NULL; - *sndbuf = msk->first->sk_sndbuf; return sk_stream_memory_free(msk->first) ? msk->first : NULL; } /* re-use last subflow, if the burst allow that */ if (msk->last_snd && msk->snd_burst > 0 && sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { - mptcp_for_each_subflow(msk, subflow) { - ssk = mptcp_subflow_tcp_sock(subflow); - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); - } + mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) return msk->last_snd; - } /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < 2; ++i) { @@ -1404,8 +1392,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, continue; nr_active += !subflow->backup; - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); - if (!sk_stream_memory_free(subflow->tcp_sock)) + if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) continue; pace = READ_ONCE(ssk->sk_pacing_rate); @@ -1431,9 +1418,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, if (send_info[0].ssk) { msk->last_snd = send_info[0].ssk; msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE, - sk_stream_wspace(msk->last_snd)); + tcp_sk(msk->last_snd)->snd_wnd); return msk->last_snd; } + return NULL; } @@ -1454,7 +1442,6 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) }; struct mptcp_data_frag *dfrag; int len, copied = 0; - u32 sndbuf; while ((dfrag = mptcp_send_head(sk))) { info.sent = dfrag->already_sent; @@ -1465,12 +1452,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) prev_ssk = ssk; __mptcp_flush_join_list(msk); - ssk = mptcp_subflow_get_send(msk, &sndbuf); - - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, sndbuf); + ssk = mptcp_subflow_get_send(msk); /* try to keep the subflow socket lock across * 
consecutive xmit on the same socket @@ -1527,7 +1509,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info; struct mptcp_data_frag *dfrag; + struct sock *xmit_ssk; int len, copied = 0; + bool first = true; info.flags = 0; while ((dfrag = mptcp_send_head(sk))) { @@ -1537,10 +1521,17 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) while (len > 0) { int ret = 0; - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + /* the caller already invoked the packet scheduler, + * check for a different subflow usage only after + * spooling the first chunk of data + */ + xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk)); + if (!xmit_ssk) + goto out; + if (xmit_ssk != ssk) { + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); + goto out; + } if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { __mptcp_update_wmem(sk); @@ -1560,6 +1551,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) msk->tx_pending_data -= ret; copied += ret; len -= ret; + first = false; } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); } @@ -1579,6 +1571,15 @@ out: } } +static void mptcp_set_nospace(struct sock *sk) +{ + /* enable autotune */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + /* will be cleared on avail space */ + set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1680,7 +1681,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -2116,9 +2117,6 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { - bool dispose_socket = false; - struct socket *sock; - list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2126,11 +2124,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* if we are invoked by the msk cleanup code, the subflow is * already orphaned */ - sock = ssk->sk_socket; - if (sock) { - dispose_socket = sock != sk->sk_socket; + if (ssk->sk_socket) sock_orphan(ssk); - } subflow->disposable = 1; @@ -2148,8 +2143,6 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, __sock_put(ssk); } release_sock(ssk); - if (dispose_socket) - iput(SOCK_INODE(sock)); sock_put(ssk); } @@ -2536,6 +2529,12 @@ static void __mptcp_destroy_sock(struct sock *sk) pr_debug("msk=%p", msk); + /* dispose the ancillatory tcp socket, if any */ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } + /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). 
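	 * __mptcp_flush_join_list() takes the same lock and, while splicing
	 * joined subflows onto conn_list, now also propagates their sndbuf
	 * to the msk via mptcp_propagate_sndbuf().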
*/ @@ -2586,20 +2585,10 @@ cleanup: inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow, dispose_socket; - struct socket *sock; + bool slow = lock_sock_fast(ssk); - slow = lock_sock_fast(ssk); - sock = ssk->sk_socket; - dispose_socket = sock && sock != sk->sk_socket; sock_orphan(ssk); unlock_sock_fast(ssk, slow); - - /* for the outgoing subflows we additionally need to free - * the associated socket - */ - if (dispose_socket) - iput(SOCK_INODE(sock)); } sock_orphan(sk); @@ -2928,10 +2917,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) if (!mptcp_send_head(sk)) return; - if (!sock_owned_by_user(sk)) - __mptcp_subflow_push_pending(sk, ssk); - else + if (!sock_owned_by_user(sk)) { + struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk)); + + if (xmit_ssk == ssk) + __mptcp_subflow_push_pending(sk, ssk); + else if (xmit_ssk) + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); + } else { set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + } } #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) @@ -2979,6 +2974,20 @@ static void mptcp_release_cb(struct sock *sk) } } +void mptcp_subflow_process_delegated(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_subflow_push_pending(sk, ssk); + else + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); + mptcp_subflow_delegated_done(subflow); +} + static int mptcp_hash(struct sock *sk) { /* should never be called, @@ -3041,7 +3050,7 @@ void mptcp_finish_connect(struct sock *ssk) mptcp_rcv_space_init(msk, ssk); } -static void mptcp_sock_graft(struct sock *sk, struct socket *parent) +void mptcp_sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); @@ -3284,6 +3293,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); + mptcp_propagate_sndbuf(newsk, msk->first); /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. @@ -3324,7 +3334,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; @@ -3388,13 +3398,58 @@ static struct inet_protosw mptcp_protosw = { .flags = INET_PROTOSW_ICSK, }; +static int mptcp_napi_poll(struct napi_struct *napi, int budget) +{ + struct mptcp_delegated_action *delegated; + struct mptcp_subflow_context *subflow; + int work_done = 0; + + delegated = container_of(napi, struct mptcp_delegated_action, napi); + while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + bh_lock_sock_nested(ssk); + if (!sock_owned_by_user(ssk) && + mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + /* ... elsewhere tcp_release_cb_override already processed + * the action or will do at next release_sock(). + * In both case must dequeue the subflow here - on the same + * CPU that scheduled it. 
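	 * The sock_put() below drops the reference taken by
	 * mptcp_subflow_delegate() when the subflow was queued on this list.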
+ */ + bh_unlock_sock(ssk); + sock_put(ssk); + + if (++work_done == budget) + return budget; + } + + /* always provide a 0 'work_done' argument, so that napi_complete_done + * will not try accessing the NULL napi->dev ptr + */ + napi_complete_done(napi, 0); + return work_done; +} + void __init mptcp_proto_init(void) { + struct mptcp_delegated_action *delegated; + int cpu; + mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL)) panic("Failed to allocate MPTCP pcpu counter\n"); + init_dummy_netdev(&mptcp_napi_dev); + for_each_possible_cpu(cpu) { + delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu); + INIT_LIST_HEAD(&delegated->head); + netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&delegated->napi); + } + mptcp_subflow_init(); mptcp_pm_init(); mptcp_token_init(); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d6400ad2d615..1460705aaad0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -378,6 +378,15 @@ enum mptcp_data_avail { MPTCP_SUBFLOW_OOO_DATA }; +struct mptcp_delegated_action { + struct napi_struct napi; + struct list_head head; +}; + +DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); + +#define MPTCP_DELEGATE_SEND 0 + /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ @@ -415,6 +424,9 @@ struct mptcp_subflow_context { u8 local_id; u8 remote_id; + long delegated_status; + struct list_head delegated_node; /* link into delegated_action, protected by local BH */ + struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; @@ -463,6 +475,61 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, spin_unlock_bh(&msk->join_list_lock); } +void mptcp_subflow_process_delegated(struct sock *ssk); + +static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow) +{ + struct mptcp_delegated_action *delegated; + bool schedule; + + /* The implied barrier pairs with mptcp_subflow_delegated_done(), and + * ensures the below list check sees list updates done prior to status + * bit changes + */ + if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) { + /* still on delegated list from previous scheduling */ + if (!list_empty(&subflow->delegated_node)) + return; + + /* the caller held the subflow bh socket lock */ + lockdep_assert_in_softirq(); + + delegated = this_cpu_ptr(&mptcp_delegated_actions); + schedule = list_empty(&delegated->head); + list_add_tail(&subflow->delegated_node, &delegated->head); + sock_hold(mptcp_subflow_tcp_sock(subflow)); + if (schedule) + napi_schedule(&delegated->napi); + } +} + +static inline struct mptcp_subflow_context * +mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) +{ + struct mptcp_subflow_context *ret; + + if (list_empty(&delegated->head)) + return NULL; + + ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); + list_del_init(&ret->delegated_node); + return ret; +} + +static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) +{ + return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + +static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow) +{ + /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before + * touching the status bit + */ + 
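	/* Without this ordering a remote CPU could see the bit already clear
	 * while still observing a stale, non-empty delegated_node, and skip
	 * re-queueing the subflow in mptcp_subflow_delegate().
	 */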
smp_wmb(); + clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, @@ -473,6 +540,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_sock_graft(struct sock *sk, struct socket *parent); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -521,6 +589,25 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) + return false; + + WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + return true; +} + +static inline void mptcp_write_space(struct sock *sk) +{ + if (sk_stream_is_writeable(sk)) { + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) + sk_stream_write_space(sk); + } +} + void mptcp_destroy_common(struct mptcp_sock *msk); void __init mptcp_token_init(void); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 278cbe3e539e..586156281e5a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -18,12 +18,15 @@ #include <net/tcp.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/ip6_route.h> +#include <net/transp_v6.h> #endif #include <net/mptcp.h> #include <uapi/linux/mptcp.h> #include "protocol.h" #include "mib.h" +static void mptcp_subflow_ops_undo_override(struct sock *ssk); + static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, enum linux_mptcp_mib_field field) { @@ -343,6 +346,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (subflow->conn_finished) return; + mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -427,6 +431,7 @@ drop: static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops; static struct inet_connection_sock_af_ops subflow_v6_specific; static struct inet_connection_sock_af_ops subflow_v6m_specific; +static struct proto tcpv6_prot_override; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -508,6 +513,8 @@ static void subflow_ulp_fallback(struct sock *sk, icsk->icsk_ulp_ops = NULL; rcu_assign_pointer(icsk->icsk_ulp_data, NULL); tcp_sk(sk)->is_mptcp = 0; + + mptcp_subflow_ops_undo_override(sk); } static void subflow_drop_ctx(struct sock *ssk) @@ -681,6 +688,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific; +static struct proto tcp_prot_override; enum mapping_status { MAPPING_OK, @@ -1040,7 +1048,10 @@ static void subflow_data_ready(struct sock *sk) static void subflow_write_space(struct sock *ssk) { - /* we take action in __mptcp_clean_una() */ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); } static struct inet_connection_sock_af_ops * @@ -1074,21 +1085,31 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) #endif static void mptcp_info2sockaddr(const struct mptcp_addr_info *info, - struct sockaddr_storage *addr) + struct sockaddr_storage *addr, + unsigned 
short family) { memset(addr, 0, sizeof(*addr)); - addr->ss_family = info->family; + addr->ss_family = family; if (addr->ss_family == AF_INET) { struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; - in_addr->sin_addr = info->addr; + if (info->family == AF_INET) + in_addr->sin_addr = info->addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ipv6_addr_v4mapped(&info->addr6)) + in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3]; +#endif in_addr->sin_port = info->port; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->ss_family == AF_INET6) { struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr; - in6_addr->sin6_addr = info->addr6; + if (info->family == AF_INET) + ipv6_addr_set_v4mapped(info->addr.s_addr, + &in6_addr->sin6_addr); + else + in6_addr->sin6_addr = info->addr6; in6_addr->sin6_port = info->port; } #endif @@ -1132,11 +1153,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; - mptcp_info2sockaddr(loc, &addr); + mptcp_info2sockaddr(loc, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (loc->family == AF_INET6) + if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif ssk->sk_bound_dev_if = loc->ifindex; @@ -1152,13 +1173,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - mptcp_info2sockaddr(remote, &addr); + mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) goto failed_unlink; + /* discard the subflow socket */ + mptcp_sock_graft(ssk, sk->sk_socket); + iput(SOCK_INODE(sf)); return err; failed_unlink: @@ -1196,6 +1220,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) #endif /* CONFIG_SOCK_CGROUP_DATA */ } +static void mptcp_subflow_ops_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot) + ssk->sk_prot = &tcpv6_prot_override; + else +#endif + ssk->sk_prot = &tcp_prot_override; +} + +static void mptcp_subflow_ops_undo_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot_override) + ssk->sk_prot = &tcpv6_prot; + else +#endif + ssk->sk_prot = &tcp_prot; +} int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) { struct mptcp_subflow_context *subflow; @@ -1251,6 +1294,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) *new_sock = sf; sock_hold(sk); subflow->conn = sk; + mptcp_subflow_ops_override(sf->sk); return 0; } @@ -1267,6 +1311,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, rcu_assign_pointer(icsk->icsk_ulp_data, ctx); INIT_LIST_HEAD(&ctx->node); + INIT_LIST_HEAD(&ctx->delegated_node); pr_debug("subflow=%p", ctx); @@ -1299,6 +1344,7 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); if (subflow_simultaneous_connect(sk)) { + mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); @@ -1378,6 +1424,7 @@ static void subflow_ulp_release(struct sock *ssk) sock_put(sk); } + mptcp_subflow_ops_undo_override(ssk); if (release) kfree_rcu(ctx, rcu); } @@ -1431,6 
+1478,16 @@ static void subflow_ulp_clone(const struct request_sock *req, } } +static void tcp_release_cb_override(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + + tcp_release_cb(ssk); +} + static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, @@ -1471,6 +1528,9 @@ void __init mptcp_subflow_init(void) subflow_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_specific.sk_rx_dst_set = subflow_finish_connect; + tcp_prot_override = tcp_prot; + tcp_prot_override.release_cb = tcp_release_cb_override; + #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; @@ -1486,6 +1546,9 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.net_frag_header_len = 0; + + tcpv6_prot_override = tcpv6_prot; + tcpv6_prot_override.release_cb = tcp_release_cb_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49fbef0d99be..1a92063c73a4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Core Netfilter Configuration" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER config NETFILTER_INGRESS bool "Netfilter ingress support" diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index eb0e329f9b8d..c39a1e35c104 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -4,7 +4,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER depends on (NF_CONNTRACK || NF_CONNTRACK=n) help IP Virtual Server support will let you build a high-performance diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 96b91674dd37..466a0279b93e 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -4,7 +4,6 @@ # menuconfig NFC - depends on NET depends on RFKILL || !RFKILL tristate "NFC subsystem support" default n diff --git a/net/psample/Kconfig b/net/psample/Kconfig index 028f514a9c60..be0b839209ba 100644 --- a/net/psample/Kconfig +++ b/net/psample/Kconfig @@ -4,7 +4,6 @@ # menuconfig PSAMPLE - depends on NET tristate "Packet-sampling netlink channel" default n help diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6fe4e5cc807c..e2e4353db8a7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl) + struct Qdisc *q, unsigned long cl, + struct netlink_ext_ack *extack) { u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; @@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net, return -EINVAL; } - err = cops->delete(q, cl); + err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; @@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = tclass_del_notify(net, cops, skb, n, q, cl); + err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 007bd2d9f1ff..d0c9a57398fc 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -320,7 +320,8 @@ err_out: return error; } -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)arg; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 53d45e029c36..320b3d31fa97 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1675,7 +1675,8 @@ failure: return err; } -static int cbq_delete(struct Qdisc *sch, unsigned long arg) +static int cbq_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index dde564670ad8..fc1e47069593 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) kfree(cl); } -static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +static int drr_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)arg; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2b88710994d7..cd2748e2d4a2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -150,7 +150,8 @@ errout: return err; } -static int dsmark_delete(struct Qdisc *sch, unsigned long arg) +static int dsmark_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca9844..bf0034c66e35 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) } static int -hfsc_delete_class(struct Qdisc *sch, unsigned long arg) +hfsc_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cd70dbcbd72f..dff3adf5a915 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -114,6 +114,7 @@ struct htb_class { * Written often fields */ struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_packed bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -174,6 +175,11 @@ struct htb_sched { int row_mask[TC_HTB_MAXDEPTH]; struct htb_level hlevel[TC_HTB_MAXDEPTH]; + + struct Qdisc **direct_qdiscs; + unsigned int num_direct_qdiscs; + + bool offload; }; /* find class in global hash table using given handle */ @@ -957,7 +963,7 @@ static void 
htb_reset(struct Qdisc *sch) if (cl->level) memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->leaf.q) + if (cl->leaf.q && !q->offload) qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; @@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, [TCA_HTB_RATE64] = { .type = NLA_U64 }, [TCA_HTB_CEIL64] = { .type = NLA_U64 }, + [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, }; static void htb_work_func(struct work_struct *work) @@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } +static void htb_set_lockdep_class_child(struct Qdisc *q) +{ + static struct lock_class_key child_key; + + lockdep_set_class(qdisc_lock(q), &child_key); +} + +static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) +{ + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); +} + static int htb_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; + unsigned int ntx; int err; qdisc_watchdog_init(&q->watchdog, sch); @@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (gopt->version != HTB_VER >> 16) return -EINVAL; + q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); + + if (q->offload) { + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + q->num_direct_qdiscs = dev->real_num_tx_queues; + q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, + sizeof(*q->direct_qdiscs), + GFP_KERNEL); + if (!q->direct_qdiscs) + return -ENOMEM; + } + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - return err; + goto err_free_direct_qdiscs; qdisc_skb_head_init(&q->direct_queue); @@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, q->rate2quantum = 1; q->defcls = gopt->defcls; + if (!q->offload) + return 0; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc; + + qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 0), extack); + if (!qdisc) { + err = -ENOMEM; + goto err_free_qdiscs; + } + + htb_set_lockdep_class_child(qdisc); + q->direct_qdiscs[ntx] = qdisc; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + } + + sch->flags |= TCQ_F_MQROOT; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_CREATE, + .parent_classid = TC_H_MAJ(sch->handle) >> 16, + .classid = TC_H_MIN(q->defcls), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + goto err_free_qdiscs; + return 0; + +err_free_qdiscs: + /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */ + q->offload = false; + + for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; + ntx++) + qdisc_put(q->direct_qdiscs[ntx]); + + qdisc_class_hash_destroy(&q->clhash); + /* Prevent use-after-free and double-free when htb_destroy gets called. 
+ */ + q->clhash.hash = NULL; + q->clhash.hashsize = 0; + +err_free_direct_qdiscs: + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; + return err; +} + +static void htb_attach_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct htb_sched *q = qdisc_priv(sch); + unsigned int ntx; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; + + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + qdisc_put(old); + qdisc_hash_add(qdisc, false); + } + for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); + + qdisc_put(old); + } + + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; +} + +static void htb_attach_software(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + /* Resemble qdisc_graft behavior. */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); + + qdisc_refcount_inc(sch); + + qdisc_put(old); + } +} + +static void htb_attach(struct Qdisc *sch) +{ + struct htb_sched *q = qdisc_priv(sch); + + if (q->offload) + htb_attach_offload(sch); + else + htb_attach_software(sch); } static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + if (q->offload) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. 
@@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_opt opt; @@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, TCA_HTB_PAD)) @@ -1117,10 +1266,39 @@ nla_put_failure: return -1; } +static void htb_offload_aggregate_stats(struct htb_sched *q, + struct htb_class *cl) +{ + struct htb_class *c; + unsigned int i; + + memset(&cl->bstats, 0, sizeof(cl->bstats)); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { + struct htb_class *p = c; + + while (p && p->level < cl->level) + p = p->parent; + + if (p != cl) + continue; + + cl->bstats.bytes += c->bstats_bias.bytes; + cl->bstats.packets += c->bstats_bias.packets; + if (c->level == 0) { + cl->bstats.bytes += c->leaf.q->bstats.bytes; + cl->bstats.packets += c->leaf.q->bstats.packets; + } + } + } +} + static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct gnet_stats_queue qs = { .drops = cl->drops, .overlimits = cl->overlimits, @@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), INT_MIN, INT_MAX); + if (q->offload) { + if (!cl->level) { + if (cl->leaf.q) + cl->bstats = cl->leaf.q->bstats; + else + memset(&cl->bstats, 0, sizeof(cl->bstats)); + cl->bstats.bytes += cl->bstats_bias.bytes; + cl->bstats.packets += cl->bstats_bias.packets; + } else { + htb_offload_aggregate_stats(q, cl); + } + } + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); } +static struct netdev_queue * +htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; + int err; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_QUERY_QUEUE, + .classid = TC_H_MIN(tcm->tcm_parent), + }; + err = htb_offload(dev, &offload_opt); + if (err || offload_opt.qid >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, offload_opt.qid); +} + +static struct Qdisc * +htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) +{ + struct net_device *dev = dev_queue->dev; + struct Qdisc *old_q; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + old_q = dev_graft_qdisc(dev_queue, new_q); + if (new_q) + new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + if (dev->flags & IFF_UP) + 
dev_activate(dev); + + return old_q; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +{ + struct netdev_queue *queue_old, *queue_new; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + + queue_old = netdev_get_tx_queue(dev, qid_old); + queue_new = netdev_get_tx_queue(dev, qid_new); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + qdisc->dev_queue = queue_new; + qdisc = dev_graft_qdisc(queue_new, qdisc); + if (dev->flags & IFF_UP) + dev_activate(dev); + + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); +} + static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { + struct netdev_queue *dev_queue = sch->dev_queue; struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct Qdisc *old_q; if (cl->level) return -EINVAL; - if (new == NULL && - (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack)) == NULL) - return -ENOBUFS; + + if (q->offload) { + dev_queue = new->dev_queue; + WARN_ON(dev_queue != cl->leaf.q->dev_queue); + } + + if (!new) { + new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); + if (!new) + return -ENOBUFS; + } + + if (q->offload) { + htb_set_lockdep_class_child(new); + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new); + old_q = htb_graft_helper(dev_queue, new); + } *old = qdisc_replace(sch, new, &cl->leaf.q); + + if (q->offload) { + WARN_ON(old_q != *old); + qdisc_put(old_q); + } + return 0; } @@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, +static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, struct Qdisc *new_q) { + struct htb_sched *q = qdisc_priv(sch); struct htb_class *parent = cl->parent; WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); @@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->cmode = HTB_CAN_SEND; } +static void htb_parent_to_leaf_offload(struct Qdisc *sch, + struct netdev_queue *dev_queue, + struct Qdisc *new_q) +{ + struct Qdisc *old_q; + + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); +} + +static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + bool last_child, bool destroying, + struct netlink_ext_ack *extack) +{ + struct tc_htb_qopt_offload offload_opt; + struct Qdisc *q = cl->leaf.q; + struct Qdisc *old = NULL; + int err; + + if (cl->level) + return -EINVAL; + + WARN_ON(!q); + if (!destroying) { + /* On destroy of HTB, two cases are possible: + * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. + * 2. q is a noop qdisc (for nodes that were inner), + * q->dev_queue is noop_netdev_queue. + */ + old = htb_graft_helper(q->dev_queue, NULL); + WARN_ON(!old); + WARN_ON(old != q); + } + + if (cl->parent) { + cl->parent->bstats_bias.bytes += q->bstats.bytes; + cl->parent->bstats_bias.packets += q->bstats.packets; + } + + offload_opt = (struct tc_htb_qopt_offload) { + .command = !last_child ? TC_HTB_LEAF_DEL : + destroying ? 
TC_HTB_LEAF_DEL_LAST_FORCE : + TC_HTB_LEAF_DEL_LAST, + .classid = cl->common.classid, + .extack = extack, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + + if (!err || destroying) + qdisc_put(old); + else + htb_graft_helper(q->dev_queue, old); + + if (last_child) + return err; + + if (!err && offload_opt.moved_qid != 0) { + if (destroying) + q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), + offload_opt.qid); + else + htb_offload_move_qdisc(sch, offload_opt.moved_qid, + offload_opt.qid); + } + + return err; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { @@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct hlist_node *next; + bool nonempty, changed; struct htb_class *cl; unsigned int i; @@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch) cl->block = NULL; } } - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], - common.hnode) - htb_destroy_class(sch, cl); - } + + do { + nonempty = false; + changed = false; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { + bool last_child; + + if (!q->offload) { + htb_destroy_class(sch, cl); + continue; + } + + nonempty = true; + + if (cl->level) + continue; + + changed = true; + + last_child = htb_parent_last_child(cl); + htb_destroy_class_offload(sch, cl, last_child, + true, NULL); + qdisc_class_hash_remove(&q->clhash, + &cl->common); + if (cl->parent) + cl->parent->children--; + if (last_child) + htb_parent_to_leaf(sch, cl, NULL); + htb_destroy_class(sch, cl); + } + } + } while (changed); + WARN_ON(nonempty); + qdisc_class_hash_destroy(&q->clhash); __qdisc_reset_queue(&q->direct_queue); + + if (!q->offload) + return; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + + if (!q->direct_qdiscs) + return; + for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) + qdisc_put(q->direct_qdiscs[i]); + kfree(q->direct_qdiscs); } -static int htb_delete(struct Qdisc *sch, unsigned long arg) +static int htb_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct Qdisc *new_q = NULL; int last_child = 0; + int err; /* TODO: why don't allow to delete subtree ? references ? does * tc subsys guarantee us that in htb_destroy it holds no class @@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (cl->children || cl->filter_cnt) return -EBUSY; - if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!cl->level && htb_parent_last_child(cl)) + last_child = 1; + + if (q->offload) { + err = htb_destroy_class_offload(sch, cl, last_child, false, + extack); + if (err) + return err; + } + + if (last_child) { + struct netdev_queue *dev_queue; + + dev_queue = q->offload ? 
cl->leaf.q->dev_queue : sch->dev_queue; + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - last_child = 1; + if (q->offload) { + if (new_q) + htb_set_lockdep_class_child(new_q); + htb_parent_to_leaf_offload(sch, dev_queue, new_q); + } } sch_tree_lock(sch); @@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) &q->hlevel[cl->level].wait_pq); if (last_child) - htb_parent_to_leaf(q, cl, new_q); + htb_parent_to_leaf(sch, cl, new_q); sch_tree_unlock(sch); @@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct tc_htb_qopt_offload offload_opt; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HTB_MAX + 1]; struct Qdisc *parent_qdisc = NULL; + struct netdev_queue *dev_queue; struct tc_htb_opt *hopt; u64 rate64, ceil64; int warn = 0; @@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], NULL)); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + if (!cl) { /* new class */ - struct Qdisc *new_q; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *new_q, *old_q; int prio; struct { struct nlattr nla; @@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); - if (err) { - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } + if (err) + goto err_block_put; } cl->children = 0; @@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) RB_CLEAR_NODE(&cl->node[prio]); + cl->common.classid = classid; + + /* Make sure nothing interrupts us in between of two + * ndo_setup_tc calls. + */ + ASSERT_RTNL(); + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!q->offload) { + dev_queue = sch->dev_queue; + } else if (!(parent && !parent->level)) { + /* Assign a dev_queue to this classid. */ + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_ALLOC_QUEUE, + .classid = cl->common.classid, + .parent_classid = parent ? + TC_H_MIN(parent->common.classid) : + TC_HTB_CLASSID_ROOT, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", + err); + goto err_kill_estimator; + } + dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); + } else { /* First child. 
*/ + dev_queue = parent->leaf.q->dev_queue; + old_q = htb_graft_helper(dev_queue, NULL); + WARN_ON(old_q != parent->leaf.q); + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_TO_INNER, + .classid = cl->common.classid, + .parent_classid = + TC_H_MIN(parent->common.classid), + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", + err); + htb_graft_helper(dev_queue, old_q); + goto err_kill_estimator; + } + parent->bstats_bias.bytes += old_q->bstats.bytes; + parent->bstats_bias.packets += old_q->bstats.packets; + qdisc_put(old_q); + } + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); + if (q->offload) { + if (new_q) { + htb_set_lockdep_class_child(new_q); + /* One ref for cl->leaf.q, the other for + * dev_queue->qdisc. + */ + qdisc_refcount_inc(new_q); + } + old_q = htb_graft_helper(dev_queue, new_q); + /* No qdisc_put needed. */ + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); + } sch_tree_lock(sch); if (parent && !parent->level) { /* turn parent into inner node */ @@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, : TC_HTB_MAXDEPTH) - 1; memset(&parent->inner, 0, sizeof(parent->inner)); } + /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? new_q : &noop_qdisc; - cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (err) return err; } - sch_tree_lock(sch); - } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + if (q->offload) { + struct net_device *dev = qdisc_dev(sch); + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_NODE_MODIFY, + .classid = cl->common.classid, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + /* Estimator was replaced, and rollback may fail + * as well, so we don't try to recover it, and + * the estimator won't work properly with the + * offload anyway, because bstats are updated + * only when the stats are queried. + */ + return err; + } - ceil64 = tb[TCA_HTB_CEIL64] ?
nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + sch_tree_lock(sch); + } psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); @@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, *arg = (unsigned long)cl; return 0; +err_kill_estimator: + gen_kill_estimator(&cl->rate_est); +err_block_put: + tcf_block_put(cl->block); + kfree(cl); failure: return err; } @@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static const struct Qdisc_class_ops htb_class_ops = { + .select_queue = htb_select_queue, .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, @@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, .init = htb_init, + .attach = htb_attach, .reset = htb_reset, .destroy = htb_destroy, .dump = htb_dump, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6335230a971e..1db9d4a2ef5e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) kfree(cl); } -static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf..dde829d4b9f8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOSYS; } -static int sfb_delete(struct Qdisc *sch, unsigned long cl) +static int sfb_delete(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { return -ENOSYS; } diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile index bd69a3136e76..c5561d7f3a7c 100644 --- a/net/switchdev/Makefile +++ b/net/switchdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the Switch device API # -obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o +obj-y += switchdev.o diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 2aca86021df5..e9263280a2d4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -117,10 +117,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type, msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); - if (hdr_sz > SHORT_H_SIZE) { - msg_set_orignode(msg, onode); - msg_set_destnode(msg, dnode); - } return buf; } diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc..5c903abc9fa5 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh new file mode 100644 index 000000000000..f813ffefc07e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for physical ports resource. The test splits each splittable port +# to its width and checks that eventually the number of physical ports equals +# the maximum number of physical ports. 
+ +PORT_NUM_NETIFS=0 + +port_setup_prepare() +{ + : +} + +port_cleanup() +{ + pre_cleanup + + for port in "${unsplit[@]}"; do + devlink port unsplit $port + check_err $? "Did not unsplit $port" + done +} + +split_all_ports() +{ + local should_fail=$1; shift + local -a unsplit + + # Loop over the splittable netdevs and create tuples of netdev along + # with its width. For example: + # '$netdev1 $count1 $netdev2 $count2...', when: + # $netdev1-2 are splittable netdevs in the device, and + # $count1-2 are the netdevs width respectively. + while read netdev count <<<$( + devlink -j port show | + jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"' + ) + [[ ! -z $netdev ]] + do + devlink port split $netdev count $count + check_err $? "Did not split $netdev into $count" + unsplit+=( "${netdev}s0" ) + done +} + +port_test() +{ + local max_ports=$1; shift + local should_fail=$1; shift + + split_all_ports $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') + + [[ $occ -eq $max_ports ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Mismatch ports number: Expected $max_ports, got $occ." + else + check_err_fail $should_fail $? "Reached more ports than expected" + fi + +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index d7cf33a3f18d..4a1c9328555f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -28,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh +
+port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((!
should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 43f662401bc3..087a884f66cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fa5fa425d148..25f198bec0b2 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh +TEST_PROGS += unicast_extensions.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f74cd993b168..be34b9ccbd20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -790,6 +790,81 @@ chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 +# subflow IPv4-mapped to IPv4-mapped +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow IPv4-mapped" 1 1 1 + +# signal address IPv4-mapped with IPv4-mapped sk +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address IPv4-mapped" 1 1 1 +chk_add_nr 1 1 + +# subflow v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow v4-map-v6" 1 1 1 + +# signal address v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address v4-map-v6" 1 1 1 +chk_add_nr 1 1 + +# subflow v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow v6-map-v4" 1 1 1 + +# signal address v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address v6-map-v4" 1 1 1 +chk_add_nr 1 1 + +# no subflow IPv6 to v4 address +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 + +# no 
subflow IPv6 to v4 address even if v6 has a valid v4 at the end +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 + +# no subflow IPv4 to v6 address, no need to slow down too then +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 +chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 + # single subflow, backup reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 026384c189c9..a62d2fa1275c 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=450 +timeout=600 diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh new file mode 100755 index 000000000000..dbf0421986df --- /dev/null +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -0,0 +1,228 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +# Thanks to David Ahern for help and advice on nettest modifications. +# +# Self-tests for IPv4 address extensions: the kernel's ability to accept +# certain traditionally unused or unallocated IPv4 addresses. For each kind +# of address, we test for interface assignment, ping, TCP, and forwarding. +# Must be run as root (to manipulate network namespaces and virtual +# interfaces). +# +# Things we test for here: +# +# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid. +# +# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned. +# +# * Currently the kernel DOES NOT accept unicast use of the lowest +# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24). +# This is treated as a second broadcast address, for compatibility +# with 4.2BSD (!). +# +# * Currently the kernel DOES NOT accept unicast use of any of 127/8. +# +# * Currently the kernel DOES NOT accept unicast use of any of 224/4. +# +# These tests provide an easy way to flip the expected result of any +# of these behaviors for testing kernel patches that change them. + +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit 0 + fi +fi + +result=0 + +hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; } +show_output(){ exec >&3 2>&4; } + +show_result(){ + if [ $1 -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${2}" + else + printf "TEST: %-60s [FAIL]\n" "${2}" + result=1 + fi +} + +_do_segmenttest(){ + # Perform a simple set of link tests between a pair of + # IP addresses on a shared (virtual) segment, using + # ping and nettest. + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # + # Caller must set up foo-ns and bar-ns namespaces + # containing linked veth devices foo and bar, + # respectively. 
+ + ip -n foo-ns address add $1/$3 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n bar-ns address add $2/$3 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + + return 0 +} + +_do_route_test(){ + # Perform a simple set of gateway tests. + # + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # host gateway host + # + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + + # Caller must set up foo-ns, bar-ns, and router-ns + # containing linked veth devices foo-foo1, bar1-bar + # (foo in foo-ns, foo1 and bar1 in router-ns, and + # bar in bar-ns). + + ip -n foo-ns address add $1/$5 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n foo-ns route add default via $2 || return 1 + ip -n bar-ns address add $4/$5 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + ip -n bar-ns route add default via $3 || return 1 + ip -n router-ns address add $2/$5 dev foo1 || return 1 + ip -n router-ns link set foo1 up || return 1 + ip -n router-ns address add $3/$5 dev bar1 || return 1 + ip -n router-ns link set bar1 up || return 1 + + echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + + return 0 +} + +segmenttest(){ + # Sets up veth link and tries to connect over it. + # Arguments: ip_a ip_b prefix_len test_description + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip link add foo netns foo-ns type veth peer name bar netns bar-ns + + test_result=0 + _do_segmenttest "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + + show_result $test_result "$4" +} + +route_test(){ + # Sets up a simple gateway and tries to connect through it. + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Returns success or failure. 
+ + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip netns add router-ns + ip link add foo netns foo-ns type veth peer name foo1 netns router-ns + ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + + test_result=0 + _do_route_test "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns pids router-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + ip netns del router-ns + + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + show_result $test_result "$6" +} + +echo "###########################################################################" +echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)" +echo "###########################################################################" +# +# Test support for 240/4 +segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)" +segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)" +# +# Test support for 0/8 +segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)" +segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)" +# +# Even 255.255/16 is OK! +segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)" +# +# Or 255.255.255/24 +segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)" +# +# Routing between different networks +route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" +route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" +# +# ============================================== +# ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ============================================== +expect_failure=true +# It should still not be possible to use 0.0.0.0 or 255.255.255.255 +# as a unicast address. Thus, these tests expect failure. 
+segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)" +segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)" +# +# Test support for not having all of 127 be loopback +# Currently Linux does not allow this, so this should fail too +segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" +# +# Test support for lowest address +# Currently Linux does not allow this, so this should fail too +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" +# +# Routing using lowest address as a gateway/endpoint +# Currently Linux does not allow this, so this should fail too +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" +# +# Test support for unicast use of class D +# Currently Linux does not allow this, so this should fail too +segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" +# +# Routing using class D as a gateway +route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)" +# +# Routing using 127/8 +# Currently Linux does not allow this, so this should fail too +route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)" +# +unset expect_failure +# ===================================================== +# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ===================================================== +exit ${result}
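
The unicast_extensions.sh selftest added above can be exercised on its own; a minimal invocation sketch (it assumes the script is run as root from a kernel tree in which the net selftests, including the nettest helper the script looks for, have been built):

    make -C tools/testing/selftests/net        # builds nettest among other helpers
    cd tools/testing/selftests/net
    sudo ./unicast_extensions.sh

For the TCA_HTB_OFFLOAD flag and the TC_SETUP_QDISC_HTB path added to sch_htb.c above, a hedged userspace sketch follows (it assumes an iproute2 build that accepts an "offload" keyword for htb and a NIC driver whose ndo_setup_tc handles TC_SETUP_QDISC_HTB; the device name and rates are placeholders, not taken from this series):

    # Install HTB as the root qdisc in offload mode; the per-TXQ pfifo qdiscs
    # handle direct traffic, and leaf classes are then created in hardware
    # via TC_HTB_LEAF_ALLOC_QUEUE.
    tc qdisc replace dev eth0 root handle 1: htb default 10 offload
    tc class add dev eth0 parent 1: classid 1:10 htb rate 100mbit ceil 200mbit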