160 files changed, 6520 insertions, 2990 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index 560369efad6c..de04626a8e9d 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -76,6 +76,12 @@ phy-mode must be set, see also example 2 below! * mt7621: phy-mode = "rgmii-txid"; * mt7623: phy-mode = "rgmii"; +Optional properties: + +- gpio-controller: Boolean; if defined, MT7530's LED controller will run on + GPIO mode. +- #gpio-cells: Must be 2 if gpio-controller is defined. + See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required, optional properties and how the integrated switch subnodes must be specified. diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 8a2d12644675..8f86084bf12e 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -113,13 +113,6 @@ properties: performing early IPA initialization, including loading and validating firwmare used by the GSI. - modem-remoteproc: - $ref: /schemas/types.yaml#/definitions/phandle - description: - This defines the phandle to the remoteproc node representing - the modem subsystem. This is requied so the IPA driver can - receive and act on notifications of modem up/down events. - memory-region: maxItems: 1 description: @@ -135,7 +128,6 @@ required: - interrupts - interconnects - qcom,smem-states - - modem-remoteproc oneOf: - required: @@ -147,7 +139,7 @@ additionalProperties: false examples: - | - #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/clock/qcom,rpmh.h> #include <dt-bindings/interconnect/qcom,sdm845.h> @@ -168,7 +160,6 @@ examples: compatible = "qcom,sdm845-ipa"; modem-init; - modem-remoteproc = <&mss_pil>; iommus = <&apps_smmu 0x720 0x3>; reg = <0x1e40000 0x7000>, @@ -178,8 +169,8 @@ examples: "ipa-shared", "gsi"; - interrupts-extended = <&intc 0 311 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, + interrupts-extended = <&intc GIC_SPI 311 IRQ_TYPE_EDGE_RISING>, + <&intc GIC_SPI 432 IRQ_TYPE_LEVEL_HIGH>, <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; interrupt-names = "ipa", diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index de9dd574a2f9..91ba96d43c6c 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -40,6 +40,7 @@ properties: - renesas,etheravb-r8a77980 # R-Car V3H - renesas,etheravb-r8a77990 # R-Car E3 - renesas,etheravb-r8a77995 # R-Car D3 + - renesas,etheravb-r8a779a0 # R-Car V3U - const: renesas,etheravb-rcar-gen3 # R-Car Gen3 and RZ/G2 reg: true @@ -170,6 +171,7 @@ allOf: - renesas,etheravb-r8a77965 - renesas,etheravb-r8a77970 - renesas,etheravb-r8a77980 + - renesas,etheravb-r8a779a0 then: required: - tx-internal-delay-ps diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst index 61e850460e18..dd5cd69467be 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst @@ -217,3 +217,73 @@ For example:: NPA_AF_ERR: NPA Error Interrupt Reg : 4096 AQ 
Doorbell Error + + +NIX Reporters +------------- +The NIX reporters are responsible for reporting and recovering the following group of errors: + +1. GENERAL events + + - Receive mirror/multicast packet drop due to insufficient buffer. + - SMQ Flush operation. + +2. ERROR events + + - Memory Fault due to WQE read/write from multicast/mirror buffer. + - Receive multicast/mirror replication list error. + - Receive packet on an unmapped PF. + - Fault due to NIX_AQ_INST_S read or NIX_AQ_RES_S write. + - AQ Doorbell Error. + +3. RAS events + + - RAS Error Reporting for NIX Receive Multicast/Mirror Entry Structure. + - RAS Error Reporting for WQE/Packet Data read from Multicast/Mirror Buffer.. + - RAS Error Reporting for NIX_AQ_INST_S/NIX_AQ_RES_S. + +4. RVU events + + - Error due to unmapped slot. + +Sample Output:: + + ~# ./devlink health + pci/0002:01:00.0: + reporter hw_npa_intr + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_gen + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_err + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_npa_ras + state healthy error 0 recover 0 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_intr + state healthy error 1121 recover 1121 last_dump_date 2021-01-19 last_dump_time 05:42:26 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_gen + state healthy error 949 recover 949 last_dump_date 2021-01-19 last_dump_time 05:42:43 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_err + state healthy error 1147 recover 1147 last_dump_date 2021-01-19 last_dump_time 05:42:59 grace_period 0 auto_recover true auto_dump true + reporter hw_nix_ras + state healthy error 409 recover 409 last_dump_date 2021-01-19 last_dump_time 05:43:16 grace_period 0 auto_recover true auto_dump true + +Each reporter dumps the + + - Error Type + - Error Register value + - Reason in words + +For example:: + + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_intr + NIX_AF_RVU: + NIX RVU Interrupt Reg : 1 + Unmap Slot Error + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_gen + NIX_AF_GENERAL: + NIX General Interrupt Reg : 1 + Rx multicast pkt drop + ~# devlink health dump show pci/0002:01:00.0 reporter hw_nix_err + NIX_AF_ERR: + NIX Error Interrupt Reg : 64 + Rx on unmapped PF_FUNC diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst index 93e92d2f0752..3d5ae51e65a2 100644 --- a/Documentation/networking/devlink/devlink-resource.rst +++ b/Documentation/networking/devlink/devlink-resource.rst @@ -23,6 +23,20 @@ current size and related sub resources. To access a sub resource, you specify the path of the resource. For example ``/IPv4/fib`` is the id for the ``fib`` sub-resource under the ``IPv4`` resource. +Generic Resources +================= + +Generic resources are used to describe resources that can be shared by multiple +device drivers and their description must be added to the following table: + +.. 
list-table:: List of Generic Resources + :widths: 10 90 + + * - Name + - Description + * - ``physical_ports`` + - A limited capacity of physical ports that the switch ASIC can support + example usage ------------- diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index dd2b12a32b73..0e51ddd9a2f1 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1871,6 +1871,16 @@ accept_ra_defrtr - BOOLEAN - enabled if accept_ra is enabled. - disabled if accept_ra is disabled. +ra_defrtr_metric - UNSIGNED INTEGER + Route metric for default route learned in Router Advertisement. This value + will be assigned as metric for the default route learned via IPv6 Router + Advertisement. Takes affect only if accept_ra_defrtr is enabled. + + Possible values: + 1 to 0xFFFFFFFF + + Default: IP6_RT_PRIO_USER i.e. 1024. + accept_ra_from_local - BOOLEAN Accept RA with source-address that is found on local machine if the RA is otherwise proper and able to be accepted. diff --git a/MAINTAINERS b/MAINTAINERS index 1df56a32d2df..650deb973913 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2787,6 +2787,14 @@ F: arch/arm64/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ +ARROW SPEEDCHIPS XRS7000 SERIES ETHERNET SWITCH DRIVER +M: George McCollister <george.mccollister@gmail.com> +L: netdev@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml +F: drivers/net/dsa/xrs700x/* +F: net/dsa/tag_xrs700x.c + AS3645A LED FLASH CONTROLLER DRIVER M: Sakari Ailus <sakari.ailus@iki.fi> L: linux-leds@vger.kernel.org @@ -649,7 +649,8 @@ ifeq ($(KBUILD_EXTMOD),) core-y := init/ usr/ drivers-y := drivers/ sound/ drivers-$(CONFIG_SAMPLES) += samples/ -drivers-y += net/ virt/ +drivers-$(CONFIG_NET) += net/ +drivers-y += virt/ libs-y := lib/ endif # KBUILD_EXTMOD diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 22b832fc62e3..003309f0d3e1 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -1434,8 +1434,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&remoteproc_mpss>; - status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index bcf888381f14..04b2490eec9f 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -2366,8 +2366,6 @@ qcom,smem-state-names = "ipa-clock-enabled-valid", "ipa-clock-enabled"; - modem-remoteproc = <&mss_pil>; - status = "disabled"; }; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 260f9f46668b..1ebb4b943876 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -42,6 +42,7 @@ config BONDING tristate "Bonding driver support" depends on INET depends on IPV6 || IPV6=n + depends on TLS || TLS_DEVICE=n help Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet Channels together. 
This is called 'Etherchannel' by Cisco, diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 01e4a194f187..d9281ae853f8 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -74,7 +74,7 @@ static int can_rx_state_to_frame(struct net_device *dev, enum can_state state) } } -static const char *can_get_state_str(const enum can_state state) +const char *can_get_state_str(const enum can_state state) { switch (state) { case CAN_STATE_ERROR_ACTIVE: @@ -95,6 +95,7 @@ static const char *can_get_state_str(const enum can_state state) return "<unknown>"; } +EXPORT_SYMBOL_GPL(can_get_state_str); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state) diff --git a/drivers/net/can/dev/length.c b/drivers/net/can/dev/length.c index d35c4e82314d..b48140b1102e 100644 --- a/drivers/net/can/dev/length.c +++ b/drivers/net/can/dev/length.c @@ -27,12 +27,17 @@ static const u8 len2dlc[] = { 13, 13, 13, 13, 13, 13, 13, 13, /* 25 - 32 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 33 - 40 */ 14, 14, 14, 14, 14, 14, 14, 14, /* 41 - 48 */ + 15, 15, 15, 15, 15, 15, 15, 15, /* 49 - 56 */ + 15, 15, 15, 15, 15, 15, 15, 15 /* 57 - 64 */ }; /* map the sanitized data length to an appropriate data length code */ u8 can_fd_len2dlc(u8 len) { - if (len >= ARRAY_SIZE(len2dlc)) + /* check for length mapping table size at build time */ + BUILD_BUG_ON(ARRAY_SIZE(len2dlc) != CANFD_MAX_DLEN + 1); + + if (unlikely(len > CANFD_MAX_DLEN)) return CANFD_MAX_DLC; return len2dlc[len]; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 5d9157c655e9..971ada36e37f 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1975,14 +1975,14 @@ static int flexcan_setup_stop_mode_scfw(struct platform_device *pdev) priv = netdev_priv(dev); priv->scu_idx = scu_idx; - /* this function could be defered probe, return -EPROBE_DEFER */ + /* this function could be deferred probe, return -EPROBE_DEFER */ return imx_scu_get_handle(&priv->sc_ipc_handle); } /* flexcan_setup_stop_mode - Setup stop mode for wakeup * * Return: = 0 setup stop mode successfully or doesn't support this feature - * < 0 fail to setup stop mode (could be defered probe) + * < 0 fail to setup stop mode (could be deferred probe) */ static int flexcan_setup_stop_mode(struct platform_device *pdev) { diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 00e9855c23d1..3638b474d86b 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -335,6 +335,8 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv) u8 len; int i, j; + netdev_reset_queue(priv->ndev); + /* TEF */ tef_ring = priv->tef; tef_ring->head = 0; @@ -1249,7 +1251,8 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq) static int mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, - const struct mcp251xfd_hw_tef_obj *hw_tef_obj) + const struct mcp251xfd_hw_tef_obj *hw_tef_obj, + unsigned int *frame_len_ptr) { struct net_device_stats *stats = &priv->ndev->stats; u32 seq, seq_masked, tef_tail_masked; @@ -1271,7 +1274,8 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, mcp251xfd_get_tef_tail(priv), - hw_tef_obj->ts, NULL); + hw_tef_obj->ts, + frame_len_ptr); stats->tx_packets++; priv->tef->tail++; @@ -1308,6 +1312,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, 
const u8 offset, const u8 len) { const struct mcp251xfd_tx_ring *tx_ring = priv->tx; + const int val_bytes = regmap_get_val_bytes(priv->map_rx); if (IS_ENABLED(CONFIG_CAN_MCP251XFD_SANITY) && (offset > tx_ring->obj_num || @@ -1322,12 +1327,13 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, return regmap_bulk_read(priv->map_rx, mcp251xfd_get_tef_obj_addr(offset), hw_tef_obj, - sizeof(*hw_tef_obj) / sizeof(u32) * len); + sizeof(*hw_tef_obj) / val_bytes * len); } static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) { struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX]; + unsigned int total_frame_len = 0; u8 tef_tail, len, l; int err, i; @@ -1349,7 +1355,9 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) } for (i = 0; i < len; i++) { - err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]); + unsigned int frame_len; + + err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len); /* -EAGAIN means the Sequence Number in the TEF * doesn't match our tef_tail. This can happen if we * read the TEF objects too early. Leave loop let the @@ -1359,6 +1367,8 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) goto out_netif_wake_queue; if (err) return err; + + total_frame_len += frame_len; } out_netif_wake_queue: @@ -1389,6 +1399,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv) return err; tx_ring->tail += len; + netdev_completed_queue(priv->ndev, len, total_frame_len); err = mcp251xfd_check_tef_tail(priv); if (err) @@ -1438,6 +1449,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, struct sk_buff *skb) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_IDE) { u32 sid, eid; @@ -1453,9 +1465,10 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, hw_rx_obj->id); } + dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); + /* CANFD */ if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) { - u8 dlc; if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_ESI) cfd->flags |= CANFD_ESI; @@ -1463,17 +1476,17 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_BRS) cfd->flags |= CANFD_BRS; - dlc = FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, hw_rx_obj->flags); cfd->len = can_fd_dlc2len(dlc); } else { if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR) cfd->can_id |= CAN_RTR_FLAG; - cfd->len = can_cc_dlc2len(FIELD_GET(MCP251XFD_OBJ_FLAGS_DLC, - hw_rx_obj->flags)); + can_frame_set_cc_len((struct can_frame *)cfd, dlc, + priv->can.ctrlmode); } - memcpy(cfd->data, hw_rx_obj->data, cfd->len); + if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) + memcpy(cfd->data, hw_rx_obj->data, cfd->len); } static int @@ -1510,12 +1523,13 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv, struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj, const u8 offset, const u8 len) { + const int val_bytes = regmap_get_val_bytes(priv->map_rx); int err; err = regmap_bulk_read(priv->map_rx, mcp251xfd_get_rx_obj_addr(ring, offset), hw_rx_obj, - len * ring->obj_size / sizeof(u32)); + len * ring->obj_size / val_bytes); return err; } @@ -2137,6 +2151,7 @@ static int mcp251xfd_handle_spicrcif(struct mcp251xfd_priv *priv) static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) { struct mcp251xfd_priv *priv = dev_id; + const int val_bytes = regmap_get_val_bytes(priv->map_reg); irqreturn_t handled = IRQ_NONE; int err; @@ -2162,7 +2177,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err = regmap_bulk_read(priv->map_reg, 
MCP251XFD_REG_INT, &priv->regs_status, sizeof(priv->regs_status) / - sizeof(u32)); + val_bytes); if (err) goto out_fail; @@ -2300,7 +2315,7 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, union mcp251xfd_tx_obj_load_buf *load_buf; u8 dlc; u32 id, flags; - int offset, len; + int len_sanitized = 0, len; if (cfd->can_id & CAN_EFF_FLAG) { u32 sid, eid; @@ -2321,12 +2336,12 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, * harm, only the lower 7 bits will be transferred into the * TEF object. */ - dlc = can_fd_len2dlc(cfd->len); - flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq) | - FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_SEQ_MCP2518FD_MASK, seq); if (cfd->can_id & CAN_RTR_FLAG) flags |= MCP251XFD_OBJ_FLAGS_RTR; + else + len_sanitized = canfd_sanitize_len(cfd->len); /* CANFD */ if (can_is_canfd_skb(skb)) { @@ -2337,8 +2352,15 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, if (cfd->flags & CANFD_BRS) flags |= MCP251XFD_OBJ_FLAGS_BRS; + + dlc = can_fd_len2dlc(cfd->len); + } else { + dlc = can_get_cc_dlc((struct can_frame *)cfd, + priv->can.ctrlmode); } + flags |= FIELD_PREP(MCP251XFD_OBJ_FLAGS_DLC, dlc); + load_buf = &tx_obj->buf; if (priv->devtype_data.quirks & MCP251XFD_QUIRK_CRC_TX) hw_tx_obj = &load_buf->crc.hw_tx_obj; @@ -2348,17 +2370,22 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv, put_unaligned_le32(id, &hw_tx_obj->id); put_unaligned_le32(flags, &hw_tx_obj->flags); - /* Clear data at end of CAN frame */ - offset = round_down(cfd->len, sizeof(u32)); - len = round_up(can_fd_dlc2len(dlc), sizeof(u32)) - offset; - if (MCP251XFD_SANITIZE_CAN && len) - memset(hw_tx_obj->data + offset, 0x0, len); + /* Copy data */ memcpy(hw_tx_obj->data, cfd->data, cfd->len); + /* Clear unused data at end of CAN frame */ + if (MCP251XFD_SANITIZE_CAN && len_sanitized) { + int pad_len; + + pad_len = len_sanitized - cfd->len; + if (pad_len) + memset(hw_tx_obj->data + cfd->len, 0x0, pad_len); + } + /* Number of bytes to be written into the RAM of the controller */ len = sizeof(hw_tx_obj->id) + sizeof(hw_tx_obj->flags); if (MCP251XFD_SANITIZE_CAN) - len += round_up(can_fd_dlc2len(dlc), sizeof(u32)); + len += round_up(len_sanitized, sizeof(u32)); else len += round_up(cfd->len, sizeof(u32)); @@ -2418,6 +2445,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, struct mcp251xfd_priv *priv = netdev_priv(ndev); struct mcp251xfd_tx_ring *tx_ring = priv->tx; struct mcp251xfd_tx_obj *tx_obj; + unsigned int frame_len; u8 tx_head; int err; @@ -2433,10 +2461,12 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb, /* Stop queue if we occupy the complete TX FIFO */ tx_head = mcp251xfd_get_tx_head(tx_ring); tx_ring->head++; - if (tx_ring->head - tx_ring->tail >= tx_ring->obj_num) + if (mcp251xfd_get_tx_free(tx_ring) == 0) netif_stop_queue(ndev); - can_put_echo_skb(skb, ndev, tx_head, 0); + frame_len = can_skb_get_frame_len(skb); + can_put_echo_skb(skb, ndev, tx_head, frame_len); + netdev_sent_queue(priv->ndev, frame_len); err = mcp251xfd_tx_obj_write(priv, tx_obj); if (err) @@ -2885,7 +2915,8 @@ static int mcp251xfd_probe(struct spi_device *spi) priv->can.data_bittiming_const = &mcp251xfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY | CAN_CTRLMODE_BERR_REPORTING | - CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO; + CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO | + CAN_CTRLMODE_CC_LEN8_DLC; priv->ndev = ndev; priv->spi = spi; 
priv->rx_int = rx_int; diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 4232a7126c1b..1f649d178010 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -466,7 +466,7 @@ static void mcba_usb_process_ka_usb(struct mcba_priv *priv, struct mcba_usb_msg_ka_usb *msg) { if (unlikely(priv->usb_ka_first_pass)) { - netdev_info(priv->netdev, "PIC USB version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC USB version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->usb_ka_first_pass = false; @@ -492,7 +492,7 @@ static void mcba_usb_process_ka_can(struct mcba_priv *priv, struct mcba_usb_msg_ka_can *msg) { if (unlikely(priv->can_ka_first_pass)) { - netdev_info(priv->netdev, "PIC CAN version %hhu.%hhu\n", + netdev_info(priv->netdev, "PIC CAN version %u.%u\n", msg->soft_ver_major, msg->soft_ver_minor); priv->can_ka_first_pass = false; @@ -554,7 +554,7 @@ static void mcba_usb_process_rx(struct mcba_priv *priv, break; default: - netdev_warn(priv->netdev, "Unsupported msg (0x%hhX)", + netdev_warn(priv->netdev, "Unsupported msg (0x%X)", msg->cmd_id); break; } diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 9a1921e653e8..4cc51fb37e67 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach <kurt@linutronix.de> */ @@ -153,6 +153,13 @@ static void hellcreek_select_vlan(struct hellcreek *hellcreek, int vid, hellcreek_write(hellcreek, val, HR_VIDCFG); } +static void hellcreek_select_tgd(struct hellcreek *hellcreek, int port) +{ + u16 val = port << TR_TGDSEL_TDGSEL_SHIFT; + + hellcreek_write(hellcreek, val, TR_TGDSEL); +} + static int hellcreek_wait_until_ready(struct hellcreek *hellcreek) { u16 val; @@ -1125,6 +1132,296 @@ out: return ret; } +static void hellcreek_setup_gcl(struct hellcreek *hellcreek, int port, + const struct tc_taprio_qopt_offload *schedule) +{ + const struct tc_taprio_sched_entry *cur, *initial, *next; + size_t i; + + cur = initial = &schedule->entries[0]; + next = cur + 1; + + for (i = 1; i <= schedule->num_entries; ++i) { + u16 data; + u8 gates; + + cur++; + next++; + + if (i == schedule->num_entries) + gates = initial->gate_mask ^ + cur->gate_mask; + else + gates = next->gate_mask ^ + cur->gate_mask; + + data = gates; + + if (i == schedule->num_entries) + data |= TR_GCLDAT_GCLWRLAST; + + /* Gates states */ + hellcreek_write(hellcreek, data, TR_GCLDAT); + + /* Time interval */ + hellcreek_write(hellcreek, + cur->interval & 0x0000ffff, + TR_GCLTIL); + hellcreek_write(hellcreek, + (cur->interval & 0xffff0000) >> 16, + TR_GCLTIH); + + /* Commit entry */ + data = ((i - 1) << TR_GCLCMD_GCLWRADR_SHIFT) | + (initial->gate_mask << + TR_GCLCMD_INIT_GATE_STATES_SHIFT); + hellcreek_write(hellcreek, data, TR_GCLCMD); + } +} + +static void hellcreek_set_cycle_time(struct hellcreek *hellcreek, + const struct tc_taprio_qopt_offload *schedule) +{ + u32 cycle_time = schedule->cycle_time; + + hellcreek_write(hellcreek, cycle_time & 0x0000ffff, TR_CTWRL); + hellcreek_write(hellcreek, (cycle_time & 0xffff0000) >> 16, TR_CTWRH); +} + +static void hellcreek_switch_schedule(struct hellcreek *hellcreek, + ktime_t start_time) +{ + struct timespec64 ts = ktime_to_timespec64(start_time); + + /* Start schedule at this point of time */ + hellcreek_write(hellcreek, ts.tv_nsec & 
0x0000ffff, TR_ESTWRL); + hellcreek_write(hellcreek, (ts.tv_nsec & 0xffff0000) >> 16, TR_ESTWRH); + + /* Arm timer, set seconds and switch schedule */ + hellcreek_write(hellcreek, TR_ESTCMD_ESTARM | TR_ESTCMD_ESTSWCFG | + ((ts.tv_sec & TR_ESTCMD_ESTSEC_MASK) << + TR_ESTCMD_ESTSEC_SHIFT), TR_ESTCMD); +} + +static bool hellcreek_schedule_startable(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + s64 base_time_ns, current_ns; + + /* The switch allows a schedule to be started only eight seconds within + * the future. Therefore, check the current PTP time if the schedule is + * startable or not. + */ + + /* Use the "cached" time. That should be alright, as it's updated quite + * frequently in the PTP code. + */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + /* Calculate difference to admin base time */ + base_time_ns = ktime_to_ns(hellcreek_port->current_schedule->base_time); + + return base_time_ns - current_ns < (s64)8 * NSEC_PER_SEC; +} + +static void hellcreek_start_schedule(struct hellcreek *hellcreek, int port) +{ + struct hellcreek_port *hellcreek_port = &hellcreek->ports[port]; + ktime_t base_time, current_time; + s64 current_ns; + u32 cycle_time; + + /* First select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Forward base time into the future if needed */ + mutex_lock(&hellcreek->ptp_lock); + current_ns = hellcreek->seconds * NSEC_PER_SEC + hellcreek->last_ts; + mutex_unlock(&hellcreek->ptp_lock); + + current_time = ns_to_ktime(current_ns); + base_time = hellcreek_port->current_schedule->base_time; + cycle_time = hellcreek_port->current_schedule->cycle_time; + + if (ktime_compare(current_time, base_time) > 0) { + s64 n; + + n = div64_s64(ktime_sub_ns(current_time, base_time), + cycle_time); + base_time = ktime_add_ns(base_time, (n + 1) * cycle_time); + } + + /* Set admin base time and switch schedule */ + hellcreek_switch_schedule(hellcreek, base_time); + + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + + dev_dbg(hellcreek->dev, "Armed EST timer for port %d\n", + hellcreek_port->port); +} + +static void hellcreek_check_schedule(struct work_struct *work) +{ + struct delayed_work *dw = to_delayed_work(work); + struct hellcreek_port *hellcreek_port; + struct hellcreek *hellcreek; + bool startable; + + hellcreek_port = dw_to_hellcreek_port(dw); + hellcreek = hellcreek_port->hellcreek; + + mutex_lock(&hellcreek->reg_lock); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, + hellcreek_port->port); + if (startable) { + hellcreek_start_schedule(hellcreek, hellcreek_port->port); + mutex_unlock(&hellcreek->reg_lock); + return; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Reschedule */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); +} + +static int hellcreek_port_set_schedule(struct dsa_switch *ds, int port, + struct tc_taprio_qopt_offload *taprio) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + bool startable; + u16 ctrl; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Configure traffic schedule on port %d\n", + port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + 
taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + hellcreek_port->current_schedule = taprio_offload_get(taprio); + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Enable gating and keep defaults */ + ctrl = (0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT) | TR_TGDCTRL_GATE_EN; + hellcreek_write(hellcreek, ctrl, TR_TGDCTRL); + + /* Cancel pending schedule */ + hellcreek_write(hellcreek, 0x00, TR_ESTCMD); + + /* Setup a new schedule */ + hellcreek_setup_gcl(hellcreek, port, hellcreek_port->current_schedule); + + /* Configure cycle time */ + hellcreek_set_cycle_time(hellcreek, hellcreek_port->current_schedule); + + /* Check starting time */ + startable = hellcreek_schedule_startable(hellcreek, port); + if (startable) { + hellcreek_start_schedule(hellcreek, port); + mutex_unlock(&hellcreek->reg_lock); + return 0; + } + + mutex_unlock(&hellcreek->reg_lock); + + /* Schedule periodic schedule check */ + schedule_delayed_work(&hellcreek_port->schedule_work, + HELLCREEK_SCHEDULE_PERIOD); + + return 0; +} + +static int hellcreek_port_del_schedule(struct dsa_switch *ds, int port) +{ + struct hellcreek *hellcreek = ds->priv; + struct hellcreek_port *hellcreek_port; + + hellcreek_port = &hellcreek->ports[port]; + + dev_dbg(hellcreek->dev, "Remove traffic schedule on port %d\n", port); + + /* First cancel delayed work */ + cancel_delayed_work_sync(&hellcreek_port->schedule_work); + + mutex_lock(&hellcreek->reg_lock); + + if (hellcreek_port->current_schedule) { + taprio_offload_free(hellcreek_port->current_schedule); + hellcreek_port->current_schedule = NULL; + } + + /* Then select port */ + hellcreek_select_tgd(hellcreek, port); + + /* Disable gating and return to regular switching flow */ + hellcreek_write(hellcreek, 0xff << TR_TGDCTRL_ADMINGATESTATES_SHIFT, + TR_TGDCTRL); + + mutex_unlock(&hellcreek->reg_lock); + + return 0; +} + +static bool hellcreek_validate_schedule(struct hellcreek *hellcreek, + struct tc_taprio_qopt_offload *schedule) +{ + size_t i; + + /* Does this hellcreek version support Qbv in hardware? 
*/ + if (!hellcreek->pdata->qbv_support) + return false; + + /* cycle time can only be 32bit */ + if (schedule->cycle_time > (u32)-1) + return false; + + /* cycle time extension is not supported */ + if (schedule->cycle_time_extension) + return false; + + /* Only set command is supported */ + for (i = 0; i < schedule->num_entries; ++i) + if (schedule->entries[i].command != TC_TAPRIO_CMD_SET_GATES) + return false; + + return true; +} + +static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, + enum tc_setup_type type, void *type_data) +{ + struct tc_taprio_qopt_offload *taprio = type_data; + struct hellcreek *hellcreek = ds->priv; + + if (type != TC_SETUP_QDISC_TAPRIO) + return -EOPNOTSUPP; + + if (!hellcreek_validate_schedule(hellcreek, taprio)) + return -EOPNOTSUPP; + + if (taprio->enable) + return hellcreek_port_set_schedule(ds, port, taprio); + + return hellcreek_port_del_schedule(ds, port); +} + static const struct dsa_switch_ops hellcreek_ds_ops = { .get_ethtool_stats = hellcreek_get_ethtool_stats, .get_sset_count = hellcreek_get_sset_count, @@ -1143,6 +1440,7 @@ static const struct dsa_switch_ops hellcreek_ds_ops = { .port_hwtstamp_get = hellcreek_port_hwtstamp_get, .port_prechangeupper = hellcreek_port_prechangeupper, .port_rxtstamp = hellcreek_port_rxtstamp, + .port_setup_tc = hellcreek_port_setup_tc, .port_stp_state_set = hellcreek_port_stp_state_set, .port_txtstamp = hellcreek_port_txtstamp, .port_vlan_add = hellcreek_vlan_add, @@ -1197,6 +1495,9 @@ static int hellcreek_probe(struct platform_device *pdev) port->hellcreek = hellcreek; port->port = i; + + INIT_DELAYED_WORK(&port->schedule_work, + hellcreek_check_schedule); } mutex_init(&hellcreek->reg_lock); diff --git a/drivers/net/dsa/hirschmann/hellcreek.h b/drivers/net/dsa/hirschmann/hellcreek.h index e81781ebc31c..854639f87247 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.h +++ b/drivers/net/dsa/hirschmann/hellcreek.h @@ -3,7 +3,7 @@ * DSA driver for: * Hirschmann Hellcreek TSN switch. * - * Copyright (C) 2019,2020 Linutronix GmbH + * Copyright (C) 2019-2021 Linutronix GmbH * Author Kurt Kanzenbach <kurt@linutronix.de> */ @@ -21,6 +21,7 @@ #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> #include <net/dsa.h> +#include <net/pkt_sched.h> /* Ports: * - 0: CPU @@ -246,6 +247,10 @@ struct hellcreek_port { /* Per-port timestamping resources */ struct hellcreek_port_hwtstamp port_hwtstamp; + + /* Per-port Qbv schedule information */ + struct tc_taprio_qopt_offload *current_schedule; + struct delayed_work schedule_work; }; struct hellcreek_fdb_entry { @@ -283,4 +288,14 @@ struct hellcreek { size_t fdb_entries; }; +/* A Qbv schedule can only started up to 8 seconds in the future. If the delta + * between the base time and the current ptp time is larger than 8 seconds, then + * use periodic work to check for the schedule to be started. The delayed work + * cannot be armed directly to $base_time - 8 + X, because for large deltas the + * PTP frequency matters. 
+ */ +#define HELLCREEK_SCHEDULE_PERIOD (2 * HZ) +#define dw_to_hellcreek_port(dw) \ + container_of(dw, struct hellcreek_port, schedule_work) + #endif /* _HELLCREEK_H_ */ diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index d2196197d920..eb13ba79dd01 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -18,6 +18,7 @@ #include <linux/regulator/consumer.h> #include <linux/reset.h> #include <linux/gpio/consumer.h> +#include <linux/gpio/driver.h> #include <net/dsa.h> #include "mt7530.h" @@ -1622,6 +1623,109 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port, } } +static inline u32 +mt7530_gpio_to_bit(unsigned int offset) +{ + /* Map GPIO offset to register bit + * [ 2: 0] port 0 LED 0..2 as GPIO 0..2 + * [ 6: 4] port 1 LED 0..2 as GPIO 3..5 + * [10: 8] port 2 LED 0..2 as GPIO 6..8 + * [14:12] port 3 LED 0..2 as GPIO 9..11 + * [18:16] port 4 LED 0..2 as GPIO 12..14 + */ + return BIT(offset + offset / 3); +} + +static int +mt7530_gpio_get(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return !!(mt7530_read(priv, MT7530_LED_GPIO_DATA) & bit); +} + +static void +mt7530_gpio_set(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); +} + +static int +mt7530_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + return (mt7530_read(priv, MT7530_LED_GPIO_DIR) & bit) ? + GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; +} + +static int +mt7530_gpio_direction_input(struct gpio_chip *gc, unsigned int offset) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_clear(priv, MT7530_LED_GPIO_OE, bit); + mt7530_clear(priv, MT7530_LED_GPIO_DIR, bit); + + return 0; +} + +static int +mt7530_gpio_direction_output(struct gpio_chip *gc, unsigned int offset, int value) +{ + struct mt7530_priv *priv = gpiochip_get_data(gc); + u32 bit = mt7530_gpio_to_bit(offset); + + mt7530_set(priv, MT7530_LED_GPIO_DIR, bit); + + if (value) + mt7530_set(priv, MT7530_LED_GPIO_DATA, bit); + else + mt7530_clear(priv, MT7530_LED_GPIO_DATA, bit); + + mt7530_set(priv, MT7530_LED_GPIO_OE, bit); + + return 0; +} + +static int +mt7530_setup_gpio(struct mt7530_priv *priv) +{ + struct device *dev = priv->dev; + struct gpio_chip *gc; + + gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); + if (!gc) + return -ENOMEM; + + mt7530_write(priv, MT7530_LED_GPIO_OE, 0); + mt7530_write(priv, MT7530_LED_GPIO_DIR, 0); + mt7530_write(priv, MT7530_LED_IO_MODE, 0); + + gc->label = "mt7530"; + gc->parent = dev; + gc->owner = THIS_MODULE; + gc->get_direction = mt7530_gpio_get_direction; + gc->direction_input = mt7530_gpio_direction_input; + gc->direction_output = mt7530_gpio_direction_output; + gc->get = mt7530_gpio_get; + gc->set = mt7530_gpio_set; + gc->base = -1; + gc->ngpio = 15; + gc->can_sleep = true; + + return devm_gpiochip_add_data(dev, gc, priv); +} + static int mt7530_setup(struct dsa_switch *ds) { @@ -1763,6 +1867,12 @@ mt7530_setup(struct dsa_switch *ds) } } + if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) { + ret = mt7530_setup_gpio(priv); + if (ret) + return ret; + } + mt7530_setup_port5(ds, interface); /* Flush the FDB table */ 
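The mt7530 changes above expose the switch's LED pins as a GPIO controller whenever the new "gpio-controller" property is present in the device tree (see the mt7530.txt binding update earlier in this series; the driver registers 15 lines, three LEDs for each of ports 0-4). A minimal board-level sketch of such a node follows; the node label, unit address and surrounding required properties are illustrative only and are not taken from this series::

	switch0: switch@0 {
		compatible = "mediatek,mt7530";
		reg = <0>;
		/* ... remaining required properties per mt7530.txt omitted ... */

		/* Optional: run the LED controller in GPIO mode */
		gpio-controller;
		#gpio-cells = <2>;
	};

With this in place, consumers (for example a gpio-leds node) can reference the LED pins with the usual two-cell specifiers such as <&switch0 0 GPIO_ACTIVE_HIGH>, where the second cell carries the standard GPIO flags.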
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index 32d8969b3ace..64a9bb377e15 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -554,6 +554,26 @@ enum mt7531_clk_skew { #define MT7531_GPIO12_RG_RXD3_MASK GENMASK(19, 16) #define MT7531_EXT_P_MDIO_12 (2 << 16) +/* Registers for LED GPIO control (MT7530 only) + * All registers follow this pattern: + * [ 2: 0] port 0 + * [ 6: 4] port 1 + * [10: 8] port 2 + * [14:12] port 3 + * [18:16] port 4 + */ + +/* LED enable, 0: Disable, 1: Enable (Default) */ +#define MT7530_LED_EN 0x7d00 +/* LED mode, 0: GPIO mode, 1: PHY mode (Default) */ +#define MT7530_LED_IO_MODE 0x7d04 +/* GPIO direction, 0: Input, 1: Output */ +#define MT7530_LED_GPIO_DIR 0x7d10 +/* GPIO output enable, 0: Disable, 1: Enable */ +#define MT7530_LED_GPIO_OE 0x7d14 +/* GPIO value, 0: Low, 1: High */ +#define MT7530_LED_GPIO_DATA 0x7d18 + #define MT7530_CREV 0x7ffc #define CHIP_NAME_SHIFT 16 #define MT7530_ID 0x7530 diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 51185e4d7d15..b17540926c11 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -25,7 +25,6 @@ config NET_DSA_MV88E6XXX_PTP default n depends on NET_DSA_MV88E6XXX_GLOBAL2 depends on PTP_1588_CLOCK - imply NETWORK_PHY_TIMESTAMPING help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 2f976050a0d7..514364947944 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -4049,8 +4049,8 @@ static const struct mv88e6xxx_ops mv88e6250_ops = { .mgmt_rsvd2cpu = mv88e6352_g2_mgmt_rsvd2cpu, .pot_clear = mv88e6xxx_g2_pot_clear, .reset = mv88e6250_g1_reset, - .vtu_getnext = mv88e6250_g1_vtu_getnext, - .vtu_loadpurge = mv88e6250_g1_vtu_loadpurge, + .vtu_getnext = mv88e6185_g1_vtu_getnext, + .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, .avb_ops = &mv88e6352_avb_ops, .ptp_ops = &mv88e6250_ptp_ops, .phylink_validate = mv88e6065_phylink_validate, diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 80a182c5b98a..7c396964d0b2 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -336,10 +336,6 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); int mv88e6352_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c index 7b96396be609..ae12c981923e 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c @@ -336,39 +336,6 @@ int mv88e6xxx_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return mv88e6xxx_g1_vtu_vid_read(chip, entry); } -int mv88e6250_g1_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 val; - int err; - - err = mv88e6xxx_g1_vtu_getnext(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_read(chip, entry); - if (err) - return err; - - err = 
mv88e6185_g1_stu_data_read(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); - if (err) - return err; - - entry->fid = val & 0x000f; - entry->fid |= (val & 0x0300) >> 4; - } - - return 0; -} - int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -389,7 +356,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return err; /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[7:4] are located in VTU Operation 11:8 + * VTU DBNum[7:4] ([5:4] for 6250) are located in VTU Operation 11:8 (9:8) */ err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_VTU_OP, &val); if (err) @@ -397,6 +364,7 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip, entry->fid = val & 0x000f; entry->fid |= (val & 0x0f00) >> 4; + entry->fid &= mv88e6xxx_num_databases(chip) - 1; } return 0; @@ -466,35 +434,6 @@ int mv88e6390_g1_vtu_getnext(struct mv88e6xxx_chip *chip, return 0; } -int mv88e6250_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_entry *entry) -{ - u16 op = MV88E6XXX_G1_VTU_OP_VTU_LOAD_PURGE; - int err; - - err = mv88e6xxx_g1_vtu_op_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g1_vtu_vid_write(chip, entry); - if (err) - return err; - - if (entry->valid) { - err = mv88e6185_g1_vtu_data_write(chip, entry); - if (err) - return err; - - /* VTU DBNum[3:0] are located in VTU Operation 3:0 - * VTU DBNum[5:4] are located in VTU Operation 9:8 - */ - op |= entry->fid & 0x000f; - op |= (entry->fid & 0x0030) << 4; - } - - return mv88e6xxx_g1_vtu_op(chip, op); -} - int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry) { @@ -516,6 +455,10 @@ int mv88e6185_g1_vtu_loadpurge(struct mv88e6xxx_chip *chip, /* VTU DBNum[3:0] are located in VTU Operation 3:0 * VTU DBNum[7:4] are located in VTU Operation 11:8 + * + * For the 6250/6220, the latter are really [5:4] and + * 9:8, but in those cases bits 7:6 of entry->fid are + * 0 since they have num_databases = 64. 
*/ op |= entry->fid & 0x000f; op |= (entry->fid & 0x00f0) << 4; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index de50e8b9e656..ad04660b97b8 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -33,7 +33,6 @@ source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/aquantia/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" -source "drivers/net/ethernet/aurora/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" source "drivers/net/ethernet/brocade/Kconfig" source "drivers/net/ethernet/cadence/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index f8f38dcb5f8a..1e7dc8a7762d 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_AQUANTIA) += aquantia/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ -obj-$(CONFIG_NET_VENDOR_AURORA) += aurora/ obj-$(CONFIG_NET_VENDOR_CADENCE) += cadence/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/ diff --git a/drivers/net/ethernet/aurora/Kconfig b/drivers/net/ethernet/aurora/Kconfig deleted file mode 100644 index 9ee30ea90bfa..000000000000 --- a/drivers/net/ethernet/aurora/Kconfig +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config NET_VENDOR_AURORA - bool "Aurora VLSI devices" - default y - help - If you have a network (Ethernet) device belonging to this class, - say Y. - - Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - questions about Aurora devices. If you say Y, you will be asked - for your specific device in the following questions. - -if NET_VENDOR_AURORA - -config AURORA_NB8800 - tristate "Aurora AU-NB8800 support" - depends on HAS_DMA - select PHYLIB - help - Support for the AU-NB8800 gigabit Ethernet controller. - -endif diff --git a/drivers/net/ethernet/aurora/Makefile b/drivers/net/ethernet/aurora/Makefile deleted file mode 100644 index f3d599867619..000000000000 --- a/drivers/net/ethernet/aurora/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AURORA_NB8800) += nb8800.o diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c deleted file mode 100644 index 5b20185cbd62..000000000000 --- a/drivers/net/ethernet/aurora/nb8800.c +++ /dev/null @@ -1,1520 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2015 Mans Rullgard <mans@mansr.com> - * - * Mostly rewritten, based on driver from Sigma Designs. Original - * copyright notice below. - * - * Driver for tangox SMP864x/SMP865x/SMP867x/SMP868x builtin Ethernet Mac. 
- * - * Copyright (C) 2005 Maxime Bizon <mbizon@freebox.fr> - */ - -#include <linux/module.h> -#include <linux/etherdevice.h> -#include <linux/delay.h> -#include <linux/ethtool.h> -#include <linux/interrupt.h> -#include <linux/platform_device.h> -#include <linux/of_device.h> -#include <linux/of_mdio.h> -#include <linux/of_net.h> -#include <linux/dma-mapping.h> -#include <linux/phy.h> -#include <linux/cache.h> -#include <linux/jiffies.h> -#include <linux/io.h> -#include <linux/iopoll.h> -#include <asm/barrier.h> - -#include "nb8800.h" - -static void nb8800_tx_done(struct net_device *dev); -static int nb8800_dma_stop(struct net_device *dev); - -static inline u8 nb8800_readb(struct nb8800_priv *priv, int reg) -{ - return readb_relaxed(priv->base + reg); -} - -static inline u32 nb8800_readl(struct nb8800_priv *priv, int reg) -{ - return readl_relaxed(priv->base + reg); -} - -static inline void nb8800_writeb(struct nb8800_priv *priv, int reg, u8 val) -{ - writeb_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writew(struct nb8800_priv *priv, int reg, u16 val) -{ - writew_relaxed(val, priv->base + reg); -} - -static inline void nb8800_writel(struct nb8800_priv *priv, int reg, u32 val) -{ - writel_relaxed(val, priv->base + reg); -} - -static inline void nb8800_maskb(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readb(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writeb(priv, reg, new); -} - -static inline void nb8800_maskl(struct nb8800_priv *priv, int reg, - u32 mask, u32 val) -{ - u32 old = nb8800_readl(priv, reg); - u32 new = (old & ~mask) | (val & mask); - - if (new != old) - nb8800_writel(priv, reg, new); -} - -static inline void nb8800_modb(struct nb8800_priv *priv, int reg, u8 bits, - bool set) -{ - nb8800_maskb(priv, reg, bits, set ? bits : 0); -} - -static inline void nb8800_setb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, bits); -} - -static inline void nb8800_clearb(struct nb8800_priv *priv, int reg, u8 bits) -{ - nb8800_maskb(priv, reg, bits, 0); -} - -static inline void nb8800_modl(struct nb8800_priv *priv, int reg, u32 bits, - bool set) -{ - nb8800_maskl(priv, reg, bits, set ? 
bits : 0); -} - -static inline void nb8800_setl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, bits); -} - -static inline void nb8800_clearl(struct nb8800_priv *priv, int reg, u32 bits) -{ - nb8800_maskl(priv, reg, bits, 0); -} - -static int nb8800_mdio_wait(struct mii_bus *bus) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - - return readl_poll_timeout_atomic(priv->base + NB8800_MDIO_CMD, - val, !(val & MDIO_CMD_GO), 1, 1000); -} - -static int nb8800_mdio_cmd(struct mii_bus *bus, u32 cmd) -{ - struct nb8800_priv *priv = bus->priv; - int err; - - err = nb8800_mdio_wait(bus); - if (err) - return err; - - nb8800_writel(priv, NB8800_MDIO_CMD, cmd); - udelay(10); - nb8800_writel(priv, NB8800_MDIO_CMD, cmd | MDIO_CMD_GO); - - return nb8800_mdio_wait(bus); -} - -static int nb8800_mdio_read(struct mii_bus *bus, int phy_id, int reg) -{ - struct nb8800_priv *priv = bus->priv; - u32 val; - int err; - - err = nb8800_mdio_cmd(bus, MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg)); - if (err) - return err; - - val = nb8800_readl(priv, NB8800_MDIO_STS); - if (val & MDIO_STS_ERR) - return 0xffff; - - return val & 0xffff; -} - -static int nb8800_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) -{ - u32 cmd = MDIO_CMD_ADDR(phy_id) | MDIO_CMD_REG(reg) | - MDIO_CMD_DATA(val) | MDIO_CMD_WR; - - return nb8800_mdio_cmd(bus, cmd); -} - -static void nb8800_mac_tx(struct net_device *dev, bool enable) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - while (nb8800_readl(priv, NB8800_TXC_CR) & TCR_EN) - cpu_relax(); - - nb8800_modb(priv, NB8800_TX_CTL1, TX_EN, enable); -} - -static void nb8800_mac_rx(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_EN, enable); -} - -static void nb8800_mac_af(struct net_device *dev, bool enable) -{ - nb8800_modb(netdev_priv(dev), NB8800_RX_CTL, RX_AF_EN, enable); -} - -static void nb8800_start_rx(struct net_device *dev) -{ - nb8800_setl(netdev_priv(dev), NB8800_RXC_CR, RCR_EN); -} - -static int nb8800_alloc_rx(struct net_device *dev, unsigned int i, bool napi) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct nb8800_rx_buf *rxb = &priv->rx_bufs[i]; - int size = L1_CACHE_ALIGN(RX_BUF_SIZE); - dma_addr_t dma_addr; - struct page *page; - unsigned long offset; - void *data; - - data = napi ? napi_alloc_frag(size) : netdev_alloc_frag(size); - if (!data) - return -ENOMEM; - - page = virt_to_head_page(data); - offset = data - page_address(page); - - dma_addr = dma_map_page(&dev->dev, page, offset, RX_BUF_SIZE, - DMA_FROM_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - skb_free_frag(data); - return -ENOMEM; - } - - rxb->page = page; - rxb->offset = offset; - rxd->desc.s_addr = dma_addr; - - return 0; -} - -static void nb8800_receive(struct net_device *dev, unsigned int i, - unsigned int len) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd = &priv->rx_descs[i]; - struct page *page = priv->rx_bufs[i].page; - int offset = priv->rx_bufs[i].offset; - void *data = page_address(page) + offset; - dma_addr_t dma = rxd->desc.s_addr; - struct sk_buff *skb; - unsigned int size; - int err; - - size = len <= RX_COPYBREAK ? 
len : RX_COPYHDR; - - skb = napi_alloc_skb(&priv->napi, size); - if (!skb) { - netdev_err(dev, "rx skb allocation failed\n"); - dev->stats.rx_dropped++; - return; - } - - if (len <= RX_COPYBREAK) { - dma_sync_single_for_cpu(&dev->dev, dma, len, DMA_FROM_DEVICE); - skb_put_data(skb, data, len); - dma_sync_single_for_device(&dev->dev, dma, len, - DMA_FROM_DEVICE); - } else { - err = nb8800_alloc_rx(dev, i, true); - if (err) { - netdev_err(dev, "rx buffer allocation failed\n"); - dev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } - - dma_unmap_page(&dev->dev, dma, RX_BUF_SIZE, DMA_FROM_DEVICE); - skb_put_data(skb, data, RX_COPYHDR); - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - offset + RX_COPYHDR, len - RX_COPYHDR, - RX_BUF_SIZE); - } - - skb->protocol = eth_type_trans(skb, dev); - napi_gro_receive(&priv->napi, skb); -} - -static void nb8800_rx_error(struct net_device *dev, u32 report) -{ - if (report & RX_LENGTH_ERR) - dev->stats.rx_length_errors++; - - if (report & RX_FCS_ERR) - dev->stats.rx_crc_errors++; - - if (report & RX_FIFO_OVERRUN) - dev->stats.rx_fifo_errors++; - - if (report & RX_ALIGNMENT_ERROR) - dev->stats.rx_frame_errors++; - - dev->stats.rx_errors++; -} - -static int nb8800_poll(struct napi_struct *napi, int budget) -{ - struct net_device *dev = napi->dev; - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - unsigned int last = priv->rx_eoc; - unsigned int next; - int work = 0; - - nb8800_tx_done(dev); - -again: - do { - unsigned int len; - - next = (last + 1) % RX_DESC_COUNT; - - rxd = &priv->rx_descs[next]; - - if (!rxd->report) - break; - - len = RX_BYTES_TRANSFERRED(rxd->report); - - if (IS_RX_ERROR(rxd->report)) - nb8800_rx_error(dev, rxd->report); - else - nb8800_receive(dev, next, len); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += len; - - if (rxd->report & RX_MULTICAST_PKT) - dev->stats.multicast++; - - rxd->report = 0; - last = next; - work++; - } while (work < budget); - - if (work) { - priv->rx_descs[last].desc.config |= DESC_EOC; - wmb(); /* ensure new EOC is written before clearing old */ - priv->rx_descs[priv->rx_eoc].desc.config &= ~DESC_EOC; - priv->rx_eoc = last; - nb8800_start_rx(dev); - } - - if (work < budget) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - /* If a packet arrived after we last checked but - * before writing RX_ITR, the interrupt will be - * delayed, so we retrieve it now. 
- */ - if (priv->rx_descs[next].report) - goto again; - - napi_complete_done(napi, work); - } - - return work; -} - -static void __nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb; - u32 txc_cr; - - txb = &priv->tx_bufs[priv->tx_queue]; - if (!txb->ready) - return; - - txc_cr = nb8800_readl(priv, NB8800_TXC_CR); - if (txc_cr & TCR_EN) - return; - - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); /* ensure desc addr is written before starting DMA */ - nb8800_writel(priv, NB8800_TXC_CR, txc_cr | TCR_EN); - - priv->tx_queue = (priv->tx_queue + txb->chain_len) % TX_DESC_COUNT; -} - -static void nb8800_tx_dma_start(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock_irq(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock_irq(&priv->tx_lock); -} - -static void nb8800_tx_dma_start_irq(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - spin_lock(&priv->tx_lock); - __nb8800_tx_dma_start(dev); - spin_unlock(&priv->tx_lock); -} - -static netdev_tx_t nb8800_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_desc *txd; - struct nb8800_tx_buf *txb; - struct nb8800_dma_desc *desc; - dma_addr_t dma_addr; - unsigned int dma_len; - unsigned int align; - unsigned int next; - bool xmit_more; - - if (atomic_read(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - return NETDEV_TX_BUSY; - } - - align = (8 - (uintptr_t)skb->data) & 7; - - dma_len = skb->len - align; - dma_addr = dma_map_single(&dev->dev, skb->data + align, - dma_len, DMA_TO_DEVICE); - - if (dma_mapping_error(&dev->dev, dma_addr)) { - netdev_err(dev, "tx dma mapping error\n"); - kfree_skb(skb); - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } - - xmit_more = netdev_xmit_more(); - if (atomic_dec_return(&priv->tx_free) <= NB8800_DESC_LOW) { - netif_stop_queue(dev); - xmit_more = false; - } - - next = priv->tx_next; - txb = &priv->tx_bufs[next]; - txd = &priv->tx_descs[next]; - desc = &txd->desc[0]; - - next = (next + 1) % TX_DESC_COUNT; - - if (align) { - memcpy(txd->buf, skb->data, align); - - desc->s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - desc->n_addr = txb->dma_desc + sizeof(txd->desc[0]); - desc->config = DESC_BTS(2) | DESC_DS | align; - - desc++; - } - - desc->s_addr = dma_addr; - desc->n_addr = priv->tx_bufs[next].dma_desc; - desc->config = DESC_BTS(2) | DESC_DS | DESC_EOF | dma_len; - - if (!xmit_more) - desc->config |= DESC_EOC; - - txb->skb = skb; - txb->dma_addr = dma_addr; - txb->dma_len = dma_len; - - if (!priv->tx_chain) { - txb->chain_len = 1; - priv->tx_chain = txb; - } else { - priv->tx_chain->chain_len++; - } - - netdev_sent_queue(dev, skb->len); - - priv->tx_next = next; - - if (!xmit_more) { - smp_wmb(); - priv->tx_chain->ready = true; - priv->tx_chain = NULL; - nb8800_tx_dma_start(dev); - } - - return NETDEV_TX_OK; -} - -static void nb8800_tx_error(struct net_device *dev, u32 report) -{ - if (report & TX_LATE_COLLISION) - dev->stats.collisions++; - - if (report & TX_PACKET_DROPPED) - dev->stats.tx_dropped++; - - if (report & TX_FIFO_UNDERRUN) - dev->stats.tx_fifo_errors++; - - dev->stats.tx_errors++; -} - -static void nb8800_tx_done(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int limit = priv->tx_next; - unsigned int done = priv->tx_done; - unsigned int packets = 0; - unsigned int len = 0; - - while (done != limit) { - 
struct nb8800_tx_desc *txd = &priv->tx_descs[done]; - struct nb8800_tx_buf *txb = &priv->tx_bufs[done]; - struct sk_buff *skb; - - if (!txd->report) - break; - - skb = txb->skb; - len += skb->len; - - dma_unmap_single(&dev->dev, txb->dma_addr, txb->dma_len, - DMA_TO_DEVICE); - - if (IS_TX_ERROR(txd->report)) { - nb8800_tx_error(dev, txd->report); - kfree_skb(skb); - } else { - consume_skb(skb); - } - - dev->stats.tx_packets++; - dev->stats.tx_bytes += TX_BYTES_TRANSFERRED(txd->report); - dev->stats.collisions += TX_EARLY_COLLISIONS(txd->report); - - txb->skb = NULL; - txb->ready = false; - txd->report = 0; - - done = (done + 1) % TX_DESC_COUNT; - packets++; - } - - if (packets) { - smp_mb__before_atomic(); - atomic_add(packets, &priv->tx_free); - netdev_completed_queue(dev, packets, len); - netif_wake_queue(dev); - priv->tx_done = done; - } -} - -static irqreturn_t nb8800_irq(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct nb8800_priv *priv = netdev_priv(dev); - irqreturn_t ret = IRQ_NONE; - u32 val; - - /* tx interrupt */ - val = nb8800_readl(priv, NB8800_TXC_SR); - if (val) { - nb8800_writel(priv, NB8800_TXC_SR, val); - - if (val & TSR_DI) - nb8800_tx_dma_start_irq(dev); - - if (val & TSR_TI) - napi_schedule_irqoff(&priv->napi); - - if (unlikely(val & TSR_DE)) - netdev_err(dev, "TX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & TSR_TO)) - netdev_err(dev, "TX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - /* rx interrupt */ - val = nb8800_readl(priv, NB8800_RXC_SR); - if (val) { - nb8800_writel(priv, NB8800_RXC_SR, val); - - if (likely(val & (RSR_RI | RSR_DI))) { - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_poll); - napi_schedule_irqoff(&priv->napi); - } - - if (unlikely(val & RSR_DE)) - netdev_err(dev, "RX DMA error\n"); - - /* should never happen with automatic status retrieval */ - if (unlikely(val & RSR_RO)) - netdev_err(dev, "RX Status FIFO overflow\n"); - - ret = IRQ_HANDLED; - } - - return ret; -} - -static void nb8800_mac_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - bool gigabit = priv->speed == SPEED_1000; - u32 mac_mode_mask = RGMII_MODE | HALF_DUPLEX | GMAC_MODE; - u32 mac_mode = 0; - u32 slot_time; - u32 phy_clk; - u32 ict; - - if (!priv->duplex) - mac_mode |= HALF_DUPLEX; - - if (gigabit) { - if (phy_interface_is_rgmii(dev->phydev)) - mac_mode |= RGMII_MODE; - - mac_mode |= GMAC_MODE; - phy_clk = 125000000; - - /* Should be 512 but register is only 8 bits */ - slot_time = 255; - } else { - phy_clk = 25000000; - slot_time = 128; - } - - ict = DIV_ROUND_UP(phy_clk, clk_get_rate(priv->clk)); - - nb8800_writeb(priv, NB8800_IC_THRESHOLD, ict); - nb8800_writeb(priv, NB8800_SLOT_TIME, slot_time); - nb8800_maskb(priv, NB8800_MAC_MODE, mac_mode_mask, mac_mode); -} - -static void nb8800_pause_config(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - u32 rxcr; - - if (priv->pause_aneg) { - if (!phydev || !phydev->link) - return; - - priv->pause_rx = phydev->pause; - priv->pause_tx = phydev->pause ^ phydev->asym_pause; - } - - nb8800_modb(priv, NB8800_RX_CTL, RX_PAUSE_EN, priv->pause_rx); - - rxcr = nb8800_readl(priv, NB8800_RXC_CR); - if (!!(rxcr & RCR_FL) == priv->pause_tx) - return; - - if (netif_running(dev)) { - napi_disable(&priv->napi); - netif_tx_lock_bh(dev); - nb8800_dma_stop(dev); - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - nb8800_start_rx(dev); - 
netif_tx_unlock_bh(dev); - napi_enable(&priv->napi); - } else { - nb8800_modl(priv, NB8800_RXC_CR, RCR_FL, priv->pause_tx); - } -} - -static void nb8800_link_reconfigure(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - int change = 0; - - if (phydev->link) { - if (phydev->speed != priv->speed) { - priv->speed = phydev->speed; - change = 1; - } - - if (phydev->duplex != priv->duplex) { - priv->duplex = phydev->duplex; - change = 1; - } - - if (change) - nb8800_mac_config(dev); - - nb8800_pause_config(dev); - } - - if (phydev->link != priv->link) { - priv->link = phydev->link; - change = 1; - } - - if (change) - phy_print_status(phydev); -} - -static void nb8800_update_mac_addr(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int i; - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_SRC_ADDR(i), dev->dev_addr[i]); - - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_UC_ADDR(i), dev->dev_addr[i]); -} - -static int nb8800_set_mac_address(struct net_device *dev, void *addr) -{ - struct sockaddr *sock = addr; - - if (netif_running(dev)) - return -EBUSY; - - ether_addr_copy(dev->dev_addr, sock->sa_data); - nb8800_update_mac_addr(dev); - - return 0; -} - -static void nb8800_mc_init(struct net_device *dev, int val) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_MC_INIT, val); - readb_poll_timeout_atomic(priv->base + NB8800_MC_INIT, val, !val, - 1, 1000); -} - -static void nb8800_set_rx_mode(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct netdev_hw_addr *ha; - int i; - - if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) { - nb8800_mac_af(dev, false); - return; - } - - nb8800_mac_af(dev, true); - nb8800_mc_init(dev, 0); - - netdev_for_each_mc_addr(ha, dev) { - for (i = 0; i < ETH_ALEN; i++) - nb8800_writeb(priv, NB8800_MC_ADDR(i), ha->addr[i]); - - nb8800_mc_init(dev, 0xff); - } -} - -#define RX_DESC_SIZE (RX_DESC_COUNT * sizeof(struct nb8800_rx_desc)) -#define TX_DESC_SIZE (TX_DESC_COUNT * sizeof(struct nb8800_tx_desc)) - -static void nb8800_dma_free(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int i; - - if (priv->rx_bufs) { - for (i = 0; i < RX_DESC_COUNT; i++) - if (priv->rx_bufs[i].page) - put_page(priv->rx_bufs[i].page); - - kfree(priv->rx_bufs); - priv->rx_bufs = NULL; - } - - if (priv->tx_bufs) { - for (i = 0; i < TX_DESC_COUNT; i++) - kfree_skb(priv->tx_bufs[i].skb); - - kfree(priv->tx_bufs); - priv->tx_bufs = NULL; - } - - if (priv->rx_descs) { - dma_free_coherent(dev->dev.parent, RX_DESC_SIZE, priv->rx_descs, - priv->rx_desc_dma); - priv->rx_descs = NULL; - } - - if (priv->tx_descs) { - dma_free_coherent(dev->dev.parent, TX_DESC_SIZE, priv->tx_descs, - priv->tx_desc_dma); - priv->tx_descs = NULL; - } -} - -static void nb8800_dma_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_rx_desc *rxd; - struct nb8800_tx_desc *txd; - unsigned int i; - - for (i = 0; i < RX_DESC_COUNT; i++) { - dma_addr_t rx_dma = priv->rx_desc_dma + i * sizeof(*rxd); - - rxd = &priv->rx_descs[i]; - rxd->desc.n_addr = rx_dma + sizeof(*rxd); - rxd->desc.r_addr = - rx_dma + offsetof(struct nb8800_rx_desc, report); - rxd->desc.config = priv->rx_dma_config; - rxd->report = 0; - } - - rxd->desc.n_addr = priv->rx_desc_dma; - rxd->desc.config |= DESC_EOC; - - priv->rx_eoc = RX_DESC_COUNT - 1; - - for (i = 0; i < TX_DESC_COUNT; i++) { - struct nb8800_tx_buf 
*txb = &priv->tx_bufs[i]; - dma_addr_t r_dma = txb->dma_desc + - offsetof(struct nb8800_tx_desc, report); - - txd = &priv->tx_descs[i]; - txd->desc[0].r_addr = r_dma; - txd->desc[1].r_addr = r_dma; - txd->report = 0; - } - - priv->tx_next = 0; - priv->tx_queue = 0; - priv->tx_done = 0; - atomic_set(&priv->tx_free, TX_DESC_COUNT); - - nb8800_writel(priv, NB8800_RX_DESC_ADDR, priv->rx_desc_dma); - - wmb(); /* ensure all setup is written before starting */ -} - -static int nb8800_dma_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - unsigned int n_rx = RX_DESC_COUNT; - unsigned int n_tx = TX_DESC_COUNT; - unsigned int i; - int err; - - priv->rx_descs = dma_alloc_coherent(dev->dev.parent, RX_DESC_SIZE, - &priv->rx_desc_dma, GFP_KERNEL); - if (!priv->rx_descs) - goto err_out; - - priv->rx_bufs = kcalloc(n_rx, sizeof(*priv->rx_bufs), GFP_KERNEL); - if (!priv->rx_bufs) - goto err_out; - - for (i = 0; i < n_rx; i++) { - err = nb8800_alloc_rx(dev, i, false); - if (err) - goto err_out; - } - - priv->tx_descs = dma_alloc_coherent(dev->dev.parent, TX_DESC_SIZE, - &priv->tx_desc_dma, GFP_KERNEL); - if (!priv->tx_descs) - goto err_out; - - priv->tx_bufs = kcalloc(n_tx, sizeof(*priv->tx_bufs), GFP_KERNEL); - if (!priv->tx_bufs) - goto err_out; - - for (i = 0; i < n_tx; i++) - priv->tx_bufs[i].dma_desc = - priv->tx_desc_dma + i * sizeof(struct nb8800_tx_desc); - - nb8800_dma_reset(dev); - - return 0; - -err_out: - nb8800_dma_free(dev); - - return -ENOMEM; -} - -static int nb8800_dma_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct nb8800_tx_buf *txb = &priv->tx_bufs[0]; - struct nb8800_tx_desc *txd = &priv->tx_descs[0]; - int retry = 5; - u32 txcr; - u32 rxcr; - int err; - unsigned int i; - - /* wait for tx to finish */ - err = readl_poll_timeout_atomic(priv->base + NB8800_TXC_CR, txcr, - !(txcr & TCR_EN) && - priv->tx_done == priv->tx_next, - 1000, 1000000); - if (err) - return err; - - /* The rx DMA only stops if it reaches the end of chain. - * To make this happen, we set the EOC flag on all rx - * descriptors, put the device in loopback mode, and send - * a few dummy frames. The interrupt handler will ignore - * these since NAPI is disabled and no real frames are in - * the tx queue. - */ - - for (i = 0; i < RX_DESC_COUNT; i++) - priv->rx_descs[i].desc.config |= DESC_EOC; - - txd->desc[0].s_addr = - txb->dma_desc + offsetof(struct nb8800_tx_desc, buf); - txd->desc[0].config = DESC_BTS(2) | DESC_DS | DESC_EOF | DESC_EOC | 8; - memset(txd->buf, 0, sizeof(txd->buf)); - - nb8800_mac_af(dev, false); - nb8800_setb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - - do { - nb8800_writel(priv, NB8800_TX_DESC_ADDR, txb->dma_desc); - wmb(); - nb8800_writel(priv, NB8800_TXC_CR, txcr | TCR_EN); - - err = readl_poll_timeout_atomic(priv->base + NB8800_RXC_CR, - rxcr, !(rxcr & RCR_EN), - 1000, 100000); - } while (err && --retry); - - nb8800_mac_af(dev, true); - nb8800_clearb(priv, NB8800_MAC_MODE, LOOPBACK_EN); - nb8800_dma_reset(dev); - - return retry ? 
0 : -ETIMEDOUT; -} - -static void nb8800_pause_adv(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - if (!phydev) - return; - - phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx); -} - -static int nb8800_open(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev; - int err; - - /* clear any pending interrupts */ - nb8800_writel(priv, NB8800_RXC_SR, 0xf); - nb8800_writel(priv, NB8800_TXC_SR, 0xf); - - err = nb8800_dma_init(dev); - if (err) - return err; - - err = request_irq(dev->irq, nb8800_irq, 0, dev_name(&dev->dev), dev); - if (err) - goto err_free_dma; - - nb8800_mac_rx(dev, true); - nb8800_mac_tx(dev, true); - - phydev = of_phy_connect(dev, priv->phy_node, - nb8800_link_reconfigure, 0, - priv->phy_mode); - if (!phydev) { - err = -ENODEV; - goto err_free_irq; - } - - nb8800_pause_adv(dev); - - netdev_reset_queue(dev); - napi_enable(&priv->napi); - netif_start_queue(dev); - - nb8800_start_rx(dev); - phy_start(phydev); - - return 0; - -err_free_irq: - free_irq(dev->irq, dev); -err_free_dma: - nb8800_dma_free(dev); - - return err; -} - -static int nb8800_stop(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - phy_stop(phydev); - - netif_stop_queue(dev); - napi_disable(&priv->napi); - - nb8800_dma_stop(dev); - nb8800_mac_rx(dev, false); - nb8800_mac_tx(dev, false); - - phy_disconnect(phydev); - - free_irq(dev->irq, dev); - - nb8800_dma_free(dev); - - return 0; -} - -static const struct net_device_ops nb8800_netdev_ops = { - .ndo_open = nb8800_open, - .ndo_stop = nb8800_stop, - .ndo_start_xmit = nb8800_xmit, - .ndo_set_mac_address = nb8800_set_mac_address, - .ndo_set_rx_mode = nb8800_set_rx_mode, - .ndo_do_ioctl = phy_do_ioctl, - .ndo_validate_addr = eth_validate_addr, -}; - -static void nb8800_get_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - pp->autoneg = priv->pause_aneg; - pp->rx_pause = priv->pause_rx; - pp->tx_pause = priv->pause_tx; -} - -static int nb8800_set_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pp) -{ - struct nb8800_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - priv->pause_aneg = pp->autoneg; - priv->pause_rx = pp->rx_pause; - priv->pause_tx = pp->tx_pause; - - nb8800_pause_adv(dev); - - if (!priv->pause_aneg) - nb8800_pause_config(dev); - else if (phydev) - phy_start_aneg(phydev); - - return 0; -} - -static const char nb8800_stats_names[][ETH_GSTRING_LEN] = { - "rx_bytes_ok", - "rx_frames_ok", - "rx_undersize_frames", - "rx_fragment_frames", - "rx_64_byte_frames", - "rx_127_byte_frames", - "rx_255_byte_frames", - "rx_511_byte_frames", - "rx_1023_byte_frames", - "rx_max_size_frames", - "rx_oversize_frames", - "rx_bad_fcs_frames", - "rx_broadcast_frames", - "rx_multicast_frames", - "rx_control_frames", - "rx_pause_frames", - "rx_unsup_control_frames", - "rx_align_error_frames", - "rx_overrun_frames", - "rx_jabber_frames", - "rx_bytes", - "rx_frames", - - "tx_bytes_ok", - "tx_frames_ok", - "tx_64_byte_frames", - "tx_127_byte_frames", - "tx_255_byte_frames", - "tx_511_byte_frames", - "tx_1023_byte_frames", - "tx_max_size_frames", - "tx_oversize_frames", - "tx_broadcast_frames", - "tx_multicast_frames", - "tx_control_frames", - "tx_pause_frames", - "tx_underrun_frames", - "tx_single_collision_frames", - "tx_multi_collision_frames", - 
"tx_deferred_collision_frames", - "tx_late_collision_frames", - "tx_excessive_collision_frames", - "tx_bytes", - "tx_frames", - "tx_collisions", -}; - -#define NB8800_NUM_STATS ARRAY_SIZE(nb8800_stats_names) - -static int nb8800_get_sset_count(struct net_device *dev, int sset) -{ - if (sset == ETH_SS_STATS) - return NB8800_NUM_STATS; - - return -EOPNOTSUPP; -} - -static void nb8800_get_strings(struct net_device *dev, u32 sset, u8 *buf) -{ - if (sset == ETH_SS_STATS) - memcpy(buf, &nb8800_stats_names, sizeof(nb8800_stats_names)); -} - -static u32 nb8800_read_stat(struct net_device *dev, int index) -{ - struct nb8800_priv *priv = netdev_priv(dev); - - nb8800_writeb(priv, NB8800_STAT_INDEX, index); - - return nb8800_readl(priv, NB8800_STAT_DATA); -} - -static void nb8800_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *estats, u64 *st) -{ - unsigned int i; - u32 rx, tx; - - for (i = 0; i < NB8800_NUM_STATS / 2; i++) { - rx = nb8800_read_stat(dev, i); - tx = nb8800_read_stat(dev, i | 0x80); - st[i] = rx; - st[i + NB8800_NUM_STATS / 2] = tx; - } -} - -static const struct ethtool_ops nb8800_ethtool_ops = { - .nway_reset = phy_ethtool_nway_reset, - .get_link = ethtool_op_get_link, - .get_pauseparam = nb8800_get_pauseparam, - .set_pauseparam = nb8800_set_pauseparam, - .get_sset_count = nb8800_get_sset_count, - .get_strings = nb8800_get_strings, - .get_ethtool_stats = nb8800_get_ethtool_stats, - .get_link_ksettings = phy_ethtool_get_link_ksettings, - .set_link_ksettings = phy_ethtool_set_link_ksettings, -}; - -static int nb8800_hw_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 val; - - val = TX_RETRY_EN | TX_PAD_EN | TX_APPEND_FCS; - nb8800_writeb(priv, NB8800_TX_CTL1, val); - - /* Collision retry count */ - nb8800_writeb(priv, NB8800_TX_CTL2, 5); - - val = RX_PAD_STRIP | RX_AF_EN; - nb8800_writeb(priv, NB8800_RX_CTL, val); - - /* Chosen by fair dice roll */ - nb8800_writeb(priv, NB8800_RANDOM_SEED, 4); - - /* TX cycles per deferral period */ - nb8800_writeb(priv, NB8800_TX_SDP, 12); - - /* The following three threshold values have been - * experimentally determined for good results. 
- */ - - /* RX/TX FIFO threshold for partial empty (64-bit entries) */ - nb8800_writeb(priv, NB8800_PE_THRESHOLD, 0); - - /* RX/TX FIFO threshold for partial full (64-bit entries) */ - nb8800_writeb(priv, NB8800_PF_THRESHOLD, 255); - - /* Buffer size for transmit (64-bit entries) */ - nb8800_writeb(priv, NB8800_TX_BUFSIZE, 64); - - /* Configure tx DMA */ - - val = nb8800_readl(priv, NB8800_TXC_CR); - val &= TCR_LE; /* keep endian setting */ - val |= TCR_DM; /* DMA descriptor mode */ - val |= TCR_RS; /* automatically store tx status */ - val |= TCR_DIE; /* interrupt on DMA chain completion */ - val |= TCR_TFI(7); /* interrupt after 7 frames transmitted */ - val |= TCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_TXC_CR, val); - - /* TX complete interrupt after 10 ms or 7 frames (see above) */ - val = clk_get_rate(priv->clk) / 100; - nb8800_writel(priv, NB8800_TX_ITR, val); - - /* Configure rx DMA */ - - val = nb8800_readl(priv, NB8800_RXC_CR); - val &= RCR_LE; /* keep endian setting */ - val |= RCR_DM; /* DMA descriptor mode */ - val |= RCR_RS; /* automatically store rx status */ - val |= RCR_DIE; /* interrupt at end of DMA chain */ - val |= RCR_RFI(7); /* interrupt after 7 frames received */ - val |= RCR_BTS(2); /* 32-byte bus transaction size */ - nb8800_writel(priv, NB8800_RXC_CR, val); - - /* The rx interrupt can fire before the DMA has completed - * unless a small delay is added. 50 us is hopefully enough. - */ - priv->rx_itr_irq = clk_get_rate(priv->clk) / 20000; - - /* In NAPI poll mode we want to disable interrupts, but the - * hardware does not permit this. Delay 10 ms instead. - */ - priv->rx_itr_poll = clk_get_rate(priv->clk) / 100; - - nb8800_writel(priv, NB8800_RX_ITR, priv->rx_itr_irq); - - priv->rx_dma_config = RX_BUF_SIZE | DESC_BTS(2) | DESC_DS | DESC_EOF; - - /* Flow control settings */ - - /* Pause time of 0.1 ms */ - val = 100000 / 512; - nb8800_writeb(priv, NB8800_PQ1, val >> 8); - nb8800_writeb(priv, NB8800_PQ2, val & 0xff); - - /* Auto-negotiate by default */ - priv->pause_aneg = true; - priv->pause_rx = true; - priv->pause_tx = true; - - nb8800_mc_init(dev, 0); - - return 0; -} - -static int nb8800_tangox_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - u32 pad_mode = PAD_MODE_MII; - - switch (priv->phy_mode) { - case PHY_INTERFACE_MODE_MII: - case PHY_INTERFACE_MODE_GMII: - pad_mode = PAD_MODE_MII; - break; - - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - pad_mode = PAD_MODE_RGMII; - break; - - default: - dev_err(dev->dev.parent, "unsupported phy mode %s\n", - phy_modes(priv->phy_mode)); - return -EINVAL; - } - - nb8800_writeb(priv, NB8800_TANGOX_PAD_MODE, pad_mode); - - return 0; -} - -static int nb8800_tangox_reset(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int clk_div; - - nb8800_writeb(priv, NB8800_TANGOX_RESET, 0); - usleep_range(1000, 10000); - nb8800_writeb(priv, NB8800_TANGOX_RESET, 1); - - wmb(); /* ensure reset is cleared before proceeding */ - - clk_div = DIV_ROUND_UP(clk_get_rate(priv->clk), 2 * MAX_MDC_CLOCK); - nb8800_writew(priv, NB8800_TANGOX_MDIO_CLKDIV, clk_div); - - return 0; -} - -static const struct nb8800_ops nb8800_tangox_ops = { - .init = nb8800_tangox_init, - .reset = nb8800_tangox_reset, -}; - -static int nb8800_tango4_init(struct net_device *dev) -{ - struct nb8800_priv *priv = netdev_priv(dev); - int err; - - err = nb8800_tangox_init(dev); - if (err) 
- return err; - - /* On tango4 interrupt on DMA completion per frame works and gives - * better performance despite generating more rx interrupts. - */ - - /* Disable unnecessary interrupt on rx completion */ - nb8800_clearl(priv, NB8800_RXC_CR, RCR_RFI(7)); - - /* Request interrupt on descriptor DMA completion */ - priv->rx_dma_config |= DESC_ID; - - return 0; -} - -static const struct nb8800_ops nb8800_tango4_ops = { - .init = nb8800_tango4_init, - .reset = nb8800_tangox_reset, -}; - -static const struct of_device_id nb8800_dt_ids[] = { - { - .compatible = "aurora,nb8800", - }, - { - .compatible = "sigma,smp8642-ethernet", - .data = &nb8800_tangox_ops, - }, - { - .compatible = "sigma,smp8734-ethernet", - .data = &nb8800_tango4_ops, - }, - { } -}; -MODULE_DEVICE_TABLE(of, nb8800_dt_ids); - -static int nb8800_probe(struct platform_device *pdev) -{ - const struct of_device_id *match; - const struct nb8800_ops *ops = NULL; - struct nb8800_priv *priv; - struct resource *res; - struct net_device *dev; - struct mii_bus *bus; - const unsigned char *mac; - void __iomem *base; - int irq; - int ret; - - match = of_match_device(nb8800_dt_ids, &pdev->dev); - if (match) - ops = match->data; - - irq = platform_get_irq(pdev, 0); - if (irq <= 0) - return -EINVAL; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); - - dev_dbg(&pdev->dev, "AU-NB8800 Ethernet at %pa\n", &res->start); - - dev = alloc_etherdev(sizeof(*priv)); - if (!dev) - return -ENOMEM; - - platform_set_drvdata(pdev, dev); - SET_NETDEV_DEV(dev, &pdev->dev); - - priv = netdev_priv(dev); - priv->base = base; - - ret = of_get_phy_mode(pdev->dev.of_node, &priv->phy_mode); - if (ret) - priv->phy_mode = PHY_INTERFACE_MODE_RGMII; - - priv->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(priv->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - ret = PTR_ERR(priv->clk); - goto err_free_dev; - } - - ret = clk_prepare_enable(priv->clk); - if (ret) - goto err_free_dev; - - spin_lock_init(&priv->tx_lock); - - if (ops && ops->reset) { - ret = ops->reset(dev); - if (ret) - goto err_disable_clk; - } - - bus = devm_mdiobus_alloc(&pdev->dev); - if (!bus) { - ret = -ENOMEM; - goto err_disable_clk; - } - - bus->name = "nb8800-mii"; - bus->read = nb8800_mdio_read; - bus->write = nb8800_mdio_write; - bus->parent = &pdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%lx.nb8800-mii", - (unsigned long)res->start); - bus->priv = priv; - - ret = of_mdiobus_register(bus, pdev->dev.of_node); - if (ret) { - dev_err(&pdev->dev, "failed to register MII bus\n"); - goto err_disable_clk; - } - - if (of_phy_is_fixed_link(pdev->dev.of_node)) { - ret = of_phy_register_fixed_link(pdev->dev.of_node); - if (ret < 0) { - dev_err(&pdev->dev, "bad fixed-link spec\n"); - goto err_free_bus; - } - priv->phy_node = of_node_get(pdev->dev.of_node); - } - - if (!priv->phy_node) - priv->phy_node = of_parse_phandle(pdev->dev.of_node, - "phy-handle", 0); - - if (!priv->phy_node) { - dev_err(&pdev->dev, "no PHY specified\n"); - ret = -ENODEV; - goto err_free_bus; - } - - priv->mii_bus = bus; - - ret = nb8800_hw_init(dev); - if (ret) - goto err_deregister_fixed_link; - - if (ops && ops->init) { - ret = ops->init(dev); - if (ret) - goto err_deregister_fixed_link; - } - - dev->netdev_ops = &nb8800_netdev_ops; - dev->ethtool_ops = &nb8800_ethtool_ops; - dev->flags |= IFF_MULTICAST; - dev->irq = irq; - - mac = of_get_mac_address(pdev->dev.of_node); - if (!IS_ERR(mac)) - 
ether_addr_copy(dev->dev_addr, mac); - - if (!is_valid_ether_addr(dev->dev_addr)) - eth_hw_addr_random(dev); - - nb8800_update_mac_addr(dev); - - netif_carrier_off(dev); - - ret = register_netdev(dev); - if (ret) { - netdev_err(dev, "failed to register netdev\n"); - goto err_free_dma; - } - - netif_napi_add(dev, &priv->napi, nb8800_poll, NAPI_POLL_WEIGHT); - - netdev_info(dev, "MAC address %pM\n", dev->dev_addr); - - return 0; - -err_free_dma: - nb8800_dma_free(dev); -err_deregister_fixed_link: - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); -err_free_bus: - of_node_put(priv->phy_node); - mdiobus_unregister(bus); -err_disable_clk: - clk_disable_unprepare(priv->clk); -err_free_dev: - free_netdev(dev); - - return ret; -} - -static int nb8800_remove(struct platform_device *pdev) -{ - struct net_device *ndev = platform_get_drvdata(pdev); - struct nb8800_priv *priv = netdev_priv(ndev); - - unregister_netdev(ndev); - if (of_phy_is_fixed_link(pdev->dev.of_node)) - of_phy_deregister_fixed_link(pdev->dev.of_node); - of_node_put(priv->phy_node); - - mdiobus_unregister(priv->mii_bus); - - clk_disable_unprepare(priv->clk); - - nb8800_dma_free(ndev); - free_netdev(ndev); - - return 0; -} - -static struct platform_driver nb8800_driver = { - .driver = { - .name = "nb8800", - .of_match_table = nb8800_dt_ids, - }, - .probe = nb8800_probe, - .remove = nb8800_remove, -}; - -module_platform_driver(nb8800_driver); - -MODULE_DESCRIPTION("Aurora AU-NB8800 Ethernet driver"); -MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>"); -MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/aurora/nb8800.h b/drivers/net/ethernet/aurora/nb8800.h deleted file mode 100644 index 40941fb6065b..000000000000 --- a/drivers/net/ethernet/aurora/nb8800.h +++ /dev/null @@ -1,316 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NB8800_H_ -#define _NB8800_H_ - -#include <linux/types.h> -#include <linux/skbuff.h> -#include <linux/phy.h> -#include <linux/clk.h> -#include <linux/bitops.h> - -#define RX_DESC_COUNT 256 -#define TX_DESC_COUNT 256 - -#define NB8800_DESC_LOW 4 - -#define RX_BUF_SIZE 1552 - -#define RX_COPYBREAK 256 -#define RX_COPYHDR 128 - -#define MAX_MDC_CLOCK 2500000 - -/* Stargate Solutions SSN8800 core registers */ -#define NB8800_TX_CTL1 0x000 -#define TX_TPD BIT(5) -#define TX_APPEND_FCS BIT(4) -#define TX_PAD_EN BIT(3) -#define TX_RETRY_EN BIT(2) -#define TX_EN BIT(0) - -#define NB8800_TX_CTL2 0x001 - -#define NB8800_RX_CTL 0x004 -#define RX_BC_DISABLE BIT(7) -#define RX_RUNT BIT(6) -#define RX_AF_EN BIT(5) -#define RX_PAUSE_EN BIT(3) -#define RX_SEND_CRC BIT(2) -#define RX_PAD_STRIP BIT(1) -#define RX_EN BIT(0) - -#define NB8800_RANDOM_SEED 0x008 -#define NB8800_TX_SDP 0x14 -#define NB8800_TX_TPDP1 0x18 -#define NB8800_TX_TPDP2 0x19 -#define NB8800_SLOT_TIME 0x1c - -#define NB8800_MDIO_CMD 0x020 -#define MDIO_CMD_GO BIT(31) -#define MDIO_CMD_WR BIT(26) -#define MDIO_CMD_ADDR(x) ((x) << 21) -#define MDIO_CMD_REG(x) ((x) << 16) -#define MDIO_CMD_DATA(x) ((x) << 0) - -#define NB8800_MDIO_STS 0x024 -#define MDIO_STS_ERR BIT(31) - -#define NB8800_MC_ADDR(i) (0x028 + (i)) -#define NB8800_MC_INIT 0x02e -#define NB8800_UC_ADDR(i) (0x03c + (i)) - -#define NB8800_MAC_MODE 0x044 -#define RGMII_MODE BIT(7) -#define HALF_DUPLEX BIT(4) -#define BURST_EN BIT(3) -#define LOOPBACK_EN BIT(2) -#define GMAC_MODE BIT(0) - -#define NB8800_IC_THRESHOLD 0x050 -#define NB8800_PE_THRESHOLD 0x051 -#define NB8800_PF_THRESHOLD 0x052 -#define NB8800_TX_BUFSIZE 0x054 -#define 
NB8800_FIFO_CTL 0x056 -#define NB8800_PQ1 0x060 -#define NB8800_PQ2 0x061 -#define NB8800_SRC_ADDR(i) (0x06a + (i)) -#define NB8800_STAT_DATA 0x078 -#define NB8800_STAT_INDEX 0x07c -#define NB8800_STAT_CLEAR 0x07d - -#define NB8800_SLEEP_MODE 0x07e -#define SLEEP_MODE BIT(0) - -#define NB8800_WAKEUP 0x07f -#define WAKEUP BIT(0) - -/* Aurora NB8800 host interface registers */ -#define NB8800_TXC_CR 0x100 -#define TCR_LK BIT(12) -#define TCR_DS BIT(11) -#define TCR_BTS(x) (((x) & 0x7) << 8) -#define TCR_DIE BIT(7) -#define TCR_TFI(x) (((x) & 0x7) << 4) -#define TCR_LE BIT(3) -#define TCR_RS BIT(2) -#define TCR_DM BIT(1) -#define TCR_EN BIT(0) - -#define NB8800_TXC_SR 0x104 -#define TSR_DE BIT(3) -#define TSR_DI BIT(2) -#define TSR_TO BIT(1) -#define TSR_TI BIT(0) - -#define NB8800_TX_SAR 0x108 -#define NB8800_TX_DESC_ADDR 0x10c - -#define NB8800_TX_REPORT_ADDR 0x110 -#define TX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xffff) -#define TX_FIRST_DEFERRAL BIT(7) -#define TX_EARLY_COLLISIONS(x) (((x) >> 3) & 0xf) -#define TX_LATE_COLLISION BIT(2) -#define TX_PACKET_DROPPED BIT(1) -#define TX_FIFO_UNDERRUN BIT(0) -#define IS_TX_ERROR(r) ((r) & 0x07) - -#define NB8800_TX_FIFO_SR 0x114 -#define NB8800_TX_ITR 0x118 - -#define NB8800_RXC_CR 0x200 -#define RCR_FL BIT(13) -#define RCR_LK BIT(12) -#define RCR_DS BIT(11) -#define RCR_BTS(x) (((x) & 7) << 8) -#define RCR_DIE BIT(7) -#define RCR_RFI(x) (((x) & 7) << 4) -#define RCR_LE BIT(3) -#define RCR_RS BIT(2) -#define RCR_DM BIT(1) -#define RCR_EN BIT(0) - -#define NB8800_RXC_SR 0x204 -#define RSR_DE BIT(3) -#define RSR_DI BIT(2) -#define RSR_RO BIT(1) -#define RSR_RI BIT(0) - -#define NB8800_RX_SAR 0x208 -#define NB8800_RX_DESC_ADDR 0x20c - -#define NB8800_RX_REPORT_ADDR 0x210 -#define RX_BYTES_TRANSFERRED(x) (((x) >> 16) & 0xFFFF) -#define RX_MULTICAST_PKT BIT(9) -#define RX_BROADCAST_PKT BIT(8) -#define RX_LENGTH_ERR BIT(7) -#define RX_FCS_ERR BIT(6) -#define RX_RUNT_PKT BIT(5) -#define RX_FIFO_OVERRUN BIT(4) -#define RX_LATE_COLLISION BIT(3) -#define RX_ALIGNMENT_ERROR BIT(2) -#define RX_ERROR_MASK 0xfc -#define IS_RX_ERROR(r) ((r) & RX_ERROR_MASK) - -#define NB8800_RX_FIFO_SR 0x214 -#define NB8800_RX_ITR 0x218 - -/* Sigma Designs SMP86xx additional registers */ -#define NB8800_TANGOX_PAD_MODE 0x400 -#define PAD_MODE_MASK 0x7 -#define PAD_MODE_MII 0x0 -#define PAD_MODE_RGMII 0x1 -#define PAD_MODE_GTX_CLK_INV BIT(3) -#define PAD_MODE_GTX_CLK_DELAY BIT(4) - -#define NB8800_TANGOX_MDIO_CLKDIV 0x420 -#define NB8800_TANGOX_RESET 0x424 - -/* Hardware DMA descriptor */ -struct nb8800_dma_desc { - u32 s_addr; /* start address */ - u32 n_addr; /* next descriptor address */ - u32 r_addr; /* report address */ - u32 config; -} __aligned(8); - -#define DESC_ID BIT(23) -#define DESC_EOC BIT(22) -#define DESC_EOF BIT(21) -#define DESC_LK BIT(20) -#define DESC_DS BIT(19) -#define DESC_BTS(x) (((x) & 0x7) << 16) - -/* DMA descriptor and associated data for rx. - * Allocated from coherent memory. - */ -struct nb8800_rx_desc { - /* DMA descriptor */ - struct nb8800_dma_desc desc; - - /* Status report filled in by hardware */ - u32 report; -}; - -/* Address of buffer on rx ring */ -struct nb8800_rx_buf { - struct page *page; - unsigned long offset; -}; - -/* DMA descriptors and associated data for tx. - * Allocated from coherent memory. - */ -struct nb8800_tx_desc { - /* DMA descriptor. The second descriptor is used if packet - * data is unaligned. 
- */ - struct nb8800_dma_desc desc[2]; - - /* Status report filled in by hardware */ - u32 report; - - /* Bounce buffer for initial unaligned part of packet */ - u8 buf[8] __aligned(8); -}; - -/* Packet in tx queue */ -struct nb8800_tx_buf { - /* Currently queued skb */ - struct sk_buff *skb; - - /* DMA address of the first descriptor */ - dma_addr_t dma_desc; - - /* DMA address of packet data */ - dma_addr_t dma_addr; - - /* Length of DMA mapping, less than skb->len if alignment - * buffer is used. - */ - unsigned int dma_len; - - /* Number of packets in chain starting here */ - unsigned int chain_len; - - /* Packet chain ready to be submitted to hardware */ - bool ready; -}; - -struct nb8800_priv { - struct napi_struct napi; - - void __iomem *base; - - /* RX DMA descriptors */ - struct nb8800_rx_desc *rx_descs; - - /* RX buffers referenced by DMA descriptors */ - struct nb8800_rx_buf *rx_bufs; - - /* Current end of chain */ - u32 rx_eoc; - - /* Value for rx interrupt time register in NAPI interrupt mode */ - u32 rx_itr_irq; - - /* Value for rx interrupt time register in NAPI poll mode */ - u32 rx_itr_poll; - - /* Value for config field of rx DMA descriptors */ - u32 rx_dma_config; - - /* TX DMA descriptors */ - struct nb8800_tx_desc *tx_descs; - - /* TX packet queue */ - struct nb8800_tx_buf *tx_bufs; - - /* Number of free tx queue entries */ - atomic_t tx_free; - - /* First free tx queue entry */ - u32 tx_next; - - /* Next buffer to transmit */ - u32 tx_queue; - - /* Start of current packet chain */ - struct nb8800_tx_buf *tx_chain; - - /* Next buffer to reclaim */ - u32 tx_done; - - /* Lock for DMA activation */ - spinlock_t tx_lock; - - struct mii_bus *mii_bus; - struct device_node *phy_node; - - /* PHY connection type from DT */ - phy_interface_t phy_mode; - - /* Current link status */ - int speed; - int duplex; - int link; - - /* Pause settings */ - bool pause_aneg; - bool pause_rx; - bool pause_tx; - - /* DMA base address of rx descriptors, see rx_descs above */ - dma_addr_t rx_desc_dma; - - /* DMA base address of tx descriptors, see tx_descs above */ - dma_addr_t tx_desc_dma; - - struct clk *clk; -}; - -struct nb8800_ops { - int (*init)(struct net_device *dev); - int (*reset)(struct net_device *dev); -}; - -#endif /* _NB8800_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d31a5ad7522a..f508c5c61a30 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -255,6 +255,7 @@ static const u16 bnxt_async_events_arr[] = { ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE, ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY, ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY, + ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION, ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG, }; @@ -1265,8 +1266,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, } else { tpa_info->hash_type = PKT_HASH_TYPE_NONE; tpa_info->gso_type = 0; - if (netif_msg_rx_err(bp)) - netdev_warn(bp->dev, "TPA packet without valid hash\n"); + netif_warn(bp, rx_err, bp->dev, "TPA packet without valid hash\n"); } tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2); tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata); @@ -2021,10 +2021,9 @@ static int bnxt_async_event_process(struct bnxt *bp, goto async_event_process_exit; set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event); break; - case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: - if (netif_msg_hw(bp)) - netdev_warn(bp->dev, "Received 
RESET_NOTIFY event, data1: 0x%x, data2: 0x%x\n", - data1, data2); + case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: { + char *fatal_str = "non-fatal"; + if (!bp->fw_health) goto async_event_process_exit; @@ -2036,14 +2035,17 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!bp->fw_reset_max_dsecs) bp->fw_reset_max_dsecs = BNXT_DFLT_FW_RST_MAX_DSECS; if (EVENT_DATA1_RESET_NOTIFY_FATAL(data1)) { - netdev_warn(bp->dev, "Firmware fatal reset event received\n"); + fatal_str = "fatal"; set_bit(BNXT_STATE_FW_FATAL_COND, &bp->state); - } else { - netdev_warn(bp->dev, "Firmware non-fatal reset event received, max wait time %d msec\n", - bp->fw_reset_max_dsecs * 100); } + netif_warn(bp, hw, bp->dev, + "Firmware %s reset event, data1: 0x%x, data2: 0x%x, min wait %u ms, max wait %u ms\n", + fatal_str, data1, data2, + bp->fw_reset_min_dsecs * 100, + bp->fw_reset_max_dsecs * 100); set_bit(BNXT_FW_RESET_NOTIFY_SP_EVENT, &bp->sp_event); break; + } case ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY: { struct bnxt_fw_health *fw_health = bp->fw_health; @@ -2055,13 +2057,11 @@ static int bnxt_async_event_process(struct bnxt *bp, if (!fw_health->enabled) break; - if (netif_msg_drv(bp)) - netdev_info(bp->dev, "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", - fw_health->enabled, fw_health->master, - bnxt_fw_health_readl(bp, - BNXT_FW_RESET_CNT_REG), - bnxt_fw_health_readl(bp, - BNXT_FW_HEALTH_REG)); + netif_info(bp, drv, bp->dev, + "Error recovery info: error recovery[%d], master[%d], reset count[0x%x], health status: 0x%x\n", + fw_health->enabled, fw_health->master, + bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG), + bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG)); fw_health->tmr_multiplier = DIV_ROUND_UP(fw_health->polling_dsecs * HZ, bp->current_interval * 10); @@ -2072,6 +2072,11 @@ static int bnxt_async_event_process(struct bnxt *bp, bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); goto async_event_process_exit; } + case ASYNC_EVENT_CMPL_EVENT_ID_DEBUG_NOTIFICATION: + netif_notice(bp, hw, bp->dev, + "Received firmware debug notification, data1: 0x%x, data2: 0x%x\n", + data1, data2); + goto async_event_process_exit; case ASYNC_EVENT_CMPL_EVENT_ID_RING_MONITOR_MSG: { struct bnxt_rx_ring_info *rxr; u16 grp_idx; @@ -2394,6 +2399,10 @@ static int bnxt_poll(struct napi_struct *napi, int budget) struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; int work_done = 0; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } while (1) { work_done += bnxt_poll_work(bp, cpr, budget - work_done); @@ -2468,6 +2477,10 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget) int work_done = 0; u32 cons; + if (unlikely(test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state))) { + napi_complete(napi); + return 0; + } if (cpr->has_more_work) { cpr->has_more_work = 0; work_done = __bnxt_poll_cqs(bp, bnapi, budget); @@ -4272,6 +4285,9 @@ static void bnxt_disable_int_sync(struct bnxt *bp) { int i; + if (!bp->irq_tbl) + return; + atomic_inc(&bp->intr_sem); bnxt_disable_int(bp); @@ -4425,6 +4441,8 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, if (!timeout) timeout = DFLT_HWRM_CMD_TIMEOUT; + /* Limit timeout to an upper limit */ + timeout = min(timeout, HWRM_CMD_MAX_TIMEOUT); /* convert timeout to usec */ timeout *= 1000; @@ -6845,6 +6863,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) struct hwrm_func_backing_store_cfg_input req = {0}; struct bnxt_ctx_mem_info *ctx = bp->ctx; 
struct bnxt_ctx_pg_info *ctx_pg; + u32 req_len = sizeof(req); __le32 *num_entries; __le64 *pg_dir; u32 flags = 0; @@ -6855,6 +6874,8 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) if (!ctx) return 0; + if (req_len > bp->hwrm_max_ext_req_len) + req_len = BNXT_BACKING_STORE_CFG_LEGACY_LEN; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_BACKING_STORE_CFG, -1, -1); req.enables = cpu_to_le32(enables); @@ -6938,7 +6959,7 @@ static int bnxt_hwrm_func_backing_store_cfg(struct bnxt *bp, u32 enables) bnxt_hwrm_set_pg_attr(&ctx_pg->ring_mem, pg_attr, pg_dir); } req.flags = cpu_to_le32(flags); - return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + return hwrm_send_message(bp, &req, req_len, HWRM_CMD_TIMEOUT); } static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp, @@ -7438,9 +7459,22 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) sig = readl(hs + offsetof(struct hcomm_status, sig_ver)); if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) { - if (bp->fw_health) - bp->fw_health->status_reliable = false; - return; + if (!bp->chip_num) { + __bnxt_map_fw_health_reg(bp, BNXT_GRC_REG_BASE); + bp->chip_num = readl(bp->bar0 + + BNXT_FW_HEALTH_WIN_BASE + + BNXT_GRC_REG_CHIP_NUM); + } + if (!BNXT_CHIP_P5(bp)) { + if (bp->fw_health) + bp->fw_health->status_reliable = false; + return; + } + status_loc = BNXT_GRC_REG_STATUS_P5 | + BNXT_FW_HEALTH_REG_TYPE_BAR0; + } else { + status_loc = readl(hs + offsetof(struct hcomm_status, + fw_status_loc)); } if (__bnxt_alloc_fw_health(bp)) { @@ -7448,7 +7482,6 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp) return; } - status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc)); bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc; reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc); if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) { @@ -8811,7 +8844,8 @@ static void bnxt_disable_napi(struct bnxt *bp) { int i; - if (!bp->bnapi) + if (!bp->bnapi || + test_and_set_bit(BNXT_STATE_NAPI_DISABLED, &bp->state)) return; for (i = 0; i < bp->cp_nr_rings; i++) { @@ -8828,6 +8862,7 @@ static void bnxt_enable_napi(struct bnxt *bp) { int i; + clear_bit(BNXT_STATE_NAPI_DISABLED, &bp->state); for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr; @@ -9334,13 +9369,60 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp) static int bnxt_fw_init_one(struct bnxt *bp); +static int bnxt_fw_reset_via_optee(struct bnxt *bp) +{ +#ifdef CONFIG_TEE_BNXT_FW + int rc = tee_bnxt_fw_load(); + + if (rc) + netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); + + return rc; +#else + netdev_err(bp->dev, "OP-TEE not supported\n"); + return -ENODEV; +#endif +} + +static int bnxt_try_recover_fw(struct bnxt *bp) +{ + if (bp->fw_health && bp->fw_health->status_reliable) { + int retry = 0, rc; + u32 sts; + + mutex_lock(&bp->hwrm_cmd_lock); + do { + rc = __bnxt_hwrm_ver_get(bp, true); + sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); + if (!sts || !BNXT_FW_IS_BOOTING(sts)) + break; + retry++; + } while (rc == -EBUSY && retry < BNXT_FW_RETRY); + mutex_unlock(&bp->hwrm_cmd_lock); + + if (!BNXT_FW_IS_HEALTHY(sts)) { + netdev_err(bp->dev, + "Firmware not responding, status: 0x%x\n", + sts); + rc = -ENODEV; + } + if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { + netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); + return bnxt_fw_reset_via_optee(bp); + } + return rc; + } + + return -ENODEV; +} + static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct 
hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_func_drv_if_change_input req = {0}; bool resc_reinit = false, fw_reset = false; + int rc, retry = 0; u32 flags = 0; - int rc; if (!(bp->fw_cap & BNXT_FW_CAP_IF_CHANGE)) return 0; @@ -9349,10 +9431,25 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (up) req.flags = cpu_to_le32(FUNC_DRV_IF_CHANGE_REQ_FLAGS_UP); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + while (retry < BNXT_FW_IF_RETRY) { + rc = _hwrm_send_message(bp, &req, sizeof(req), + HWRM_CMD_TIMEOUT); + if (rc != -EAGAIN) + break; + + msleep(50); + retry++; + } if (!rc) flags = le32_to_cpu(resp->flags); mutex_unlock(&bp->hwrm_cmd_lock); + + if (rc == -EAGAIN) + return rc; + if (rc && up) { + rc = bnxt_try_recover_fw(bp); + fw_reset = true; + } if (rc) return rc; @@ -9692,6 +9789,25 @@ static void bnxt_preset_reg_win(struct bnxt *bp) static int bnxt_init_dflt_ring_mode(struct bnxt *bp); +static int bnxt_reinit_after_abort(struct bnxt *bp) +{ + int rc; + + if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + return -EBUSY; + + rc = bnxt_fw_init_one(bp); + if (!rc) { + bnxt_clear_int_mode(bp); + rc = bnxt_init_int_mode(bp); + if (!rc) { + clear_bit(BNXT_STATE_ABORT_ERR, &bp->state); + set_bit(BNXT_STATE_FW_RESET_DET, &bp->state); + } + } + return rc; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -9850,8 +9966,14 @@ static int bnxt_open(struct net_device *dev) int rc; if (test_bit(BNXT_STATE_ABORT_ERR, &bp->state)) { - netdev_err(bp->dev, "A previous firmware reset did not complete, aborting\n"); - return -ENODEV; + rc = bnxt_reinit_after_abort(bp); + if (rc) { + if (rc == -EBUSY) + netdev_err(bp->dev, "A previous firmware reset has not completed, aborting\n"); + else + netdev_err(bp->dev, "Failed to reinitialize after aborted firmware reset\n"); + return -ENODEV; + } } rc = bnxt_hwrm_if_change(bp, true); @@ -10788,11 +10910,18 @@ static void bnxt_rx_ring_reset(struct bnxt *bp) static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); - /* When firmware is fatal state, disable PCI device to prevent - * any potential bad DMAs before freeing kernel memory. + /* When firmware is in fatal state, quiesce device and disable + * bus master to prevent any potential bad DMAs before freeing + * kernel memory. 
*/ - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) { + bnxt_tx_disable(bp); + bnxt_disable_napi(bp); + bnxt_disable_int_sync(bp); + bnxt_free_irq(bp); + bnxt_clear_int_mode(bp); pci_disable_device(bp->pdev); + } __bnxt_close_nic(bp, true, false); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); @@ -11180,21 +11309,6 @@ static void bnxt_init_dflt_coal(struct bnxt *bp) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; } -static int bnxt_fw_reset_via_optee(struct bnxt *bp) -{ -#ifdef CONFIG_TEE_BNXT_FW - int rc = tee_bnxt_fw_load(); - - if (rc) - netdev_err(bp->dev, "Failed FW reset via OP-TEE, rc=%d\n", rc); - - return rc; -#else - netdev_err(bp->dev, "OP-TEE not supported\n"); - return -ENODEV; -#endif -} - static int bnxt_fw_init_one_p1(struct bnxt *bp) { int rc; @@ -11203,19 +11317,10 @@ static int bnxt_fw_init_one_p1(struct bnxt *bp) rc = bnxt_hwrm_ver_get(bp); bnxt_try_map_fw_health_reg(bp); if (rc) { - if (bp->fw_health && bp->fw_health->status_reliable) { - u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - - netdev_err(bp->dev, - "Firmware not responding, status: 0x%x\n", - sts); - if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) { - netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n"); - rc = bnxt_fw_reset_via_optee(bp); - if (!rc) - rc = bnxt_hwrm_ver_get(bp); - } - } + rc = bnxt_try_recover_fw(bp); + if (rc) + return rc; + rc = bnxt_hwrm_ver_get(bp); if (rc) return rc; } @@ -11415,6 +11520,12 @@ static void bnxt_reset_all(struct bnxt *bp) bp->fw_reset_timestamp = jiffies; } +static bool bnxt_fw_reset_timeout(struct bnxt *bp) +{ + return time_after(jiffies, bp->fw_reset_timestamp + + (bp->fw_reset_max_dsecs * HZ / 10)); +} + static void bnxt_fw_reset_task(struct work_struct *work) { struct bnxt *bp = container_of(work, struct bnxt, fw_reset_task.work); @@ -11436,8 +11547,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->fw_reset_timestamp)); goto fw_reset_abort; } else if (n > 0) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bp->fw_reset_state = 0; netdev_err(bp->dev, "Firmware reset aborted, bnxt_get_registered_vfs() returns %d\n", @@ -11466,8 +11576,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); if (!(val & BNXT_FW_STATUS_SHUTDOWN) && - !time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + !bnxt_fw_reset_timeout(bp)) { bnxt_queue_fw_reset_work(bp, HZ / 5); return; } @@ -11509,8 +11618,7 @@ static void bnxt_fw_reset_task(struct work_struct *work) bp->hwrm_cmd_timeout = SHORT_HWRM_CMD_TIMEOUT; rc = __bnxt_hwrm_ver_get(bp, true); if (rc) { - if (time_after(jiffies, bp->fw_reset_timestamp + - (bp->fw_reset_max_dsecs * HZ / 10))) { + if (bnxt_fw_reset_timeout(bp)) { netdev_err(bp->dev, "Firmware reset aborted\n"); goto fw_reset_abort_status; } @@ -12542,9 +12650,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->ethtool_ops = &bnxt_ethtool_ops; pci_set_drvdata(pdev, dev); - if (BNXT_PF(bp)) - bnxt_vpd_read_info(bp); - rc = bnxt_alloc_hwrm_resources(bp); if (rc) goto init_err_pci_clean; @@ -12556,6 +12661,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + if (BNXT_PF(bp)) + bnxt_vpd_read_info(bp); + if (BNXT_CHIP_P5(bp)) { bp->flags |= BNXT_FLAG_CHIP_P5; if 
(BNXT_CHIP_SR2(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 51996c85547e..4ef6888acdc6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -656,6 +656,7 @@ struct nqe_cn { #define BNXT_HWRM_MAX_REQ_LEN (bp->hwrm_max_req_len) #define BNXT_HWRM_SHORT_REQ_LEN sizeof(struct hwrm_short_input) #define DFLT_HWRM_CMD_TIMEOUT 500 +#define HWRM_CMD_MAX_TIMEOUT 40000 #define SHORT_HWRM_CMD_TIMEOUT 20 #define HWRM_CMD_TIMEOUT (bp->hwrm_cmd_timeout) #define HWRM_RESET_TIMEOUT ((HWRM_CMD_TIMEOUT) * 4) @@ -1345,9 +1346,14 @@ struct bnxt_test_info { #define BNXT_CAG_REG_LEGACY_INT_STATUS 0x4014 #define BNXT_CAG_REG_BASE 0x300000 +#define BNXT_GRC_REG_STATUS_P5 0x520 + #define BNXT_GRCPF_REG_KONG_COMM 0xA00 #define BNXT_GRCPF_REG_KONG_COMM_TRIGGER 0xB00 +#define BNXT_GRC_REG_CHIP_NUM 0x48 +#define BNXT_GRC_REG_BASE 0x260000 + #define BNXT_GRC_BASE_MASK 0xfffff000 #define BNXT_GRC_OFFSET_MASK 0x00000ffc @@ -1441,6 +1447,8 @@ struct bnxt_ctx_pg_info { #define BNXT_MAX_TQM_RINGS \ (BNXT_MAX_TQM_SP_RINGS + BNXT_MAX_TQM_FP_RINGS) +#define BNXT_BACKING_STORE_CFG_LEGACY_LEN 256 + struct bnxt_ctx_mem_info { u32 qp_max_entries; u16 qp_min_qp1_entries; @@ -1532,9 +1540,22 @@ struct bnxt_fw_reporter_ctx { #define BNXT_FW_HEALTH_WIN_OFF(reg) (BNXT_FW_HEALTH_WIN_BASE + \ ((reg) & BNXT_GRC_OFFSET_MASK)) +#define BNXT_FW_STATUS_HEALTH_MSK 0xffff #define BNXT_FW_STATUS_HEALTHY 0x8000 #define BNXT_FW_STATUS_SHUTDOWN 0x100000 +#define BNXT_FW_IS_HEALTHY(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) ==\ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_BOOTING(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) < \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_IS_ERR(sts) (((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \ + BNXT_FW_STATUS_HEALTHY) + +#define BNXT_FW_RETRY 5 +#define BNXT_FW_IF_RETRY 10 + struct bnxt { void __iomem *bar0; void __iomem *bar1; @@ -1788,6 +1809,7 @@ struct bnxt { #define BNXT_STATE_FW_FATAL_COND 6 #define BNXT_STATE_DRV_REGISTERED 7 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN 8 +#define BNXT_STATE_NAPI_DISABLED 9 #define BNXT_NO_FW_ACCESS(bp) \ (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 6b7b69ed62db..90a31b4a3020 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -44,21 +44,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter, struct netlink_ext_ack *extack) { struct bnxt *bp = devlink_health_reporter_priv(reporter); - u32 val, health_status; + u32 val; int rc; if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) return 0; val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG); - health_status = val & 0xffff; - if (health_status < BNXT_FW_STATUS_HEALTHY) { + if (BNXT_FW_IS_BOOTING(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Not yet completed initialization"); if (rc) return rc; - } else if (health_status > BNXT_FW_STATUS_HEALTHY) { + } else if (BNXT_FW_IS_ERR(val)) { rc = devlink_fmsg_string_pair_put(fmsg, "Description", "Encountered fatal error and cannot recover"); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 2d3e962bdac3..d5c6e6a3d22d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -2,7 +2,7 @@ * * Copyright (c) 2014-2016 
Broadcom Corporation * Copyright (c) 2014-2018 Broadcom Limited - * Copyright (c) 2018-2020 Broadcom Inc. + * Copyright (c) 2018-2021 Broadcom Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -164,6 +164,7 @@ struct cmd_nums { #define HWRM_VNIC_PLCMODES_CFG 0x48UL #define HWRM_VNIC_PLCMODES_QCFG 0x49UL #define HWRM_VNIC_QCAPS 0x4aUL + #define HWRM_VNIC_UPDATE 0x4bUL #define HWRM_RING_ALLOC 0x50UL #define HWRM_RING_FREE 0x51UL #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS 0x52UL @@ -184,6 +185,9 @@ struct cmd_nums { #define HWRM_QUEUE_MPLS_QCAPS 0x80UL #define HWRM_QUEUE_MPLSTC2PRI_QCFG 0x81UL #define HWRM_QUEUE_MPLSTC2PRI_CFG 0x82UL + #define HWRM_QUEUE_VLANPRI_QCAPS 0x83UL + #define HWRM_QUEUE_VLANPRI2PRI_QCFG 0x84UL + #define HWRM_QUEUE_VLANPRI2PRI_CFG 0x85UL #define HWRM_CFA_L2_FILTER_ALLOC 0x90UL #define HWRM_CFA_L2_FILTER_FREE 0x91UL #define HWRM_CFA_L2_FILTER_CFG 0x92UL @@ -217,6 +221,8 @@ struct cmd_nums { #define HWRM_PORT_TX_FIR_CFG 0xbbUL #define HWRM_PORT_TX_FIR_QCFG 0xbcUL #define HWRM_PORT_ECN_QSTATS 0xbdUL + #define HWRM_FW_LIVEPATCH_QUERY 0xbeUL + #define HWRM_FW_LIVEPATCH 0xbfUL #define HWRM_FW_RESET 0xc0UL #define HWRM_FW_QSTATUS 0xc1UL #define HWRM_FW_HEALTH_CHECK 0xc2UL @@ -347,6 +353,8 @@ struct cmd_nums { #define HWRM_FUNC_HOST_PF_IDS_QUERY 0x197UL #define HWRM_FUNC_QSTATS_EXT 0x198UL #define HWRM_STAT_EXT_CTX_QUERY 0x199UL + #define HWRM_FUNC_SPD_CFG 0x19aUL + #define HWRM_FUNC_SPD_QCFG 0x19bUL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -359,6 +367,11 @@ struct cmd_nums { #define HWRM_MFG_HDMA_TEST 0x209UL #define HWRM_MFG_FRU_EEPROM_WRITE 0x20aUL #define HWRM_MFG_FRU_EEPROM_READ 0x20bUL + #define HWRM_MFG_SOC_IMAGE 0x20cUL + #define HWRM_MFG_SOC_QSTATUS 0x20dUL + #define HWRM_MFG_PARAM_SEEPROM_SYNC 0x20eUL + #define HWRM_MFG_PARAM_SEEPROM_READ 0x20fUL + #define HWRM_MFG_PARAM_SEEPROM_HEALTH 0x210UL #define HWRM_TF 0x2bcUL #define HWRM_TF_VERSION_GET 0x2bdUL #define HWRM_TF_SESSION_OPEN 0x2c6UL @@ -384,6 +397,7 @@ struct cmd_nums { #define HWRM_TF_EXT_EM_QCFG 0x2e9UL #define HWRM_TF_EM_INSERT 0x2eaUL #define HWRM_TF_EM_DELETE 0x2ebUL + #define HWRM_TF_EM_HASH_INSERT 0x2ecUL #define HWRM_TF_TCAM_SET 0x2f8UL #define HWRM_TF_TCAM_GET 0x2f9UL #define HWRM_TF_TCAM_MOVE 0x2faUL @@ -486,9 +500,9 @@ struct hwrm_err_output { #define HWRM_TARGET_ID_TOOLS 0xFFFD #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 -#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_RSVD 68 -#define HWRM_VERSION_STR "1.10.1.68" +#define HWRM_VERSION_UPDATE 2 +#define HWRM_VERSION_RSVD 11 +#define HWRM_VERSION_STR "1.10.2.11" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -563,8 +577,9 @@ struct hwrm_ver_get_output { __le16 max_resp_len; __le16 def_req_timeout; u8 flags; - #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL - #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY 0x1UL + #define VER_GET_RESP_FLAGS_EXT_VER_AVAIL 0x2UL + #define VER_GET_RESP_FLAGS_DEV_NOT_RDY_BACKING_STORE 0x4UL u8 unused_0[2]; u8 always_1; __le16 hwrm_intf_major; @@ -708,6 +723,7 @@ struct hwrm_async_event_cmpl { #define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE 0x3fUL #define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE 0x40UL #define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE 0x41UL + #define ASYNC_EVENT_CMPL_EVENT_ID_MAX_RGTR_EVENT_ID 0x42UL #define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG 
0xfeUL #define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL #define ASYNC_EVENT_CMPL_EVENT_ID_LAST ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR @@ -815,6 +831,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY 0x8UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY __le32 event_data2; + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_MASK 0xffffUL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA2_FW_STATUS_CODE_SFT 0 u8 opaque_v; #define ASYNC_EVENT_CMPL_RESET_NOTIFY_V 0x1UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK 0xfeUL @@ -832,7 +850,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST (0x1UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) - #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; @@ -1271,6 +1290,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_EXT_TX_PROXY_SRC_INTF_OVERRIDE_SUPPORT 0x20UL #define FUNC_QCAPS_RESP_FLAGS_EXT_SCHQ_SUPPORTED 0x40UL #define FUNC_QCAPS_RESP_FLAGS_EXT_PPP_PUSH_MODE_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_EVB_MODE_CFG_NOT_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_SOC_SPD_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FW_LIVEPATCH_SUPPORTED 0x400UL + #define FUNC_QCAPS_RESP_FLAGS_EXT_FAST_RESET_CAPABLE 0x800UL u8 max_schqs; u8 mpc_chnls_cap; #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TCE 0x1UL @@ -1315,6 +1338,7 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_FLAGS_HOT_RESET_ALLOWED 0x200UL #define FUNC_QCFG_RESP_FLAGS_PPP_PUSH_MODE_ENABLED 0x400UL #define FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED 0x800UL + #define FUNC_QCFG_RESP_FLAGS_FAST_RESET_ALLOWED 0x1000UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1731,6 +1755,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT 0x10UL #define FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT 0x20UL #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL + #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -1993,7 +2018,7 @@ struct hwrm_func_backing_store_qcaps_input { __le64 resp_addr; }; -/* hwrm_func_backing_store_qcaps_output (size:640b/80B) */ +/* hwrm_func_backing_store_qcaps_output (size:704b/88B) */ struct hwrm_func_backing_store_qcaps_output { __le16 error_code; __le16 req_type; @@ -2024,13 +2049,25 @@ struct hwrm_func_backing_store_qcaps_output { __le16 mrav_num_entries_units; u8 tqm_entries_multiple; u8 ctx_kind_initializer; - __le32 rsvd; - __le16 rsvd1; + __le16 ctx_init_mask; + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_QP 0x1UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_SRQ 0x2UL + #define 
FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_CQ 0x4UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_VNIC 0x8UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_STAT 0x10UL + #define FUNC_BACKING_STORE_QCAPS_RESP_CTX_INIT_MASK_MRAV 0x20UL + u8 qp_init_offset; + u8 srq_init_offset; + u8 cq_init_offset; + u8 vnic_init_offset; u8 tqm_fp_rings_count; + u8 stat_init_offset; + u8 mrav_init_offset; + u8 rsvd[6]; u8 valid; }; -/* hwrm_func_backing_store_cfg_input (size:2048b/256B) */ +/* hwrm_func_backing_store_cfg_input (size:2432b/304B) */ struct hwrm_func_backing_store_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -2041,22 +2078,25 @@ struct hwrm_func_backing_store_cfg_input { #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_PREBOOT_MODE 0x1UL #define FUNC_BACKING_STORE_CFG_REQ_FLAGS_MRAV_RESERVATION_SPLIT 0x2UL __le32 enables; - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL - #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_SRQ 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_CQ 0x4UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_VNIC 0x8UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_STAT 0x10UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP 0x20UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING0 0x40UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING1 0x80UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING2 0x100UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING3 0x200UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING4 0x400UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING5 0x800UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING6 0x1000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING7 0x2000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_MRAV 0x4000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM 0x8000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING8 0x10000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING9 0x20000UL + #define FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_RING10 0x40000UL u8 qpc_pg_size_qpc_lvl; #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_MASK 0xfUL #define FUNC_BACKING_STORE_CFG_REQ_QPC_LVL_SFT 0 @@ -2358,6 +2398,63 @@ struct hwrm_func_backing_store_cfg_input { __le16 tqm_entry_size; __le16 mrav_entry_size; __le16 tim_entry_size; + u8 tqm_ring8_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_1 0x1UL + #define 
FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING8_TQM_RING_PG_SIZE_PG_1G + u8 ring8_unused[3]; + __le32 tqm_ring8_num_entries; + __le64 tqm_ring8_page_dir; + u8 tqm_ring9_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING9_TQM_RING_PG_SIZE_PG_1G + u8 ring9_unused[3]; + __le32 tqm_ring9_num_entries; + __le64 tqm_ring9_page_dir; + u8 tqm_ring10_pg_size_tqm_ring_lvl; + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_MASK 0xfUL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_SFT 0 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_0 0x0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_1 0x1UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 0x2UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_LVL_LVL_2 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_MASK 0xf0UL + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_SFT 4 + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_4K (0x0UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8K (0x1UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_64K (0x2UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_2M (0x3UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_8M (0x4UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G (0x5UL << 4) + #define FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_LAST FUNC_BACKING_STORE_CFG_REQ_RING10_TQM_RING_PG_SIZE_PG_1G + u8 ring10_unused[3]; + __le32 
tqm_ring10_num_entries; + __le64 tqm_ring10_page_dir; }; /* hwrm_func_backing_store_cfg_output (size:128b/16B) */ @@ -2930,6 +3027,7 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_DUPLEX_STATE_LAST PORT_PHY_QCFG_RESP_DUPLEX_STATE_FULL u8 option_flags; #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_MEDIA_AUTO_DETECT 0x1UL + #define PORT_PHY_QCFG_RESP_OPTION_FLAGS_SIGNAL_MODE_KNOWN 0x2UL char phy_vendor_name[16]; char phy_vendor_partnumber[16]; __le16 support_pam4_speeds; @@ -3528,8 +3626,8 @@ struct hwrm_port_phy_qcaps_output { #define PORT_PHY_QCAPS_RESP_FLAGS_SHARED_PHY_CFG_SUPPORTED 0x8UL #define PORT_PHY_QCAPS_RESP_FLAGS_CUMULATIVE_COUNTERS_ON_RESET 0x10UL #define PORT_PHY_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED 0x20UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_MASK 0xc0UL - #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1_SFT 6 + #define PORT_PHY_QCAPS_RESP_FLAGS_FW_MANAGED_LINK_DOWN 0x40UL + #define PORT_PHY_QCAPS_RESP_FLAGS_RSVD1 0x80UL u8 port_cnt; #define PORT_PHY_QCAPS_RESP_PORT_CNT_UNKNOWN 0x0UL #define PORT_PHY_QCAPS_RESP_PORT_CNT_1 0x1UL @@ -4119,7 +4217,10 @@ struct hwrm_queue_qportcfg_output { #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC 0x3UL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LAST QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN - u8 unused_0; + u8 queue_id0_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_TYPE_CNP 0x4UL char qid0_name[16]; char qid1_name[16]; char qid2_name[16]; @@ -4128,7 +4229,34 @@ struct hwrm_queue_qportcfg_output { char qid5_name[16]; char qid6_name[16]; char qid7_name[16]; - u8 unused_1[7]; + u8 queue_id1_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id2_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id3_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id4_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id5_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id6_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_TYPE_CNP 0x4UL + u8 queue_id7_service_profile_type; + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_ROCE 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_NIC 0x2UL + #define 
QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_TYPE_CNP 0x4UL u8 valid; }; @@ -5142,8 +5270,10 @@ struct hwrm_vnic_alloc_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL - u8 unused_0[4]; + #define VNIC_ALLOC_REQ_FLAGS_DEFAULT 0x1UL + #define VNIC_ALLOC_REQ_FLAGS_VIRTIO_NET_FID_VALID 0x2UL + __le16 virtio_net_fid; + u8 unused_0[2]; }; /* hwrm_vnic_alloc_output (size:128b/16B) */ @@ -5260,6 +5390,8 @@ struct hwrm_vnic_qcaps_output { #define VNIC_QCAPS_RESP_FLAGS_OUTERMOST_RSS_CAP 0x80UL #define VNIC_QCAPS_RESP_FLAGS_COS_ASSIGNMENT_CAP 0x100UL #define VNIC_QCAPS_RESP_FLAGS_RX_CMPL_V2_CAP 0x200UL + #define VNIC_QCAPS_RESP_FLAGS_VNIC_STATE_CAP 0x400UL + #define VNIC_QCAPS_RESP_FLAGS_VIRTIO_NET_VNIC_ALLOC_CAP 0x800UL __le16 max_aggs_supported; u8 unused_1[5]; u8 valid; @@ -5585,7 +5717,11 @@ struct hwrm_ring_alloc_output { __le16 resp_len; __le16 ring_id; __le16 logical_ring_id; - u8 unused_0[3]; + u8 push_buffer_index; + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_ALLOC_RESP_PUSH_BUFFER_INDEX_LAST RING_ALLOC_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[2]; u8 valid; }; @@ -5644,7 +5780,11 @@ struct hwrm_ring_reset_output { __le16 req_type; __le16 seq_id; __le16 resp_len; - u8 unused_0[4]; + u8 push_buffer_index; + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PING_BUFFER 0x0UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER 0x1UL + #define RING_RESET_RESP_PUSH_BUFFER_INDEX_LAST RING_RESET_RESP_PUSH_BUFFER_INDEX_PONG_BUFFER + u8 unused_0[3]; u8 consumer_idx[3]; u8 valid; }; @@ -6988,21 +7128,23 @@ struct hwrm_cfa_adv_flow_mgnt_qcaps_output { __le16 seq_id; __le16 resp_len; __le32 flags; - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL - #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_16BIT_SUPPORTED 0x1UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_HND_64BIT_SUPPORTED 0x2UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_BATCH_DELETE_SUPPORTED 0x4UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_RESET_ALL_SUPPORTED 0x8UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_DEST_FUNC_SUPPORTED 0x10UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TX_EEM_FLOW_SUPPORTED 0x20UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RX_EEM_FLOW_SUPPORTED 0x40UL + #define 
CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_FLOW_COUNTER_ALLOC_SUPPORTED 0x80UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_SUPPORTED 0x100UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_UNTAGGED_VLAN_SUPPORTED 0x200UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_XDP_SUPPORTED 0x400UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_HEADER_SOURCE_FIELDS_SUPPORTED 0x800UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ARP_SUPPORTED 0x1000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_RFS_RING_TBL_IDX_V2_SUPPORTED 0x2000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_NTUPLE_FLOW_RX_ETHERTYPE_IP_SUPPORTED 0x4000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_TRUFLOW_CAPABLE 0x8000UL + #define CFA_ADV_FLOW_MGNT_QCAPS_RESP_FLAGS_L2_FILTER_TRAFFIC_TYPE_L2_ROCE_SUPPORTED 0x10000UL u8 unused_0[3]; u8 valid; }; @@ -7472,7 +7614,8 @@ struct hwrm_struct_hdr { #define STRUCT_HDR_STRUCT_ID_AFM_OPAQUE 0x1UL #define STRUCT_HDR_STRUCT_ID_PORT_DESCRIPTION 0xaUL #define STRUCT_HDR_STRUCT_ID_RSS_V2 0x64UL - #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_RSS_V2 + #define STRUCT_HDR_STRUCT_ID_MSIX_PER_VF 0xc8UL + #define STRUCT_HDR_STRUCT_ID_LAST STRUCT_HDR_STRUCT_ID_MSIX_PER_VF __le16 len; u8 version; u8 count; @@ -8000,6 +8143,9 @@ struct hwrm_dbg_coredump_initiate_output { struct coredump_data_hdr { __le32 address; __le32 flags_length; + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_MASK 0xffffffUL + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_ACTUAL_LEN_SFT 0 + #define COREDUMP_DATA_HDR_FLAGS_LENGTH_INDIRECT_ACCESS 0x1000000UL __le32 instance; __le32 next_offset; }; @@ -8669,7 +8815,6 @@ struct hcomm_status { #define HCOMM_STATUS_TRUE_OFFSET_MASK 0xfffffffcUL #define HCOMM_STATUS_TRUE_OFFSET_SFT 2 }; - #define HCOMM_STATUS_STRUCT_LOC 0x31001F0UL #endif /* _BNXT_HSI_H_ */ diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5143cdd0eeca..8936c2bc6286 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12826,11 +12826,13 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) offset = tg3_nvram_logical_addr(tp, offset); } - } - if (!offset || !len) { - offset = TG3_NVM_VPD_OFF; - len = TG3_NVM_VPD_LEN; + if (!offset || !len) { + offset = TG3_NVM_VPD_OFF; + len = TG3_NVM_VPD_LEN; + } + } else { + len = TG3_NVM_PCI_VPD_MAX_LEN; } buf = kmalloc(len, GFP_KERNEL); @@ -12846,26 +12848,16 @@ static __be32 *tg3_vpd_readblock(struct tg3 *tp, u32 *vpdlen) if (tg3_nvram_read_be32(tp, offset + i, &buf[i/4])) goto error; } + *vpdlen = len; } else { - u8 *ptr; ssize_t cnt; - unsigned int pos = 0; - - ptr = (u8 *)&buf[0]; - for (i = 0; pos < len && i < 3; i++, pos += cnt, ptr += cnt) { - cnt = pci_read_vpd(tp->pdev, pos, - len - pos, ptr); - if (cnt == -ETIMEDOUT || cnt == -EINTR) - cnt = 0; - else if (cnt < 0) - goto error; - } - if (pos != len) + + cnt = pci_read_vpd(tp->pdev, 0, len, (u8 *)buf); + if (cnt < 0) goto error; + *vpdlen = cnt; } - *vpdlen = len; - return buf; error: diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 1000c894064f..46ec4fdfd16a 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2101,6 +2101,7 @@ /* Hardware Legacy NVRAM layout */ #define TG3_NVM_VPD_OFF 0x100 #define TG3_NVM_VPD_LEN 256 +#define TG3_NVM_PCI_VPD_MAX_LEN 512 /* Hardware Selfboot NVRAM layout */ #define TG3_NVM_HWSB_CFG1 0x00000004 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c 
index 814a5b10141d..472bf8f220bc 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -470,6 +470,10 @@ static void macb_set_tx_clk(struct macb *bp, int speed) if (!bp->tx_clk || (bp->caps & MACB_CAPS_CLK_HW_CHG)) return; + /* In case of MII the PHY is the clock master */ + if (bp->phy_interface == PHY_INTERFACE_MODE_MII) + return; + switch (speed) { case SPEED_10: rate = 2500000; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 15542661e3d2..9f1965c80fb1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5137,7 +5137,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip) /* See if FW supports FW_FILTER2 work request */ if (is_t4(adap->params.chip)) { - adap->params.filter2_wr_support = 0; + adap->params.filter2_wr_support = false; } else { params[0] = FW_PARAM_DEV(FILTER2_WR); ret = t4_query_params(adap, adap->mbox, adap->pf, 0, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 98d01a7497ec..98829e482bfa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2689,7 +2689,6 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size) #define VPD_BASE 0x400 #define VPD_BASE_OLD 0 #define VPD_LEN 1024 -#define CHELSIO_VPD_UNIQUE_ID 0x82 /** * t4_eeprom_ptov - translate a physical EEPROM address to virtual @@ -2745,7 +2744,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) { int i, ret = 0, addr; int ec, sn, pn, na; - u8 *vpd, csum; + u8 *vpd, csum, base_val = 0; unsigned int vpdr_len, kw_offset, id_len; vpd = vmalloc(VPD_LEN); @@ -2755,17 +2754,11 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) /* Card information normally starts at VPD_BASE but early cards had * it at 0. */ - ret = pci_read_vpd(adapter->pdev, VPD_BASE, sizeof(u32), vpd); + ret = pci_read_vpd(adapter->pdev, VPD_BASE, 1, &base_val); if (ret < 0) goto out; - /* The VPD shall have a unique identifier specified by the PCI SIG. - * For chelsio adapters, the identifier is 0x82. The first byte of a VPD - * shall be CHELSIO_VPD_UNIQUE_ID (0x82). The VPD programming software - * is expected to automatically put this entry at the - * beginning of the VPD. - */ - addr = *vpd == CHELSIO_VPD_UNIQUE_ID ? VPD_BASE : VPD_BASE_OLD; + addr = base_val == PCI_VPD_LRDT_ID_STRING ? 
VPD_BASE : VPD_BASE_OLD; ret = pci_read_vpd(adapter->pdev, addr, VPD_LEN, vpd); if (ret < 0) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 6d853f018d53..ef4e2febeb5b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -70,9 +70,32 @@ static struct { module_param_named(debug, debug.msg_enable, int, 0); MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 0xffff=all)"); -static struct ucc_geth_info ugeth_primary_info = { +static int ucc_geth_thread_count(enum ucc_geth_num_of_threads idx) +{ + static const u8 count[] = { + [UCC_GETH_NUM_OF_THREADS_1] = 1, + [UCC_GETH_NUM_OF_THREADS_2] = 2, + [UCC_GETH_NUM_OF_THREADS_4] = 4, + [UCC_GETH_NUM_OF_THREADS_6] = 6, + [UCC_GETH_NUM_OF_THREADS_8] = 8, + }; + if (idx >= ARRAY_SIZE(count)) + return 0; + return count[idx]; +} + +static inline int ucc_geth_tx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + +static inline int ucc_geth_rx_queues(const struct ucc_geth_info *info) +{ + return 1; +} + +static const struct ucc_geth_info ugeth_primary_info = { .uf_info = { - .bd_mem_part = MEM_PART_SYSTEM, .rtsm = UCC_FAST_SEND_IDLES_BETWEEN_FRAMES, .max_rx_buf_length = 1536, /* adjusted at startup if max-speed 1000 */ @@ -90,8 +113,6 @@ static struct ucc_geth_info ugeth_primary_info = { .tcrc = UCC_FAST_16_BIT_CRC, .synl = UCC_FAST_SYNC_LEN_NOT_USED, }, - .numQueuesTx = 1, - .numQueuesRx = 1, .extendedFilteringChainPointer = ((uint32_t) NULL), .typeorlen = 3072 /*1536 */ , .nonBackToBackIfgPart1 = 0x40, @@ -157,8 +178,6 @@ static struct ucc_geth_info ugeth_primary_info = { .riscRx = QE_RISC_ALLOCATION_RISC1_AND_RISC2, }; -static struct ucc_geth_info ugeth_info[8]; - #ifdef DEBUG static void mem_disp(u8 *addr, int size) { @@ -558,7 +577,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) int i; int length; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { if (ugeth->p_tx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenTx[i] * @@ -567,7 +586,7 @@ static void dump_bds(struct ucc_geth_private *ugeth) mem_disp(ugeth->p_tx_bd_ring[i], length); } } - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { length = (ugeth->ug_info->bdRingLenRx[i] * @@ -671,32 +690,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) in_be32(&ugeth->ug_regs->scam)); if (ugeth->p_thread_data_tx) { - int numThreadsTxNumerical; - switch (ugeth->ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: - numThreadsTxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsTx); pr_info("Thread data TXs:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_tx); - for (i = 0; i < numThreadsTxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data TX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_tx[i]); @@ -705,32 +704,12 @@ static void dump_regs(struct ucc_geth_private *ugeth) } } if (ugeth->p_thread_data_rx) { - int numThreadsRxNumerical; - switch (ugeth->ug_info->numThreadsRx) { 
- case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: - numThreadsRxNumerical = 0; - break; - } + int count = ucc_geth_thread_count(ugeth->ug_info->numThreadsRx); pr_info("Thread data RX:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_thread_data_rx); - for (i = 0; i < numThreadsRxNumerical; i++) { + for (i = 0; i < count; i++) { pr_info("Thread data RX[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_thread_data_rx[i]); @@ -905,7 +884,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_send_q_mem_reg) { pr_info("Send Q memory registers:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_send_q_mem_reg); - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { pr_info("SQQD[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_send_q_mem_reg->sqqd[i]); @@ -937,7 +916,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) pr_info("RX IRQ coalescing tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_irq_coalescing_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX IRQ coalescing table entry[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_irq_coalescing_tbl-> @@ -959,7 +938,7 @@ static void dump_regs(struct ucc_geth_private *ugeth) if (ugeth->p_rx_bd_qs_tbl) { pr_info("RX BD QS tables:\n"); pr_info("Base address: 0x%08x\n", (u32)ugeth->p_rx_bd_qs_tbl); - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { pr_info("RX BD QS table[%d]:\n", i); pr_info("Base address: 0x%08x\n", (u32)&ugeth->p_rx_bd_qs_tbl[i]); @@ -1835,7 +1814,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ugeth->ug_info); i++) { if (ugeth->p_rx_bd_ring[i]) { /* Return existing data buffers in ring */ bd = ugeth->p_rx_bd_ring[i]; @@ -1856,12 +1835,7 @@ static void ucc_geth_free_rx(struct ucc_geth_private *ugeth) kfree(ugeth->rx_skbuff[i]); - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->rx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->rx_bd_ring_offset[i]); + kfree(ugeth->p_rx_bd_ring[i]); ugeth->p_rx_bd_ring[i] = NULL; } } @@ -1880,7 +1854,7 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - for (i = 0; i < ugeth->ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ugeth->ug_info); i++) { bd = ugeth->p_tx_bd_ring[i]; if (!bd) continue; @@ -1898,15 +1872,8 @@ static void ucc_geth_free_tx(struct ucc_geth_private *ugeth) kfree(ugeth->tx_skbuff[i]); - if (ugeth->p_tx_bd_ring[i]) { - if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_SYSTEM) - kfree((void *)ugeth->tx_bd_ring_offset[i]); - else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) - qe_muram_free(ugeth->tx_bd_ring_offset[i]); - ugeth->p_tx_bd_ring[i] = NULL; - } + kfree(ugeth->p_tx_bd_ring[i]); + ugeth->p_tx_bd_ring[i] = 
NULL; } } @@ -1921,50 +1888,39 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) ugeth->uccf = NULL; } - if (ugeth->p_thread_data_tx) { - qe_muram_free(ugeth->thread_dat_tx_offset); - ugeth->p_thread_data_tx = NULL; - } - if (ugeth->p_thread_data_rx) { - qe_muram_free(ugeth->thread_dat_rx_offset); - ugeth->p_thread_data_rx = NULL; - } - if (ugeth->p_exf_glbl_param) { - qe_muram_free(ugeth->exf_glbl_param_offset); - ugeth->p_exf_glbl_param = NULL; - } - if (ugeth->p_rx_glbl_pram) { - qe_muram_free(ugeth->rx_glbl_pram_offset); - ugeth->p_rx_glbl_pram = NULL; - } - if (ugeth->p_tx_glbl_pram) { - qe_muram_free(ugeth->tx_glbl_pram_offset); - ugeth->p_tx_glbl_pram = NULL; - } - if (ugeth->p_send_q_mem_reg) { - qe_muram_free(ugeth->send_q_mem_reg_offset); - ugeth->p_send_q_mem_reg = NULL; - } - if (ugeth->p_scheduler) { - qe_muram_free(ugeth->scheduler_offset); - ugeth->p_scheduler = NULL; - } - if (ugeth->p_tx_fw_statistics_pram) { - qe_muram_free(ugeth->tx_fw_statistics_pram_offset); - ugeth->p_tx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_fw_statistics_pram) { - qe_muram_free(ugeth->rx_fw_statistics_pram_offset); - ugeth->p_rx_fw_statistics_pram = NULL; - } - if (ugeth->p_rx_irq_coalescing_tbl) { - qe_muram_free(ugeth->rx_irq_coalescing_tbl_offset); - ugeth->p_rx_irq_coalescing_tbl = NULL; - } - if (ugeth->p_rx_bd_qs_tbl) { - qe_muram_free(ugeth->rx_bd_qs_tbl_offset); - ugeth->p_rx_bd_qs_tbl = NULL; - } + qe_muram_free_addr(ugeth->p_thread_data_tx); + ugeth->p_thread_data_tx = NULL; + + qe_muram_free_addr(ugeth->p_thread_data_rx); + ugeth->p_thread_data_rx = NULL; + + qe_muram_free_addr(ugeth->p_exf_glbl_param); + ugeth->p_exf_glbl_param = NULL; + + qe_muram_free_addr(ugeth->p_rx_glbl_pram); + ugeth->p_rx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_tx_glbl_pram); + ugeth->p_tx_glbl_pram = NULL; + + qe_muram_free_addr(ugeth->p_send_q_mem_reg); + ugeth->p_send_q_mem_reg = NULL; + + qe_muram_free_addr(ugeth->p_scheduler); + ugeth->p_scheduler = NULL; + + qe_muram_free_addr(ugeth->p_tx_fw_statistics_pram); + ugeth->p_tx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_fw_statistics_pram); + ugeth->p_rx_fw_statistics_pram = NULL; + + qe_muram_free_addr(ugeth->p_rx_irq_coalescing_tbl); + ugeth->p_rx_irq_coalescing_tbl = NULL; + + qe_muram_free_addr(ugeth->p_rx_bd_qs_tbl); + ugeth->p_rx_bd_qs_tbl = NULL; + if (ugeth->p_init_enet_param_shadow) { return_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> @@ -2073,15 +2029,8 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) ug_info = ugeth->ug_info; uf_info = &ug_info->uf_info; - if (!((uf_info->bd_mem_part == MEM_PART_SYSTEM) || - (uf_info->bd_mem_part == MEM_PART_MURAM))) { - if (netif_msg_probe(ugeth)) - pr_err("Bad memory partition value\n"); - return -EINVAL; - } - /* Rx BD lengths */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ug_info->bdRingLenRx[i] < UCC_GETH_RX_BD_RING_SIZE_MIN) || (ug_info->bdRingLenRx[i] % UCC_GETH_RX_BD_RING_SIZE_ALIGNMENT)) { @@ -2092,7 +2041,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* Tx BD lengths */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { if (ug_info->bdRingLenTx[i] < UCC_GETH_TX_BD_RING_SIZE_MIN) { if (netif_msg_probe(ugeth)) pr_err("Tx BD ring length must be no smaller than 2\n"); @@ -2109,14 +2058,14 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) } /* num Tx queues */ - if (ug_info->numQueuesTx > 
NUM_TX_QUEUES) { + if (ucc_geth_tx_queues(ug_info) > NUM_TX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of tx queues too large\n"); return -EINVAL; } /* num Rx queues */ - if (ug_info->numQueuesRx > NUM_RX_QUEUES) { + if (ucc_geth_rx_queues(ug_info) > NUM_RX_QUEUES) { if (netif_msg_probe(ugeth)) pr_err("number of rx queues too large\n"); return -EINVAL; @@ -2124,7 +2073,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l2qt */ for (i = 0; i < UCC_GETH_VLAN_PRIORITY_MAX; i++) { - if (ug_info->l2qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l2qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("VLAN priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2133,7 +2082,7 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* l3qt */ for (i = 0; i < UCC_GETH_IP_PRIORITY_MAX; i++) { - if (ug_info->l3qt[i] >= ug_info->numQueuesRx) { + if (ug_info->l3qt[i] >= ucc_geth_rx_queues(ug_info)) { if (netif_msg_probe(ugeth)) pr_err("IP priority table entry must not be larger than number of Rx queues\n"); return -EINVAL; @@ -2156,10 +2105,10 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) /* Generate uccm_mask for receive */ uf_info->uccm_mask = ug_info->eventRegMask & UCCE_OTHER;/* Errors */ - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_RXF0 << i); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) uf_info->uccm_mask |= (UCC_GETH_UCCE_TXB0 << i); /* Initialize the general fast UCC block. */ if (ucc_fast_init(uf_info, &ugeth->uccf)) { @@ -2198,53 +2147,32 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { - /* Allocate in multiple of - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT, - according to spec */ - length = ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) - / UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - * UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if ((ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)) % - UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT) - length += UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT; - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_TX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_TX_BD_RING_ALIGNMENT; - ugeth->tx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - - if (ugeth->tx_bd_ring_offset[j] != 0) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *)((ugeth->tx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->tx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_TX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j])) - ugeth->p_tx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - tx_bd_ring_offset[j]); - } + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { + u32 align = max(UCC_GETH_TX_BD_RING_ALIGNMENT, + UCC_GETH_TX_BD_RING_SIZE_MEMORY_ALIGNMENT); + u32 alloc; + + length = ug_info->bdRingLenTx[j] * sizeof(struct qe_bd); + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + + ugeth->p_tx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); + if (!ugeth->p_tx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Tx bd rings\n"); return -ENOMEM; } /* Zero unused end of bd ring, according to spec */ - memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] + - 
ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0, - length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)); + memset(ugeth->p_tx_bd_ring[j] + length, 0, alloc - length); } /* Init Tx bds */ - for (j = 0; j < ug_info->numQueuesTx; j++) { + for (j = 0; j < ucc_geth_tx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->tx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenTx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenTx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->tx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2252,9 +2180,6 @@ static int ucc_geth_alloc_tx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenTx[j]; i++) - ugeth->tx_skbuff[j][i] = NULL; - ugeth->skb_curtx[j] = ugeth->skb_dirtytx[j] = 0; bd = ugeth->confBd[j] = ugeth->txBd[j] = ugeth->p_tx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenTx[j]; i++) { @@ -2284,27 +2209,15 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) uf_info = &ug_info->uf_info; /* Allocate Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { + u32 align = UCC_GETH_RX_BD_RING_ALIGNMENT; + u32 alloc; + length = ug_info->bdRingLenRx[j] * sizeof(struct qe_bd); - if (uf_info->bd_mem_part == MEM_PART_SYSTEM) { - u32 align = 4; - if (UCC_GETH_RX_BD_RING_ALIGNMENT > 4) - align = UCC_GETH_RX_BD_RING_ALIGNMENT; - ugeth->rx_bd_ring_offset[j] = - (u32) kmalloc((u32) (length + align), GFP_KERNEL); - if (ugeth->rx_bd_ring_offset[j] != 0) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *)((ugeth->rx_bd_ring_offset[j] + - align) & ~(align - 1)); - } else if (uf_info->bd_mem_part == MEM_PART_MURAM) { - ugeth->rx_bd_ring_offset[j] = - qe_muram_alloc(length, - UCC_GETH_RX_BD_RING_ALIGNMENT); - if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j])) - ugeth->p_rx_bd_ring[j] = - (u8 __iomem *) qe_muram_addr(ugeth-> - rx_bd_ring_offset[j]); - } + alloc = round_up(length, align); + alloc = roundup_pow_of_two(alloc); + + ugeth->p_rx_bd_ring[j] = kmalloc(alloc, GFP_KERNEL); if (!ugeth->p_rx_bd_ring[j]) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for Rx bd rings\n"); @@ -2313,11 +2226,11 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) } /* Init Rx bds */ - for (j = 0; j < ug_info->numQueuesRx; j++) { + for (j = 0; j < ucc_geth_rx_queues(ug_info); j++) { /* Setup the skbuff rings */ ugeth->rx_skbuff[j] = - kmalloc_array(ugeth->ug_info->bdRingLenRx[j], - sizeof(struct sk_buff *), GFP_KERNEL); + kcalloc(ugeth->ug_info->bdRingLenRx[j], + sizeof(struct sk_buff *), GFP_KERNEL); if (ugeth->rx_skbuff[j] == NULL) { if (netif_msg_ifup(ugeth)) @@ -2325,9 +2238,6 @@ static int ucc_geth_alloc_rx(struct ucc_geth_private *ugeth) return -ENOMEM; } - for (i = 0; i < ugeth->ug_info->bdRingLenRx[j]; i++) - ugeth->rx_skbuff[j][i] = NULL; - ugeth->skb_currx[j] = 0; bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j]; for (i = 0; i < ug_info->bdRingLenRx[j]; i++) { @@ -2359,10 +2269,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) u32 init_enet_pram_offset, cecr_subblock, command; u32 ifstat, i, j, size, l2qt, l3qt; u16 temoder = UCC_GETH_TEMODER_INIT; - u16 test; u8 function_code = 0; u8 __iomem *endOfRing; u8 numThreadsRxNumerical, numThreadsTxNumerical; + s32 rx_glbl_pram_offset, tx_glbl_pram_offset; ugeth_vdbg("%s: IN", __func__); uccf = ugeth->uccf; @@ -2371,45 +2281,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) uf_regs = uccf->uf_regs; ug_regs = ugeth->ug_regs; - switch 
(ug_info->numThreadsRx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsRxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsRxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsRxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsRxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsRxNumerical = 8; - break; - default: + numThreadsRxNumerical = ucc_geth_thread_count(ug_info->numThreadsRx); + if (!numThreadsRxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Rx threads value\n"); return -EINVAL; } - switch (ug_info->numThreadsTx) { - case UCC_GETH_NUM_OF_THREADS_1: - numThreadsTxNumerical = 1; - break; - case UCC_GETH_NUM_OF_THREADS_2: - numThreadsTxNumerical = 2; - break; - case UCC_GETH_NUM_OF_THREADS_4: - numThreadsTxNumerical = 4; - break; - case UCC_GETH_NUM_OF_THREADS_6: - numThreadsTxNumerical = 6; - break; - case UCC_GETH_NUM_OF_THREADS_8: - numThreadsTxNumerical = 8; - break; - default: + numThreadsTxNumerical = ucc_geth_thread_count(ug_info->numThreadsTx); + if (!numThreadsTxNumerical) { if (netif_msg_ifup(ugeth)) pr_err("Bad number of Tx threads value\n"); return -EINVAL; @@ -2507,20 +2387,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) */ /* Tx global PRAM */ /* Allocate global tx parameter RAM page */ - ugeth->tx_glbl_pram_offset = + tx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_tx_global_pram), UCC_GETH_TX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->tx_glbl_pram_offset)) { + if (tx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_tx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_tx_glbl_pram = - (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth-> - tx_glbl_pram_offset); - /* Zero out p_tx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram)); - + ugeth->p_tx_glbl_pram = qe_muram_addr(tx_glbl_pram_offset); /* Fill global PRAM */ /* TQPTR */ @@ -2554,7 +2429,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* SQPTR */ /* Size varies with number of Tx queues */ ugeth->send_q_mem_reg_offset = - qe_muram_alloc(ug_info->numQueuesTx * + qe_muram_alloc(ucc_geth_tx_queues(ug_info) * sizeof(struct ucc_geth_send_queue_qd), UCC_GETH_SEND_QUEUE_QUEUE_DESCRIPTOR_ALIGNMENT); if (IS_ERR_VALUE(ugeth->send_q_mem_reg_offset)) { @@ -2570,29 +2445,20 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesTx; i++) { + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) { endOfRing = ugeth->p_tx_bd_ring[i] + (ug_info->bdRingLenTx[i] - 1) * sizeof(struct qe_bd); - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32) virt_to_phys(endOfRing)); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, - (u32)qe_muram_dma(ugeth->p_tx_bd_ring[i])); - out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. - last_bd_completed_address, - (u32)qe_muram_dma(endOfRing)); - } + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i].bd_ring_base, + (u32) virt_to_phys(ugeth->p_tx_bd_ring[i])); + out_be32(&ugeth->p_send_q_mem_reg->sqqd[i]. 
+ last_bd_completed_address, + (u32) virt_to_phys(endOfRing)); } /* schedulerbasepointer */ - if (ug_info->numQueuesTx > 1) { + if (ucc_geth_tx_queues(ug_info) > 1) { /* scheduler exists only if more than 1 tx queue */ ugeth->scheduler_offset = qe_muram_alloc(sizeof(struct ucc_geth_scheduler), @@ -2608,8 +2474,6 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) scheduler_offset); out_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer, ugeth->scheduler_offset); - /* Zero out p_scheduler */ - memset_io((void __iomem *)ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler)); /* Set values in scheduler */ out_be32(&ugeth->p_scheduler->mblinterval, @@ -2652,23 +2516,18 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_tx_fw_statistics_pram = (struct ucc_geth_tx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->tx_fw_statistics_pram_offset); - /* Zero out p_tx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_tx_fw_statistics_pram, - 0, sizeof(struct ucc_geth_tx_firmware_statistics_pram)); } /* temoder */ /* Already has speed set */ - if (ug_info->numQueuesTx > 1) + if (ucc_geth_tx_queues(ug_info) > 1) temoder |= TEMODER_SCHEDULER_ENABLE; if (ug_info->ipCheckSumGenerate) temoder |= TEMODER_IP_CHECKSUM_GENERATE; - temoder |= ((ug_info->numQueuesTx - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); + temoder |= ((ucc_geth_tx_queues(ug_info) - 1) << TEMODER_NUM_OF_QUEUES_SHIFT); out_be16(&ugeth->p_tx_glbl_pram->temoder, temoder); - test = in_be16(&ugeth->p_tx_glbl_pram->temoder); - /* Function code register value to be used later */ function_code = UCC_BMR_BO_BE | UCC_BMR_GBL; /* Required for QE */ @@ -2678,20 +2537,15 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* Rx global PRAM */ /* Allocate global rx parameter RAM page */ - ugeth->rx_glbl_pram_offset = + rx_glbl_pram_offset = qe_muram_alloc(sizeof(struct ucc_geth_rx_global_pram), UCC_GETH_RX_GLOBAL_PRAM_ALIGNMENT); - if (IS_ERR_VALUE(ugeth->rx_glbl_pram_offset)) { + if (rx_glbl_pram_offset < 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate DPRAM memory for p_rx_glbl_pram\n"); return -ENOMEM; } - ugeth->p_rx_glbl_pram = - (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth-> - rx_glbl_pram_offset); - /* Zero out p_rx_glbl_pram */ - memset_io((void __iomem *)ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram)); - + ugeth->p_rx_glbl_pram = qe_muram_addr(rx_glbl_pram_offset); /* Fill global PRAM */ /* RQPTR */ @@ -2729,16 +2583,13 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->p_rx_fw_statistics_pram = (struct ucc_geth_rx_firmware_statistics_pram __iomem *) qe_muram_addr(ugeth->rx_fw_statistics_pram_offset); - /* Zero out p_rx_fw_statistics_pram */ - memset_io((void __iomem *)ugeth->p_rx_fw_statistics_pram, 0, - sizeof(struct ucc_geth_rx_firmware_statistics_pram)); } /* intCoalescingPtr */ /* Size varies with number of Rx queues */ ugeth->rx_irq_coalescing_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * sizeof(struct ucc_geth_rx_interrupt_coalescing_entry) + 4, UCC_GETH_RX_INTERRUPT_COALESCING_ALIGNMENT); if (IS_ERR_VALUE(ugeth->rx_irq_coalescing_tbl_offset)) { @@ -2754,7 +2605,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ugeth->rx_irq_coalescing_tbl_offset); /* Fill interrupt coalescing table */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { out_be32(&ugeth->p_rx_irq_coalescing_tbl->coalescingentry[i]. 
interruptcoalescingmaxvalue, ug_info->interruptcoalescingmaxvalue[i]); @@ -2803,7 +2654,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* RBDQPTR */ /* Size varies with number of Rx queues */ ugeth->rx_bd_qs_tbl_offset = - qe_muram_alloc(ug_info->numQueuesRx * + qe_muram_alloc(ucc_geth_rx_queues(ug_info) * (sizeof(struct ucc_geth_rx_bd_queues_entry) + sizeof(struct ucc_geth_rx_prefetched_bds)), UCC_GETH_RX_BD_QUEUES_ALIGNMENT); @@ -2817,23 +2668,12 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) (struct ucc_geth_rx_bd_queues_entry __iomem *) qe_muram_addr(ugeth-> rx_bd_qs_tbl_offset); out_be32(&ugeth->p_rx_glbl_pram->rbdqptr, ugeth->rx_bd_qs_tbl_offset); - /* Zero out p_rx_bd_qs_tbl */ - memset_io((void __iomem *)ugeth->p_rx_bd_qs_tbl, - 0, - ug_info->numQueuesRx * (sizeof(struct ucc_geth_rx_bd_queues_entry) + - sizeof(struct ucc_geth_rx_prefetched_bds))); /* Setup the table */ /* Assume BD rings are already established */ - for (i = 0; i < ug_info->numQueuesRx; i++) { - if (ugeth->ug_info->uf_info.bd_mem_part == MEM_PART_SYSTEM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); - } else if (ugeth->ug_info->uf_info.bd_mem_part == - MEM_PART_MURAM) { - out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, - (u32)qe_muram_dma(ugeth->p_rx_bd_ring[i])); - } + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { + out_be32(&ugeth->p_rx_bd_qs_tbl[i].externalbdbaseptr, + (u32) virt_to_phys(ugeth->p_rx_bd_ring[i])); /* rest of fields handled by QE */ } @@ -2854,7 +2694,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ug_info-> vlanOperationNonTagged << REMODER_VLAN_OPERATION_NON_TAGGED_SHIFT; remoder |= ug_info->rxQoSMode << REMODER_RX_QOS_MODE_SHIFT; - remoder |= ((ug_info->numQueuesRx - 1) << REMODER_NUM_OF_QUEUES_SHIFT); + remoder |= ((ucc_geth_rx_queues(ug_info) - 1) << REMODER_NUM_OF_QUEUES_SHIFT); if (ug_info->ipCheckSumCheck) remoder |= REMODER_IP_CHECKSUM_CHECK; if (ug_info->ipAddressAlignment) @@ -2937,14 +2777,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) * allocated resources can be released when the channel is freed. 
*/ if (!(ugeth->p_init_enet_param_shadow = - kmalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { + kzalloc(sizeof(struct ucc_geth_init_pram), GFP_KERNEL))) { if (netif_msg_ifup(ugeth)) pr_err("Can not allocate memory for p_UccInitEnetParamShadows\n"); return -ENOMEM; } - /* Zero out *p_init_enet_param_shadow */ - memset((char *)ugeth->p_init_enet_param_shadow, - 0, sizeof(struct ucc_geth_init_pram)); /* Fill shadow InitEnet command parameter structure */ @@ -2964,7 +2801,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) ((u32) ug_info->numThreadsTx) << ENET_INIT_PARAM_TGF_SHIFT; ugeth->p_init_enet_param_shadow->rgftgfrxglobal |= - ugeth->rx_glbl_pram_offset | ug_info->riscRx; + rx_glbl_pram_offset | ug_info->riscRx; if ((ug_info->largestexternallookupkeysize != QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE) && (ug_info->largestexternallookupkeysize != @@ -3002,7 +2839,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } ugeth->p_init_enet_param_shadow->txglobal = - ugeth->tx_glbl_pram_offset | ug_info->riscTx; + tx_glbl_pram_offset | ug_info->riscTx; if ((ret_val = fill_init_enet_entries(ugeth, &(ugeth->p_init_enet_param_shadow-> @@ -3016,7 +2853,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) } /* Load Rx bds with buffers */ - for (i = 0; i < ug_info->numQueuesRx; i++) { + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) { if ((ret_val = rx_bd_buffer_set(ugeth, (u8) i)) != 0) { if (netif_msg_ifup(ugeth)) pr_err("Can not fill Rx bds with buffers\n"); @@ -3287,12 +3124,12 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget) /* Tx event processing */ spin_lock(&ugeth->lock); - for (i = 0; i < ug_info->numQueuesTx; i++) + for (i = 0; i < ucc_geth_tx_queues(ug_info); i++) ucc_geth_tx(ugeth->ndev, i); spin_unlock(&ugeth->lock); howmany = 0; - for (i = 0; i < ug_info->numQueuesRx; i++) + for (i = 0; i < ucc_geth_rx_queues(ug_info); i++) howmany += ucc_geth_rx(ugeth, i, budget - howmany); if (howmany < budget) { @@ -3685,6 +3522,36 @@ static const struct net_device_ops ucc_geth_netdev_ops = { #endif }; +static int ucc_geth_parse_clock(struct device_node *np, const char *which, + enum qe_clock *out) +{ + const char *sprop; + char buf[24]; + + snprintf(buf, sizeof(buf), "%s-clock-name", which); + sprop = of_get_property(np, buf, NULL); + if (sprop) { + *out = qe_clock_source(sprop); + } else { + u32 val; + + snprintf(buf, sizeof(buf), "%s-clock", which); + if (of_property_read_u32(np, buf, &val)) { + /* If both *-clock-name and *-clock are missing, + * we want to tell people to use *-clock-name. 
+ */ + pr_err("missing %s-clock-name property\n", buf); + return -EINVAL; + } + *out = val; + } + if (*out < QE_CLK_NONE || *out > QE_CLK24) { + pr_err("invalid %s property\n", buf); + return -EINVAL; + } + return 0; +} + static int ucc_geth_probe(struct platform_device* ofdev) { struct device *device = &ofdev->dev; @@ -3695,7 +3562,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct resource res; int err, ucc_num, max_speed = 0; const unsigned int *prop; - const char *sprop; const void *mac_addr; phy_interface_t phy_interface; static const int enet_to_speed[] = { @@ -3725,62 +3591,23 @@ static int ucc_geth_probe(struct platform_device* ofdev) if ((ucc_num < 0) || (ucc_num > 7)) return -ENODEV; - ug_info = &ugeth_info[ucc_num]; - if (ug_info == NULL) { - if (netif_msg_probe(&debug)) - pr_err("[%d] Missing additional data!\n", ucc_num); - return -ENODEV; - } + ug_info = kmalloc(sizeof(*ug_info), GFP_KERNEL); + if (ug_info == NULL) + return -ENOMEM; + memcpy(ug_info, &ugeth_primary_info, sizeof(*ug_info)); ug_info->uf_info.ucc_num = ucc_num; - sprop = of_get_property(np, "rx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.rx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.rx_clock < QE_CLK_NONE) || - (ug_info->uf_info.rx_clock > QE_CLK24)) { - pr_err("invalid rx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "rx-clock", NULL); - if (!prop) { - /* If both rx-clock-name and rx-clock are missing, - we want to tell people to use rx-clock-name. */ - pr_err("missing rx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid rx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.rx_clock = *prop; - } - - sprop = of_get_property(np, "tx-clock-name", NULL); - if (sprop) { - ug_info->uf_info.tx_clock = qe_clock_source(sprop); - if ((ug_info->uf_info.tx_clock < QE_CLK_NONE) || - (ug_info->uf_info.tx_clock > QE_CLK24)) { - pr_err("invalid tx-clock-name property\n"); - return -EINVAL; - } - } else { - prop = of_get_property(np, "tx-clock", NULL); - if (!prop) { - pr_err("missing tx-clock-name property\n"); - return -EINVAL; - } - if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid tx-clock property\n"); - return -EINVAL; - } - ug_info->uf_info.tx_clock = *prop; - } + err = ucc_geth_parse_clock(np, "rx", &ug_info->uf_info.rx_clock); + if (err) + goto err_free_info; + err = ucc_geth_parse_clock(np, "tx", &ug_info->uf_info.tx_clock); + if (err) + goto err_free_info; err = of_address_to_resource(np, 0, &res); if (err) - return -EINVAL; + goto err_free_info; ug_info->uf_info.regs = res.start; ug_info->uf_info.irq = irq_of_parse_and_map(np, 0); @@ -3793,7 +3620,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) */ err = of_phy_register_fixed_link(np); if (err) - return err; + goto err_free_info; ug_info->phy_node = of_node_get(np); } @@ -3924,6 +3751,8 @@ err_deregister_fixed_link: of_phy_deregister_fixed_link(np); of_node_put(ug_info->tbi_node); of_node_put(ug_info->phy_node); +err_free_info: + kfree(ug_info); return err; } @@ -3940,6 +3769,7 @@ static int ucc_geth_remove(struct platform_device* ofdev) of_phy_deregister_fixed_link(np); of_node_put(ugeth->ug_info->tbi_node); of_node_put(ugeth->ug_info->phy_node); + kfree(ugeth->ug_info); free_netdev(dev); return 0; @@ -3968,17 +3798,10 @@ static struct platform_driver ucc_geth_driver = { static int __init ucc_geth_init(void) { - int i, ret; - if (netif_msg_drv(&debug)) pr_info(DRV_DESC "\n"); 
- for (i = 0; i < 8; i++) - memcpy(&(ugeth_info[i]), &ugeth_primary_info, - sizeof(ugeth_primary_info)); - - ret = platform_driver_register(&ucc_geth_driver); - return ret; + return platform_driver_register(&ucc_geth_driver); } static void __exit ucc_geth_exit(void) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 11d4bf5dc21f..4294ed096ebb 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1076,8 +1076,6 @@ struct ucc_geth_tad_params { /* GETH protocol initialization structure */ struct ucc_geth_info { struct ucc_fast_info uf_info; - u8 numQueuesTx; - u8 numQueuesRx; int ipCheckSumCheck; int ipCheckSumGenerate; int rxExtendedFiltering; @@ -1165,9 +1163,7 @@ struct ucc_geth_private { struct ucc_geth_exf_global_pram __iomem *p_exf_glbl_param; u32 exf_glbl_param_offset; struct ucc_geth_rx_global_pram __iomem *p_rx_glbl_pram; - u32 rx_glbl_pram_offset; struct ucc_geth_tx_global_pram __iomem *p_tx_glbl_pram; - u32 tx_glbl_pram_offset; struct ucc_geth_send_queue_mem_region __iomem *p_send_q_mem_reg; u32 send_q_mem_reg_offset; struct ucc_geth_thread_data_tx __iomem *p_thread_data_tx; @@ -1185,9 +1181,7 @@ struct ucc_geth_private { struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl; u32 rx_bd_qs_tbl_offset; u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES]; - u32 tx_bd_ring_offset[NUM_TX_QUEUES]; u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES]; - u32 rx_bd_ring_offset[NUM_RX_QUEUES]; u8 __iomem *confBd[NUM_TX_QUEUES]; u8 __iomem *txBd[NUM_TX_QUEUES]; u8 __iomem *rxBd[NUM_RX_QUEUES]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 405e49033417..512080640cbc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1070,7 +1070,7 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) * HW checksum of the non-IP packets and GSO packets is handled at * different place in the following code */ - if (skb->csum_not_inet || skb_is_gso(skb) || + if (skb_csum_is_sctp(skb) || skb_is_gso(skb) || !test_bit(HNS3_NIC_STATE_HW_TX_CSUM_ENABLE, &priv->state)) return false; diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 41815b609569..7fe15a3286f4 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -94,7 +94,6 @@ config MVPP2 config MVPP2_PTP bool "Marvell Armada 8K Enable PTP support" - depends on NETWORK_PHY_TIMESTAMPING depends on (PTP_1588_CLOCK = y && MVPP2 = y) || \ (PTP_1588_CLOCK && MVPP2 = m) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index f919283ddc34..89e93eb46ab7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -717,6 +717,8 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_INNR_ETH_DMAC BIT(17) #define NIX_FLOW_KEY_TYPE_VLAN BIT(20) #define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21) +#define NIX_FLOW_KEY_TYPE_AH BIT(22) +#define NIX_FLOW_KEY_TYPE_ESP BIT(23) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index bc0e4113370e..10a98bcb7c54 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -52,6 
+52,650 @@ static bool rvu_common_request_irq(struct rvu *rvu, int offset, return rvu->irq_allocated[offset]; } +static void rvu_nix_intr_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, intr_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_intr_reporter, + "NIX_AF_RVU Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_intr_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_RVU_INT); + nix_event_context->nix_af_rvu_int = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->intr_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_gen_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, gen_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_gen_reporter, + "NIX_AF_GEN Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_gen_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_GEN_INT); + nix_event_context->nix_af_rvu_gen = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->gen_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_err_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct rvu_nix_health_reporters, err_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_err_reporter, + "NIX_AF_ERR Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_err_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_err = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT, intr); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->err_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_ras_work(struct work_struct *work) +{ + struct rvu_nix_health_reporters *rvu_nix_health_reporter; + + rvu_nix_health_reporter = container_of(work, struct 
rvu_nix_health_reporters, ras_work); + devlink_health_report(rvu_nix_health_reporter->rvu_hw_nix_ras_reporter, + "NIX_AF_RAS Error", + rvu_nix_health_reporter->nix_event_ctx); +} + +static irqreturn_t rvu_nix_af_rvu_ras_handler(int irq, void *rvu_irq) +{ + struct rvu_nix_event_ctx *nix_event_context; + struct rvu_devlink *rvu_dl = rvu_irq; + struct rvu *rvu; + int blkaddr; + u64 intr; + + rvu = rvu_dl->rvu; + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return IRQ_NONE; + + nix_event_context = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + intr = rvu_read64(rvu, blkaddr, NIX_AF_ERR_INT); + nix_event_context->nix_af_rvu_ras = intr; + + /* Clear interrupts */ + rvu_write64(rvu, blkaddr, NIX_AF_RAS, intr); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + queue_work(rvu_dl->devlink_wq, &rvu_dl->rvu_nix_health_reporter->ras_work); + + return IRQ_HANDLED; +} + +static void rvu_nix_unregister_interrupts(struct rvu *rvu) +{ + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + int offs, i, blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return; + + offs = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!offs) + return; + + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1C, ~0ULL); + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1C, ~0ULL); + + if (rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU]) { + free_irq(pci_irq_vector(rvu->pdev, offs + NIX_AF_INT_VEC_RVU), + rvu_dl); + rvu->irq_allocated[offs + NIX_AF_INT_VEC_RVU] = false; + } + + for (i = NIX_AF_INT_VEC_AF_ERR; i < NIX_AF_INT_VEC_CNT; i++) + if (rvu->irq_allocated[offs + i]) { + free_irq(pci_irq_vector(rvu->pdev, offs + i), rvu_dl); + rvu->irq_allocated[offs + i] = false; + } +} + +static int rvu_nix_register_interrupts(struct rvu *rvu) +{ + int blkaddr, base; + bool rc; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + /* Get NIX AF MSIX vectors offset. 
*/ + base = rvu_read64(rvu, blkaddr, NIX_PRIV_AF_INT_CFG) & 0x3ff; + if (!base) { + dev_warn(rvu->dev, + "Failed to get NIX%d NIX_AF_INT vector offsets\n", + blkaddr - BLKADDR_NIX0); + return 0; + } + /* Register and enable NIX_AF_RVU_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_RVU, + "NIX_AF_RVU_INT", + rvu_nix_af_rvu_intr_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_GEN_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_GEN, + "NIX_AF_GEN_INT", + rvu_nix_af_rvu_gen_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_ERR_INT interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_AF_ERR, + "NIX_AF_ERR_INT", + rvu_nix_af_rvu_err_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + /* Register and enable NIX_AF_RAS interrupt */ + rc = rvu_common_request_irq(rvu, base + NIX_AF_INT_VEC_POISON, + "NIX_AF_RAS", + rvu_nix_af_rvu_ras_handler); + if (!rc) + goto err; + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +err: + rvu_nix_unregister_interrupts(rvu); + return rc; +} + +static int rvu_nix_report_show(struct devlink_fmsg *fmsg, void *ctx, + enum nix_af_rvu_health health_reporter) +{ + struct rvu_nix_event_ctx *nix_event_context; + u64 intr_val; + int err; + + nix_event_context = ctx; + switch (health_reporter) { + case NIX_AF_RVU_INTR: + intr_val = nix_event_context->nix_af_rvu_int; + err = rvu_report_pair_start(fmsg, "NIX_AF_RVU"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RVU Interrupt Reg ", + nix_event_context->nix_af_rvu_int); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tUnmap Slot Error"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_GEN: + intr_val = nix_event_context->nix_af_rvu_gen; + err = rvu_report_pair_start(fmsg, "NIX_AF_GENERAL"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX General Interrupt Reg ", + nix_event_context->nix_af_rvu_gen); + if (err) + return err; + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx mirror pkt drop"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tSMQ flush done"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_ERR: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_ERR"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX Error Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + if (intr_val & BIT_ULL(14)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_INST_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(13)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_AQ_RES_S write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(12)) { + err = devlink_fmsg_string_put(fmsg, "\n\tAQ Doorbell Error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(6)) { + err = devlink_fmsg_string_put(fmsg, "\n\tRx on unmapped PF_FUNC"); + if (err) + return err; + } + if (intr_val & BIT_ULL(5)) { 
+ err = devlink_fmsg_string_put(fmsg, "\n\tRx multicast replication error"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on NIX_RX_MCE_S read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror WQE read"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on mirror pkt write"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tFault on multicast pkt write"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + case NIX_AF_RVU_RAS: + intr_val = nix_event_context->nix_af_rvu_err; + err = rvu_report_pair_start(fmsg, "NIX_AF_RAS"); + if (err) + return err; + err = devlink_fmsg_u64_pair_put(fmsg, "\tNIX RAS Interrupt Reg ", + nix_event_context->nix_af_rvu_err); + if (err) + return err; + err = devlink_fmsg_string_put(fmsg, "\n\tPoison Data on:"); + if (err) + return err; + if (intr_val & BIT_ULL(34)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_INST_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(33)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_AQ_RES_S"); + if (err) + return err; + } + if (intr_val & BIT_ULL(32)) { + err = devlink_fmsg_string_put(fmsg, "\n\tHW ctx"); + if (err) + return err; + } + if (intr_val & BIT_ULL(4)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(3)) { + err = devlink_fmsg_string_put(fmsg, "\n\tPacket from multicast buffer"); + + if (err) + return err; + } + if (intr_val & BIT_ULL(2)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from mirror buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(1)) { + err = devlink_fmsg_string_put(fmsg, "\n\tWQE read from multicast buffer"); + if (err) + return err; + } + if (intr_val & BIT_ULL(0)) { + err = devlink_fmsg_string_put(fmsg, "\n\tNIX_RX_MCE_S read"); + if (err) + return err; + } + err = rvu_report_pair_end(fmsg); + if (err) + return err; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int rvu_hw_nix_intr_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? 
rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_INTR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_INTR); +} + +static int rvu_hw_nix_intr_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RVU_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_gen_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_GEN) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_GEN); +} + +static int rvu_hw_nix_gen_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_gen) + rvu_write64(rvu, blkaddr, NIX_AF_GEN_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_err_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_ERR) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_ERR); +} + +static int rvu_hw_nix_err_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_err) + rvu_write64(rvu, blkaddr, NIX_AF_ERR_INT_ENA_W1S, ~0ULL); + + return 0; +} + +static int rvu_hw_nix_ras_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *ctx, + struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_devlink *rvu_dl = rvu->rvu_dl; + struct rvu_nix_event_ctx *nix_ctx; + + nix_ctx = rvu_dl->rvu_nix_health_reporter->nix_event_ctx; + + return ctx ? 
rvu_nix_report_show(fmsg, ctx, NIX_AF_RVU_RAS) : + rvu_nix_report_show(fmsg, nix_ctx, NIX_AF_RVU_RAS); +} + +static int rvu_hw_nix_ras_recover(struct devlink_health_reporter *reporter, + void *ctx, struct netlink_ext_ack *netlink_extack) +{ + struct rvu *rvu = devlink_health_reporter_priv(reporter); + struct rvu_nix_event_ctx *nix_event_ctx = ctx; + int blkaddr; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, 0); + if (blkaddr < 0) + return blkaddr; + + if (nix_event_ctx->nix_af_rvu_int) + rvu_write64(rvu, blkaddr, NIX_AF_RAS_ENA_W1S, ~0ULL); + + return 0; +} + +RVU_REPORTERS(hw_nix_intr); +RVU_REPORTERS(hw_nix_gen); +RVU_REPORTERS(hw_nix_err); +RVU_REPORTERS(hw_nix_ras); + +static void rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl); + +static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *rvu_reporters; + struct rvu_nix_event_ctx *nix_event_context; + struct rvu *rvu = rvu_dl->rvu; + + rvu_reporters = kzalloc(sizeof(*rvu_reporters), GFP_KERNEL); + if (!rvu_reporters) + return -ENOMEM; + + rvu_dl->rvu_nix_health_reporter = rvu_reporters; + nix_event_context = kzalloc(sizeof(*nix_event_context), GFP_KERNEL); + if (!nix_event_context) + return -ENOMEM; + + rvu_reporters->nix_event_ctx = nix_event_context; + rvu_reporters->rvu_hw_nix_intr_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter); + } + + rvu_reporters->rvu_hw_nix_gen_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter); + } + + rvu_reporters->rvu_hw_nix_err_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter); + } + + rvu_reporters->rvu_hw_nix_ras_reporter = + devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu); + if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) { + dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n", + PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)); + return PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter); + } + + rvu_dl->devlink_wq = create_workqueue("rvu_devlink_wq"); + if (!rvu_dl->devlink_wq) + goto err; + + INIT_WORK(&rvu_reporters->intr_work, rvu_nix_intr_work); + INIT_WORK(&rvu_reporters->gen_work, rvu_nix_gen_work); + INIT_WORK(&rvu_reporters->err_work, rvu_nix_err_work); + INIT_WORK(&rvu_reporters->ras_work, rvu_nix_ras_work); + + return 0; +err: + rvu_nix_health_reporters_destroy(rvu_dl); + return -ENOMEM; +} + +static int rvu_nix_health_reporters_create(struct rvu_devlink *rvu_dl) +{ + struct rvu *rvu = rvu_dl->rvu; + int err; + + err = rvu_nix_register_reporters(rvu_dl); + if (err) { + dev_warn(rvu->dev, "Failed to create nix reporter, err =%d\n", + err); + return err; + } + rvu_nix_register_interrupts(rvu); + + return 0; +} + +static void 
rvu_nix_health_reporters_destroy(struct rvu_devlink *rvu_dl) +{ + struct rvu_nix_health_reporters *nix_reporters; + struct rvu *rvu = rvu_dl->rvu; + + nix_reporters = rvu_dl->rvu_nix_health_reporter; + + if (!nix_reporters->rvu_hw_nix_ras_reporter) + return; + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_intr_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_intr_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_gen_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_gen_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_err_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_err_reporter); + + if (!IS_ERR_OR_NULL(nix_reporters->rvu_hw_nix_ras_reporter)) + devlink_health_reporter_destroy(nix_reporters->rvu_hw_nix_ras_reporter); + + rvu_nix_unregister_interrupts(rvu); + kfree(rvu_dl->rvu_nix_health_reporter->nix_event_ctx); + kfree(rvu_dl->rvu_nix_health_reporter); +} + static void rvu_npa_intr_work(struct work_struct *work) { struct rvu_npa_health_reporters *rvu_npa_health_reporter; @@ -698,9 +1342,14 @@ static void rvu_npa_health_reporters_destroy(struct rvu_devlink *rvu_dl) static int rvu_health_reporters_create(struct rvu *rvu) { struct rvu_devlink *rvu_dl; + int err; rvu_dl = rvu->rvu_dl; - return rvu_npa_health_reporters_create(rvu_dl); + err = rvu_npa_health_reporters_create(rvu_dl); + if (err) + return err; + + return rvu_nix_health_reporters_create(rvu_dl); } static void rvu_health_reporters_destroy(struct rvu *rvu) @@ -712,6 +1361,7 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_dl = rvu->rvu_dl; rvu_npa_health_reporters_destroy(rvu_dl); + rvu_nix_health_reporters_destroy(rvu_dl); } static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h index d7578fa92ac1..471e57dedb20 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.h @@ -41,11 +41,38 @@ struct rvu_npa_health_reporters { struct work_struct ras_work; }; +enum nix_af_rvu_health { + NIX_AF_RVU_INTR, + NIX_AF_RVU_GEN, + NIX_AF_RVU_ERR, + NIX_AF_RVU_RAS, +}; + +struct rvu_nix_event_ctx { + u64 nix_af_rvu_int; + u64 nix_af_rvu_gen; + u64 nix_af_rvu_err; + u64 nix_af_rvu_ras; +}; + +struct rvu_nix_health_reporters { + struct rvu_nix_event_ctx *nix_event_ctx; + struct devlink_health_reporter *rvu_hw_nix_intr_reporter; + struct work_struct intr_work; + struct devlink_health_reporter *rvu_hw_nix_gen_reporter; + struct work_struct gen_work; + struct devlink_health_reporter *rvu_hw_nix_err_reporter; + struct work_struct err_work; + struct devlink_health_reporter *rvu_hw_nix_ras_reporter; + struct work_struct ras_work; +}; + struct rvu_devlink { struct devlink *dl; struct rvu *rvu; struct workqueue_struct *devlink_wq; struct rvu_npa_health_reporters *rvu_npa_health_reporter; + struct rvu_nix_health_reporters *rvu_nix_health_reporter; }; /* Devlink APIs */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a8dfbb6d1774..b54753ef7d94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2580,6 +2580,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + 
int l4_key_offset; if (!alg) return -EINVAL; @@ -2712,6 +2713,12 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field_marker = false; keyoff_marker = false; } + + /* TCP/UDP/SCTP and ESP/AH falls at same offset so + * remember the TCP key offset of 40 byte hash key. + */ + if (key_type == NIX_FLOW_KEY_TYPE_TCP) + l4_key_offset = key_off; break; case NIX_FLOW_KEY_TYPE_NVGRE: field->lid = NPC_LID_LD; @@ -2783,11 +2790,31 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->ltype_mask = 0xF; field->fn_mask = 1; /* Mask out the first nibble */ break; + case NIX_FLOW_KEY_TYPE_AH: + case NIX_FLOW_KEY_TYPE_ESP: + field->hdr_offset = 0; + field->bytesm1 = 7; /* SPI + sequence number */ + field->ltype_mask = 0xF; + field->lid = NPC_LID_LE; + field->ltype_match = NPC_LT_LE_ESP; + if (key_type == NIX_FLOW_KEY_TYPE_AH) { + field->lid = NPC_LID_LD; + field->ltype_match = NPC_LT_LD_AH; + field->hdr_offset = 4; + keyoff_marker = false; + } + break; } field->ena = 1; /* Found a valid flow key type */ if (valid_key) { + /* Use the key offset of TCP/UDP/SCTP fields + * for ESP/AH fields. + */ + if (key_type == NIX_FLOW_KEY_TYPE_ESP || + key_type == NIX_FLOW_KEY_TYPE_AH) + key_off = l4_key_offset; field->key_offset = key_off; memcpy(&alg[nr_field], field, sizeof(*field)); max_key_off = max(max_key_off, field->bytesm1 + 1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h index e2153d47c373..5e15f4fc11e3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h @@ -74,6 +74,16 @@ enum npa_af_int_vec_e { NPA_AF_INT_VEC_CNT = 0x5, }; +/* NIX Admin function Interrupt Vector Enumeration */ +enum nix_af_int_vec_e { + NIX_AF_INT_VEC_RVU = 0x0, + NIX_AF_INT_VEC_GEN = 0x1, + NIX_AF_INT_VEC_AQ_DONE = 0x2, + NIX_AF_INT_VEC_AF_ERR = 0x3, + NIX_AF_INT_VEC_POISON = 0x4, + NIX_AF_INT_VEC_CNT = 0x5, +}; + /** * RVU PF Interrupt Vector Enumeration */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index aaba0454d188..e0199f0e4a6c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -448,10 +448,14 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; break; case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + if (rss->flowkey_cfg & NIX_FLOW_KEY_TYPE_ESP) + nfc->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; case AH_V4_FLOW: case ESP_V4_FLOW: case IPV4_FLOW: - case AH_ESP_V6_FLOW: + break; case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: @@ -459,6 +463,7 @@ static int otx2_get_rss_hash_opts(struct otx2_nic *pfvf, default: return -EINVAL; } + return 0; } @@ -527,6 +532,36 @@ static int otx2_set_rss_hash_opts(struct otx2_nic *pfvf, return -EINVAL; } break; + case AH_ESP_V4_FLOW: + case AH_ESP_V6_FLOW: + switch (nfc->data & rxh_l4) { + case 0: + rss_cfg &= ~(NIX_FLOW_KEY_TYPE_ESP | + NIX_FLOW_KEY_TYPE_AH); + rss_cfg |= NIX_FLOW_KEY_TYPE_VLAN | + NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + /* If VLAN hashing is also requested for ESP then do not + * allow because of hardware 40 bytes flow key limit. 
+ */ + if (rss_cfg & NIX_FLOW_KEY_TYPE_VLAN) { + netdev_err(pfvf->netdev, + "RSS hash of ESP or AH with VLAN is not supported\n"); + return -EOPNOTSUPP; + } + + rss_cfg |= NIX_FLOW_KEY_TYPE_ESP | NIX_FLOW_KEY_TYPE_AH; + /* Disable IPv4 proto hashing since IPv6 SA+DA(32 bytes) + * and ESP SPI+sequence(8 bytes) uses hardware maximum + * limit of 40 byte flow key. + */ + rss_cfg &= ~NIX_FLOW_KEY_TYPE_IPV4_PROTO; + break; + default: + return -EINVAL; + } + break; case IPV4_FLOW: case IPV6_FLOW: rss_cfg = NIX_FLOW_KEY_TYPE_IPV4 | NIX_FLOW_KEY_TYPE_IPV6; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 6d2d60675ffd..01d3ee4b5829 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -353,7 +353,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, /* Setup gmac */ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); mcr_new = mcr_cur; - mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | + mcr_new |= MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; /* Only update control register when needed! */ @@ -759,8 +759,8 @@ static void mtk_get_stats64(struct net_device *dev, static inline int mtk_max_frag_size(int mtu) { /* make sure buf_size will be at least MTK_MAX_RX_LENGTH */ - if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH) - mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH_2K) + mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -771,7 +771,7 @@ static inline int mtk_max_buf_size(int frag_size) int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - WARN_ON(buf_size < MTK_MAX_RX_LENGTH); + WARN_ON(buf_size < MTK_MAX_RX_LENGTH_2K); return buf_size; } @@ -2499,6 +2499,35 @@ static void mtk_uninit(struct net_device *dev) mtk_rx_irq_disable(eth, ~0); } +static int mtk_change_mtu(struct net_device *dev, int new_mtu) +{ + int length = new_mtu + MTK_RX_ETH_HLEN; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + u32 mcr_cur, mcr_new; + + if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) { + mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); + mcr_new = mcr_cur & ~MAC_MCR_MAX_RX_MASK; + + if (length <= 1518) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1518); + else if (length <= 1536) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1536); + else if (length <= 1552) + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_1552); + else + mcr_new |= MAC_MCR_MAX_RX(MAC_MCR_MAX_RX_2048); + + if (mcr_new != mcr_cur) + mtk_w32(mac->hw, mcr_new, MTK_MAC_MCR(mac->id)); + } + + dev->mtu = new_mtu; + + return 0; +} + static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct mtk_mac *mac = netdev_priv(dev); @@ -2795,6 +2824,7 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_set_mac_address = mtk_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = mtk_do_ioctl, + .ndo_change_mtu = mtk_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, .ndo_fix_features = mtk_fix_features, @@ -2896,7 +2926,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->irq = eth->irq[0]; eth->netdev[id]->dev.of_node = np; - eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + if (MTK_HAS_CAPS(eth->soc->caps, 
MTK_SOC_MT7628)) + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN; + else + eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH_2K - MTK_RX_ETH_HLEN; return 0; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 454cfcd465fd..fd3cec8f06ba 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -17,12 +17,13 @@ #include <linux/phylink.h> #define MTK_QDMA_PAGE_SIZE 2048 -#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH 1536 +#define MTK_MAX_RX_LENGTH_2K 2048 #define MTK_TX_DMA_BUF_LEN 0x3fff #define MTK_DMA_SIZE 256 #define MTK_NAPI_WEIGHT 64 #define MTK_MAC_COUNT 2 -#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) +#define MTK_RX_ETH_HLEN (ETH_HLEN + ETH_FCS_LEN) #define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN) #define MTK_DMA_DUMMY_DESC 0xffffffff #define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \ @@ -320,7 +321,12 @@ /* Mac control registers */ #define MTK_MAC_MCR(x) (0x10100 + (x * 0x100)) -#define MAC_MCR_MAX_RX_1536 BIT(24) +#define MAC_MCR_MAX_RX_MASK GENMASK(25, 24) +#define MAC_MCR_MAX_RX(_x) (MAC_MCR_MAX_RX_MASK & ((_x) << 24)) +#define MAC_MCR_MAX_RX_1518 0x0 +#define MAC_MCR_MAX_RX_1536 0x1 +#define MAC_MCR_MAX_RX_1552 0x2 +#define MAC_MCR_MAX_RX_2048 0x3 #define MAC_MCR_IPG_CFG (BIT(18) | BIT(16)) #define MAC_MCR_FORCE_MODE BIT(15) #define MAC_MCR_TX_EN BIT(14) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 134bd038ae8a..fcfc0b114985 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o fw_reset.o + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + fw_reset.o qos.o # # Netdev basic @@ -25,7 +26,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/health.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ - en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o + en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ + en/qos.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 055baf3b6cb1..26e578a973e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -55,6 +55,7 @@ #include "en_stats.h" #include "en/dcbnl.h" #include "en/fs.h" +#include "en/qos.h" #include "lib/hv_vhca.h" extern const struct net_device_ops mlx5e_netdev_ops; @@ -161,6 +162,9 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlx5e_state_dereference(priv, p) \ + rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) + enum mlx5e_rq_group { MLX5E_RQ_GROUP_REGULAR, MLX5E_RQ_GROUP_XSK, @@ -663,11 +667,13 @@ struct mlx5e_channel { struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ + struct mlx5e_txqsq __rcu * __rcu *qos_sqs; bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; __be32 
mkey_be; + u16 qos_sqs_size; u8 num_tc; u8 lag_port; @@ -756,6 +762,8 @@ struct mlx5e_modify_sq_param { int next_state; int rl_update; int rl_index; + bool qos_update; + u16 qos_queue_group_id; }; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) @@ -788,10 +796,20 @@ struct mlx5e_scratchpad { cpumask_var_t cpumask; }; +struct mlx5e_htb { + DECLARE_HASHTABLE(qos_tc2node, order_base_2(MLX5E_QOS_MAX_LEAF_NODES)); + DECLARE_BITMAP(qos_used_qids, MLX5E_QOS_MAX_LEAF_NODES); + struct mlx5e_sq_stats **qos_sq_stats; + u16 max_qos_sqs; + u16 maj_id; + u16 defcls; +}; + struct mlx5e_priv { /* priv data path fields - start */ /* +1 for port ptp ts */ - struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC]; + struct mlx5e_txqsq *txq2sq[(MLX5E_MAX_NUM_CHANNELS + 1) * MLX5E_MAX_NUM_TC + + MLX5E_QOS_MAX_LEAF_NODES]; int channel_tc2realtxq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; int port_ptp_tc2realtxq[MLX5E_MAX_NUM_TC]; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -859,6 +877,7 @@ struct mlx5e_priv { struct mlx5e_hv_vhca_stats_agent stats_agent; #endif struct mlx5e_scratchpad scratchpad; + struct mlx5e_htb htb; }; struct mlx5e_rx_handlers { @@ -986,6 +1005,7 @@ int mlx5e_safe_switch_channels(struct mlx5e_priv *priv, struct mlx5e_channels *new_chs, mlx5e_fp_preactivate preactivate, void *context); +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv); int mlx5e_num_channels_changed(struct mlx5e_priv *priv); int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context); void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); @@ -1010,6 +1030,9 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p); +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid); void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq(struct mlx5e_txqsq *sq); @@ -1020,8 +1043,10 @@ struct mlx5e_create_sq_param; int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn); void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq); static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 807147d97a0f..ea2cfb04b31a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -118,6 +118,8 @@ void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param); void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, struct mlx5e_sq_param *param); +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param); void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 2a2bac30daaa..eeddd1137dda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -261,7 +261,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_port_ptp *c, u32 tisn, csp.min_inline_mode = 
txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; - err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, &txqsq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) goto err_free_txqsq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c new file mode 100644 index 000000000000..12d7ad061237 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -0,0 +1,984 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#include "en.h" +#include "params.h" +#include "../qos.h" + +#define BYTES_IN_MBIT 125000 + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return min(MLX5E_QOS_MAX_LEAF_NODES, mlx5_qos_max_leaf_nodes(mdev)); +} + +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv) +{ + int last = find_last_bit(priv->htb.qos_used_qids, mlx5e_qos_max_leaf_nodes(priv->mdev)); + + return last == mlx5e_qos_max_leaf_nodes(priv->mdev) ? 0 : last + 1; +} + +/* Software representation of the QoS tree (internal to this file) */ + +static int mlx5e_find_unused_qos_qid(struct mlx5e_priv *priv) +{ + int size = mlx5e_qos_max_leaf_nodes(priv->mdev); + int res; + + WARN_ONCE(!mutex_is_locked(&priv->state_lock), "%s: state_lock is not held\n", __func__); + res = find_first_zero_bit(priv->htb.qos_used_qids, size); + + return res == size ? -ENOSPC : res; +} + +struct mlx5e_qos_node { + struct hlist_node hnode; + struct rcu_head rcu; + struct mlx5e_qos_node *parent; + u64 rate; + u32 bw_share; + u32 max_average_bw; + u32 hw_id; + u32 classid; /* 16-bit, except root. */ + u16 qid; +}; + +#define MLX5E_QOS_QID_INNER 0xffff +#define MLX5E_HTB_CLASSID_ROOT 0xffffffff + +static struct mlx5e_qos_node * +mlx5e_sw_node_create_leaf(struct mlx5e_priv *priv, u16 classid, u16 qid, + struct mlx5e_qos_node *parent) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + + node->qid = qid; + __set_bit(qid, priv->htb.qos_used_qids); + + node->classid = classid; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, classid); + + mlx5e_update_tx_netdev_queues(priv); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_create_root(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->qid = MLX5E_QOS_QID_INNER; + node->classid = MLX5E_HTB_CLASSID_ROOT; + hash_add_rcu(priv->htb.qos_tc2node, &node->hnode, node->classid); + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_rcu(struct mlx5e_priv *priv, u32 classid) +{ + struct mlx5e_qos_node *node = NULL; + + hash_for_each_possible_rcu(priv->htb.qos_tc2node, node, hnode, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static void mlx5e_sw_node_delete(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + hash_del_rcu(&node->hnode); + if (node->qid != MLX5E_QOS_QID_INNER) { + __clear_bit(node->qid, priv->htb.qos_used_qids); + mlx5e_update_tx_netdev_queues(priv); + } + kfree_rcu(node, rcu); +} + +/* TX datapath API */ + +static u16 mlx5e_qid_from_qos(struct 
mlx5e_channels *chs, u16 qid) +{ + /* These channel params are safe to access from the datapath, because: + * 1. This function is called only after checking priv->htb.maj_id != 0, + * and the number of queues can't change while HTB offload is active. + * 2. When priv->htb.maj_id becomes 0, synchronize_rcu waits for + * mlx5e_select_queue to finish while holding priv->state_lock, + * preventing other code from changing the number of queues. + */ + bool is_ptp = MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS); + + return (chs->params.num_channels + is_ptp) * chs->params.num_tc + qid; +} + +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid) +{ + struct mlx5e_qos_node *node; + u16 qid; + int res; + + rcu_read_lock(); + + node = mlx5e_sw_node_find_rcu(priv, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == MLX5E_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = mlx5e_qid_from_qos(&priv->channels, qid); + +out: + rcu_read_unlock(); + return res; +} + +static struct mlx5e_txqsq *mlx5e_get_qos_sq(struct mlx5e_priv *priv, int qid) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_channel *c; + int ix; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + return mlx5e_state_dereference(priv, qos_sqs[qid]); +} + +/* SQ lifecycle */ + +static int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs, + struct mlx5e_qos_node *node) +{ + struct mlx5e_create_cq_param ccp = {}; + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_sq_param param_sq; + struct mlx5e_cq_param param_cq; + int txq_ix, ix, qid, err = 0; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + + params = &chs->params; + + txq_ix = mlx5e_qid_from_qos(chs, node->qid); + + WARN_ON(node->qid > priv->htb.max_qos_sqs); + if (node->qid == priv->htb.max_qos_sqs) { + struct mlx5e_sq_stats *stats, **stats_list = NULL; + + if (priv->htb.max_qos_sqs == 0) { + stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev), + sizeof(*stats_list), + GFP_KERNEL); + if (!stats_list) + return -ENOMEM; + } + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + kvfree(stats_list); + return -ENOMEM; + } + if (stats_list) + WRITE_ONCE(priv->htb.qos_sq_stats, stats_list); + WRITE_ONCE(priv->htb.qos_sq_stats[node->qid], stats); + /* Order max_qos_sqs increment after writing the array pointer. + * Pairs with smp_load_acquire in en_stats.c. 
+ */ + smp_store_release(&priv->htb.max_qos_sqs, priv->htb.max_qos_sqs + 1); + } + + ix = node->qid % params->num_channels; + qid = node->qid / params->num_channels; + c = chs->c[ix]; + + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + + if (!sq) + return -ENOMEM; + + mlx5e_build_create_cq_param(&ccp, c); + + memset(&param_sq, 0, sizeof(param_sq)); + memset(&param_cq, 0, sizeof(param_cq)); + mlx5e_build_sq_param(priv, params, &param_sq); + mlx5e_build_tx_cq_param(priv, params, &param_cq); + err = mlx5e_open_cq(priv, params->tx_cq_moderation, &param_cq, &ccp, &sq->cq); + if (err) + goto err_free_sq; + err = mlx5e_open_txqsq(c, priv->tisn[c->lag_port][0], txq_ix, params, + &param_sq, sq, 0, node->hw_id, node->qid); + if (err) + goto err_close_cq; + + rcu_assign_pointer(qos_sqs[qid], sq); + + return 0; + +err_close_cq: + mlx5e_close_cq(&sq->cq); +err_free_sq: + kfree(sq); + return err; +} + +static void mlx5e_activate_qos_sq(struct mlx5e_priv *priv, struct mlx5e_qos_node *node) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, node->qid); + + WRITE_ONCE(priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, node->qid)], sq); + + /* Make the change to txq2sq visible before the queue is started. + * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE, + * which pairs with this barrier. + */ + smp_wmb(); + + qos_dbg(priv->mdev, "Activate QoS SQ qid %u\n", node->qid); + mlx5e_activate_txqsq(sq); +} + +static void mlx5e_deactivate_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq *sq; + + sq = mlx5e_get_qos_sq(priv, qid); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + qos_dbg(priv->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + priv->txq2sq[mlx5e_qid_from_qos(&priv->channels, qid)] = NULL; +} + +static void mlx5e_close_qos_sq(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + struct mlx5e_params *params; + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int ix; + + params = &priv->channels.params; + + ix = qid % params->num_channels; + qid /= params->num_channels; + c = priv->channels.c[ix]; + qos_sqs = mlx5e_state_dereference(priv, c->qos_sqs); + sq = rcu_replace_pointer(qos_sqs[qid], NULL, lockdep_is_held(&priv->state_lock)); + if (!sq) /* Handle the case when the SQ failed to open. */ + return; + + synchronize_rcu(); /* Sync with NAPI. */ + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); +} + +void mlx5e_qos_close_queues(struct mlx5e_channel *c) +{ + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = rcu_replace_pointer(c->qos_sqs, NULL, lockdep_is_held(&c->priv->state_lock)); + if (!qos_sqs) + return; + synchronize_rcu(); /* Sync with NAPI. */ + + for (i = 0; i < c->qos_sqs_size; i++) { + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. 
*/ + continue; + + mlx5e_close_txqsq(sq); + mlx5e_close_cq(&sq->cq); + kfree(sq); + } + + kvfree(qos_sqs); +} + +static void mlx5e_qos_close_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_close_queues(chs->c[i]); +} + +static int mlx5e_qos_alloc_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + u16 qos_sqs_size; + int i; + + qos_sqs_size = DIV_ROUND_UP(mlx5e_qos_max_leaf_nodes(priv->mdev), chs->num); + + for (i = 0; i < chs->num; i++) { + struct mlx5e_txqsq **sqs; + + sqs = kvcalloc(qos_sqs_size, sizeof(struct mlx5e_txqsq *), GFP_KERNEL); + if (!sqs) + goto err_free; + + WRITE_ONCE(chs->c[i]->qos_sqs_size, qos_sqs_size); + smp_wmb(); /* Pairs with mlx5e_napi_poll. */ + rcu_assign_pointer(chs->c[i]->qos_sqs, sqs); + } + + return 0; + +err_free: + while (--i >= 0) { + struct mlx5e_txqsq **sqs; + + sqs = rcu_replace_pointer(chs->c[i]->qos_sqs, NULL, + lockdep_is_held(&priv->state_lock)); + + synchronize_rcu(); /* Sync with NAPI. */ + kvfree(sqs); + } + return -ENOMEM; +} + +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + struct mlx5e_qos_node *node = NULL; + int bkt, err; + + if (!priv->htb.maj_id) + return 0; + + err = mlx5e_qos_alloc_queues(priv, chs); + if (err) + return err; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + err = mlx5e_open_qos_sq(priv, chs, node); + if (err) { + mlx5e_qos_close_all_queues(chs); + return err; + } + } + + return 0; +} + +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) { + if (node->qid == MLX5E_QOS_QID_INNER) + continue; + mlx5e_activate_qos_sq(priv, node); + } +} + +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c) +{ + struct mlx5e_params *params = &c->priv->channels.params; + struct mlx5e_txqsq __rcu **qos_sqs; + int i; + + qos_sqs = mlx5e_state_dereference(c->priv, c->qos_sqs); + if (!qos_sqs) + return; + + for (i = 0; i < c->qos_sqs_size; i++) { + u16 qid = params->num_channels * i + c->ix; + struct mlx5e_txqsq *sq; + + sq = mlx5e_state_dereference(c->priv, qos_sqs[i]); + if (!sq) /* Handle the case when the SQ failed to open. */ + continue; + + qos_dbg(c->mdev, "Deactivate QoS SQ qid %u\n", qid); + mlx5e_deactivate_txqsq(sq); + + /* The queue is disabled, no synchronization with datapath is needed. */ + c->priv->txq2sq[mlx5e_qid_from_qos(&c->priv->channels, qid)] = NULL; + } +} + +static void mlx5e_qos_deactivate_all_queues(struct mlx5e_channels *chs) +{ + int i; + + for (i = 0; i < chs->num; i++) + mlx5e_qos_deactivate_queues(chs->c[i]); +} + +/* HTB API */ + +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *root; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_CREATE handle %04x:, default :%04x\n", htb_maj_id, htb_defcls); + + if (!mlx5_qos_is_supported(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "Missing QoS capabilities. 
Try disabling SRIOV or use a supported device."); + return -EOPNOTSUPP; + } + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + err = mlx5e_qos_alloc_queues(priv, &priv->channels); + if (err) + return err; + } + + root = mlx5e_sw_node_create_root(priv); + if (IS_ERR(root)) { + err = PTR_ERR(root); + goto err_free_queues; + } + + err = mlx5_qos_create_root_node(priv->mdev, &root->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error. Try upgrading firmware."); + goto err_sw_node_delete; + } + + WRITE_ONCE(priv->htb.defcls, htb_defcls); + /* Order maj_id after defcls - pairs with + * mlx5e_select_queue/mlx5e_select_htb_queues. + */ + smp_store_release(&priv->htb.maj_id, htb_maj_id); + + return 0; + +err_sw_node_delete: + mlx5e_sw_node_delete(priv, root); + +err_free_queues: + if (opened) + mlx5e_qos_close_all_queues(&priv->channels); + return err; +} + +int mlx5e_htb_root_del(struct mlx5e_priv *priv) +{ + struct mlx5e_qos_node *root; + int err; + + qos_dbg(priv->mdev, "TC_HTB_DESTROY\n"); + + WRITE_ONCE(priv->htb.maj_id, 0); + synchronize_rcu(); /* Sync with mlx5e_select_htb_queue and TX data path. */ + + root = mlx5e_sw_node_find(priv, MLX5E_HTB_CLASSID_ROOT); + if (!root) { + qos_err(priv->mdev, "Failed to find the root node in the QoS tree\n"); + return -ENOENT; + } + err = mlx5_qos_destroy_node(priv->mdev, root->hw_id); + if (err) + qos_err(priv->mdev, "Failed to destroy root node %u, err = %d\n", + root->hw_id, err); + mlx5e_sw_node_delete(priv, root); + + mlx5e_qos_deactivate_all_queues(&priv->channels); + mlx5e_qos_close_all_queues(&priv->channels); + + return err; +} + +static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, + struct mlx5e_qos_node *parent, u32 *bw_share) +{ + u64 share = 0; + + while (parent->classid != MLX5E_HTB_CLASSID_ROOT && !parent->max_average_bw) + parent = parent->parent; + + if (parent->max_average_bw) + share = div64_u64(div_u64(rate * 100, BYTES_IN_MBIT), + parent->max_average_bw); + else + share = 101; + + *bw_share = share == 0 ? 1 : share > 100 ? 
0 : share; + + qos_dbg(priv->mdev, "Convert: rate %llu, parent ceil %llu -> bw_share %u\n", + rate, (u64)parent->max_average_bw * BYTES_IN_MBIT, *bw_share); + + return 0; +} + +static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) +{ + *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + + qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", + ceil, *max_average_bw); +} + +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + int qid; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_ALLOC_QUEUE classid %04x, parent %04x, rate %llu, ceil %llu\n", + classid, parent_classid, rate, ceil); + + qid = mlx5e_find_unused_qos_qid(priv); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Maximum amount of leaf classes is reached."); + return qid; + } + + parent = mlx5e_sw_node_find(priv, parent_classid); + if (!parent) + return -EINVAL; + + node = mlx5e_sw_node_create_leaf(priv, classid, qid, parent); + if (IS_ERR(node)) + return PTR_ERR(node); + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &node->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &node->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + mlx5e_sw_node_delete(priv, node); + return err; + } + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + return mlx5e_qid_from_qos(&priv->channels, node->qid); +} + +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *child; + int err, tmp_err; + u32 new_hw_id; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_TO_INNER classid %04x, upcoming child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_inner_node(priv->mdev, node->parent->hw_id, + node->bw_share, node->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating an inner node."); + qos_err(priv->mdev, "Failed to create an inner node (class %04x), err = %d\n", + classid, err); + return err; + } + + /* Intentionally reuse the qid for the upcoming first child. */ + child = mlx5e_sw_node_create_leaf(priv, child_classid, node->qid, node); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto err_destroy_hw_node; + } + + child->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node, &child->bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &child->max_average_bw); + + err = mlx5_qos_create_leaf_node(priv->mdev, new_hw_id, child->bw_share, + child->max_average_bw, &child->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + goto err_delete_sw_node; + } + + /* No fail point. 
*/ + + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, child); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, child); + } + } + + return 0; + +err_delete_sw_node: + child->qid = MLX5E_QOS_QID_INNER; + mlx5e_sw_node_delete(priv, child); + +err_destroy_hw_node: + tmp_err = mlx5_qos_destroy_node(priv->mdev, new_hw_id); + if (tmp_err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to roll back creation of an inner node %u (class %04x), err = %d\n", + new_hw_id, classid, tmp_err); + return err; +} + +static struct mlx5e_qos_node *mlx5e_sw_node_find_by_qid(struct mlx5e_priv *priv, u16 qid) +{ + struct mlx5e_qos_node *node = NULL; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, node, hnode) + if (node->qid == qid) + break; + + return node; +} + +static void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_queue *txq) +{ + qos_dbg(priv->mdev, "Reactivate QoS SQ qid %u\n", qid); + netdev_tx_reset_queue(txq); + netif_tx_start_queue(txq); +} + +static void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) +{ + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + + if (!qdisc) + return; + + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); +} + +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node; + struct netdev_queue *txq; + u16 qid, moved_qid; + bool opened; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL classid %04x\n", classid); + + *old_qid = *new_qid = 0; + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + /* Store qid for reuse. */ + qid = node->qid; + + opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, qid)); + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + mlx5e_sw_node_delete(priv, node); + + moved_qid = mlx5e_qos_cur_leaf_nodes(priv); + + if (moved_qid == 0) { + /* The last QoS SQ was just destroyed. */ + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + moved_qid--; + + if (moved_qid < qid) { + /* The highest QoS SQ was just destroyed. 
*/ + WARN(moved_qid != qid - 1, "Gaps in queue numeration: destroyed queue %u, the highest queue is %u", + qid, moved_qid); + if (opened) + mlx5e_reactivate_qos_sq(priv, qid, txq); + return 0; + } + + WARN(moved_qid == qid, "Can't move node with qid %u to itself", qid); + qos_dbg(priv->mdev, "Moving QoS SQ %u to %u\n", moved_qid, qid); + + node = mlx5e_sw_node_find_by_qid(priv, moved_qid); + WARN(!node, "Could not find a node with qid %u to move to queue %u", + moved_qid, qid); + + /* Stop traffic to the old queue. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + __clear_bit(moved_qid, priv->htb.qos_used_qids); + + if (opened) { + txq = netdev_get_tx_queue(priv->netdev, + mlx5e_qid_from_qos(&priv->channels, moved_qid)); + mlx5e_deactivate_qos_sq(priv, moved_qid); + mlx5e_close_qos_sq(priv, moved_qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, moved_qid); + + __set_bit(qid, priv->htb.qos_used_qids); + WRITE_ONCE(node->qid, qid); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x) while moving qid %u to %u, err = %d\n", + node->classid, moved_qid, qid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + mlx5e_update_tx_netdev_queues(priv); + if (opened) + mlx5e_reactivate_qos_sq(priv, moved_qid, txq); + + *old_qid = mlx5e_qid_from_qos(&priv->channels, moved_qid); + *new_qid = mlx5e_qid_from_qos(&priv->channels, qid); + return 0; +} + +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *node, *parent; + u32 old_hw_id, new_hw_id; + int err, saved_err = 0; + u16 qid; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_DEL_LAST%s classid %04x\n", + force ? "_FORCE" : "", classid); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + err = mlx5_qos_create_leaf_node(priv->mdev, node->parent->parent->hw_id, + node->parent->bw_share, + node->parent->max_average_bw, + &new_hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when creating a leaf node."); + qos_err(priv->mdev, "Failed to create a leaf node (class %04x), err = %d\n", + classid, err); + if (!force) + return err; + saved_err = err; + } + + /* Store qid for reuse and prevent clearing the bit. */ + qid = node->qid; + /* Pairs with mlx5e_get_txq_by_classid. */ + WRITE_ONCE(node->qid, MLX5E_QOS_QID_INNER); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_deactivate_qos_sq(priv, qid); + mlx5e_close_qos_sq(priv, qid); + } + + /* Prevent packets from the old class from getting into the new one. */ + mlx5e_reset_qdisc(priv->netdev, qid); + + err = mlx5_qos_destroy_node(priv->mdev, node->hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + parent = node->parent; + mlx5e_sw_node_delete(priv, node); + + node = parent; + WRITE_ONCE(node->qid, qid); + + /* Early return on error in force mode. Parent will still be an inner + * node to be deleted by a following delete operation. 
+ */ + if (saved_err) + return saved_err; + + old_hw_id = node->hw_id; + node->hw_id = new_hw_id; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_open_qos_sq(priv, &priv->channels, node); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error creating an SQ."); + qos_warn(priv->mdev, "Failed to create a QoS SQ (class %04x), err = %d\n", + classid, err); + } else { + mlx5e_activate_qos_sq(priv, node); + } + } + + err = mlx5_qos_destroy_node(priv->mdev, old_hw_id); + if (err) /* Not fatal. */ + qos_warn(priv->mdev, "Failed to destroy leaf node %u (class %04x), err = %d\n", + node->hw_id, classid, err); + + return 0; +} + +static int mlx5e_qos_update_children(struct mlx5e_priv *priv, struct mlx5e_qos_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5e_qos_node *child; + int err = 0; + int bkt; + + hash_for_each(priv->htb.qos_tc2node, bkt, child, hnode) { + u32 old_bw_share = child->bw_share; + int err_one; + + if (child->parent != node) + continue; + + mlx5e_htb_convert_rate(priv, child->rate, node, &child->bw_share); + if (child->bw_share == old_bw_share) + continue; + + err_one = mlx5_qos_update_node(priv->mdev, child->hw_id, child->bw_share, + child->max_average_bw, child->hw_id); + if (!err && err_one) { + err = err_one; + + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a child node."); + qos_err(priv->mdev, "Failed to modify a child node (class %04x), err = %d\n", + node->classid, err); + } + } + + return err; +} + +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack) +{ + u32 bw_share, max_average_bw; + struct mlx5e_qos_node *node; + bool ceil_changed = false; + int err; + + qos_dbg(priv->mdev, "TC_HTB_LEAF_MODIFY classid %04x, rate %llu, ceil %llu\n", + classid, rate, ceil); + + node = mlx5e_sw_node_find(priv, classid); + if (!node) + return -ENOENT; + + node->rate = rate; + mlx5e_htb_convert_rate(priv, rate, node->parent, &bw_share); + mlx5e_htb_convert_ceil(priv, ceil, &max_average_bw); + + err = mlx5_qos_update_node(priv->mdev, node->parent->hw_id, bw_share, + max_average_bw, node->hw_id); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware error when modifying a node."); + qos_err(priv->mdev, "Failed to modify a node (class %04x), err = %d\n", + classid, err); + return err; + } + + if (max_average_bw != node->max_average_bw) + ceil_changed = true; + + node->bw_share = bw_share; + node->max_average_bw = max_average_bw; + + if (ceil_changed) + err = mlx5e_qos_update_children(priv, node, extack); + + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h new file mode 100644 index 000000000000..5af7991fcd19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#ifndef __MLX5E_EN_QOS_H +#define __MLX5E_EN_QOS_H + +#include <linux/mlx5/driver.h> + +#define MLX5E_QOS_MAX_LEAF_NODES 256 + +struct mlx5e_priv; +struct mlx5e_channels; +struct mlx5e_channel; + +int mlx5e_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); +int mlx5e_qos_cur_leaf_nodes(struct mlx5e_priv *priv); + +/* TX datapath API */ +int mlx5e_get_txq_by_classid(struct mlx5e_priv *priv, u16 classid); +struct mlx5e_txqsq *mlx5e_get_sq(struct mlx5e_priv *priv, int qid); + +/* SQ lifecycle */ +int mlx5e_qos_open_queues(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_qos_activate_queues(struct mlx5e_priv *priv); +void mlx5e_qos_deactivate_queues(struct mlx5e_channel *c); +void mlx5e_qos_close_queues(struct mlx5e_channel *c); + +/* HTB API */ +int mlx5e_htb_root_add(struct mlx5e_priv *priv, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack); +int mlx5e_htb_root_del(struct mlx5e_priv *priv); +int mlx5e_htb_leaf_alloc_queue(struct mlx5e_priv *priv, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_to_inner(struct mlx5e_priv *priv, u16 classid, u16 child_classid, + u64 rate, u64 ceil, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del(struct mlx5e_priv *priv, u16 classid, u16 *old_qid, + u16 *new_qid, struct netlink_ext_ack *extack); +int mlx5e_htb_leaf_del_last(struct mlx5e_priv *priv, u16 classid, bool force, + struct netlink_ext_ack *extack); +int mlx5e_htb_node_modify(struct mlx5e_priv *priv, u16 classid, u64 rate, u64 ceil, + struct netlink_ext_ack *extack); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2d37742a888c..2e5a0696374a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -447,6 +447,17 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, goto out; } + /* Don't allow changing the number of channels if HTB offload is active, + * because the numeration of the QoS SQs will change, while per-queue + * qdiscs are attached. + */ + if (priv->htb.maj_id) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; @@ -1966,6 +1977,16 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) if (!MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) return -EOPNOTSUPP; + /* Don't allow changing the PTP state if HTB offload is active, because + * the numeration of the QoS SQs will change, while per-queue qdiscs are + * attached. 
+ */ + if (priv->htb.maj_id) { + netdev_err(priv->netdev, "%s: HTB offload is active, cannot change the PTP state\n", + __func__); + return -EINVAL; + } + new_channels.params = priv->channels.params; MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_TX_PORT_TS, enable); /* No need to verify SQ stop room as diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f33c38629886..b9a175982801 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -65,6 +65,7 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "qos.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -1143,7 +1144,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); @@ -1233,6 +1233,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, struct mlx5e_modify_sq_param *p) { + u64 bitmask = 0; void *in; void *sqc; int inlen; @@ -1248,9 +1249,14 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); MLX5_SET(sqc, sqc, state, p->next_state); if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { - MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); + bitmask |= 1; + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } + if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) { + bitmask |= 1 << 2; + MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id); + } + MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask); err = mlx5_core_modify_sq(mdev, sqn, in); @@ -1267,6 +1273,7 @@ static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, struct mlx5e_sq_param *param, struct mlx5e_create_sq_param *csp, + u16 qos_queue_group_id, u32 *sqn) { struct mlx5e_modify_sq_param msp = {0}; @@ -1278,6 +1285,10 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, msp.curr_state = MLX5_SQC_STATE_RST; msp.next_state = MLX5_SQC_STATE_RDY; + if (qos_queue_group_id) { + msp.qos_update = true; + msp.qos_queue_group_id = qos_queue_group_id; + } err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) mlx5e_destroy_sq(mdev, *sqn); @@ -1288,13 +1299,9 @@ int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, static int mlx5e_set_sq_maxrate(struct net_device *dev, struct mlx5e_txqsq *sq, u32 rate); -static int mlx5e_open_txqsq(struct mlx5e_channel *c, - u32 tisn, - int txq_ix, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_txqsq *sq, - int tc) +int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, + struct mlx5e_params *params, struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id, u16 qos_qid) { struct mlx5e_create_sq_param csp = {}; u32 tx_rate; @@ -1304,12 +1311,17 @@ static int mlx5e_open_txqsq(struct mlx5e_channel *c, if (err) return err; + if (qos_queue_group_id) + sq->stats = c->priv->htb.qos_sq_stats[qos_qid]; + else + sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + csp.tisn = tisn; 
csp.tis_lst_sz = 1; csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -1366,7 +1378,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) } } -static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) { struct mlx5_core_dev *mdev = sq->mdev; struct mlx5_rate_limit rl = {0}; @@ -1403,7 +1415,7 @@ int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -1452,7 +1464,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; @@ -1703,7 +1715,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, int txq_ix = c->ix + tc * params->num_channels; err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix, - params, &cparam->txq_sq, &c->sq[tc], tc); + params, &cparam->txq_sq, &c->sq[tc], tc, 0, 0); if (err) goto err_close_sqs; } @@ -2044,6 +2056,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_icosq(&c->icosq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); + + mlx5e_qos_deactivate_queues(c); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2051,6 +2065,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) mlx5e_close_xsk(c); mlx5e_close_queues(c); + mlx5e_qos_close_queues(c); netif_napi_del(&c->napi); kvfree(c); @@ -2198,9 +2213,8 @@ void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2379,10 +2393,18 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, goto err_close_channels; } + err = mlx5e_qos_open_queues(priv, chs); + if (err) + goto err_close_ptp; + mlx5e_health_channels_update(priv); kvfree(cparam); return 0; +err_close_ptp: + if (chs->port_ptp) + mlx5e_port_ptp_close(chs->port_ptp); + err_close_channels: for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); @@ -2915,11 +2937,31 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc) netdev_set_tc_queue(netdev, tc, nch, 0); } +int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) +{ + int qos_queues, nch, ntc, num_txqs, err; + + qos_queues = mlx5e_qos_cur_leaf_nodes(priv); + + nch = priv->channels.params.num_channels; + ntc = priv->channels.params.num_tc; + num_txqs = nch * ntc + qos_queues; + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) + num_txqs += ntc; + + mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + err = 
netif_set_real_num_tx_queues(priv->netdev, num_txqs); + if (err) + netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + + return err; +} + static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; - int num_txqs, num_rxqs, nch, ntc; int old_num_txqs, old_ntc; + int num_rxqs, nch, ntc; int err; old_num_txqs = netdev->real_num_tx_queues; @@ -2927,18 +2969,13 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv) nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; - num_txqs = nch * ntc; - if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) - num_txqs += ntc; num_rxqs = nch * priv->profile->rq_groups; mlx5e_netdev_set_tcs(netdev, nch, ntc); - err = netif_set_real_num_tx_queues(netdev, num_txqs); - if (err) { - netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err); + err = mlx5e_update_tx_netdev_queues(priv); + if (err) goto err_tcs; - } err = netif_set_real_num_rx_queues(netdev, num_rxqs); if (err) { netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err); @@ -3042,6 +3079,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_update_num_tc_x_num_ch(priv); mlx5e_build_txq_maps(priv); mlx5e_activate_channels(&priv->channels); + mlx5e_qos_activate_queues(priv); mlx5e_xdp_tx_enable(priv); netif_tx_start_all_queues(priv->netdev); @@ -3608,6 +3646,14 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, mutex_lock(&priv->state_lock); + /* MQPRIO is another toplevel qdisc that can't be attached + * simultaneously with the offloaded HTB. + */ + if (WARN_ON(priv->htb.maj_id)) { + err = -EINVAL; + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_tc = tc ? 
tc : 1; @@ -3628,12 +3674,55 @@ out: return err; } +static int mlx5e_setup_tc_htb(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb) +{ + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return mlx5e_htb_root_add(priv, htb->parent_classid, htb->classid, + htb->extack); + case TC_HTB_DESTROY: + return mlx5e_htb_root_del(priv); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = mlx5e_htb_leaf_alloc_queue(priv, htb->classid, htb->parent_classid, + htb->rate, htb->ceil, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return mlx5e_htb_leaf_to_inner(priv, htb->parent_classid, htb->classid, + htb->rate, htb->ceil, htb->extack); + case TC_HTB_LEAF_DEL: + return mlx5e_htb_leaf_del(priv, htb->classid, &htb->moved_qid, &htb->qid, + htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return mlx5e_htb_leaf_del_last(priv, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_NODE_MODIFY: + return mlx5e_htb_node_modify(priv, htb->classid, htb->rate, htb->ceil, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = mlx5e_get_txq_by_classid(priv, htb->classid); + if (res < 0) + return res; + htb->qid = res; + return 0; + default: + return -EOPNOTSUPP; + } +} + static LIST_HEAD(mlx5e_block_cb_list); static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + int err; switch (type) { case TC_SETUP_BLOCK: { @@ -3647,6 +3736,11 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, } case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(priv, type_data); + case TC_SETUP_QDISC_HTB: + mutex_lock(&priv->state_lock); + err = mlx5e_setup_tc_htb(priv, type_data); + mutex_unlock(&priv->state_lock); + return err; default: return -EOPNOTSUPP; } @@ -3811,20 +3905,25 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable) return 0; } -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) -static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +static int set_feature_hw_tc(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } +#endif + + if (!enable && priv->htb.maj_id) { + netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n"); + return -EINVAL; + } return 0; } -#endif static int set_feature_rx_all(struct net_device *netdev, bool enable) { @@ -3922,9 +4021,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, set_feature_cvlan_filter); -#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) - err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters); -#endif + err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all); err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); @@ -5028,6 +5125,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } + if (mlx5_qos_is_supported(mdev)) + netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; 
netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; @@ -5333,6 +5432,7 @@ int mlx5e_netdev_init(struct net_device *netdev, return -ENOMEM; mutex_init(&priv->state_lock); + hash_init(priv->htb.qos_tc2node); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); @@ -5355,8 +5455,14 @@ err_free_cpumask: void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) { + int i; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask); + + for (i = 0; i < priv->htb.max_qos_sqs; i++) + kfree(priv->htb.qos_sq_stats[i]); + kvfree(priv->htb.qos_sq_stats); } struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, @@ -5366,13 +5472,17 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, { struct net_device *netdev; unsigned int ptp_txqs = 0; + int qos_sqs = 0; int err; if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn)) ptp_txqs = profile->max_tc; + if (mlx5_qos_is_supported(mdev)) + qos_sqs = mlx5e_qos_max_leaf_nodes(mdev); + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * profile->max_tc + ptp_txqs, + nch * profile->max_tc + ptp_txqs + qos_sqs, nch * profile->rq_groups); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 2cf2042b37c7..92c5b81427b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -420,6 +420,25 @@ static void mlx5e_stats_grp_sw_update_stats_ptp(struct mlx5e_priv *priv, } } +static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, + struct mlx5e_sw_stats *s) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. 
*/ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (i = 0; i < max_qos_sqs; i++) { + mlx5e_stats_grp_sw_update_stats_sq(s, READ_ONCE(stats[i])); + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); + } +} + static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -449,6 +468,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) } } mlx5e_stats_grp_sw_update_stats_ptp(priv, s); + mlx5e_stats_grp_sw_update_stats_qos(priv, s); } static const struct counter_desc q_stats_desc[] = { @@ -1740,6 +1760,41 @@ static const struct counter_desc ptp_cq_stats_desc[] = { { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) }, }; +static const struct counter_desc qos_sq_stats_desc[] = { + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, +#endif + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, xmit_more) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, recover) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqes) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, cqe_err) }, +}; + #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) @@ -1750,6 +1805,49 @@ static const struct counter_desc ptp_cq_stats_desc[] = { #define NUM_PTP_SQ_STATS ARRAY_SIZE(ptp_sq_stats_desc) #define NUM_PTP_CH_STATS ARRAY_SIZE(ptp_ch_stats_desc) #define NUM_PTP_CQ_STATS ARRAY_SIZE(ptp_cq_stats_desc) +#define NUM_QOS_SQ_STATS ARRAY_SIZE(qos_sq_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. 
*/ + return NUM_QOS_SQ_STATS * smp_load_acquire(&priv->htb.max_qos_sqs); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(qos) +{ + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + u16 max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + int i, qid; + + for (qid = 0; qid < max_qos_sqs; qid++) + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + qos_sq_stats_desc[i].format, qid); + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qos) +{ + struct mlx5e_sq_stats **stats; + u16 max_qos_sqs; + int i, qid; + + /* Pairs with smp_store_release in mlx5e_open_qos_sq. */ + max_qos_sqs = smp_load_acquire(&priv->htb.max_qos_sqs); + stats = READ_ONCE(priv->htb.qos_sq_stats); + + for (qid = 0; qid < max_qos_sqs; qid++) { + struct mlx5e_sq_stats *s = READ_ONCE(stats[qid]); + + for (i = 0; i < NUM_QOS_SQ_STATS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(s, qos_sq_stats_desc, i); + } + + return idx; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qos) { return; } static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp) { @@ -1932,6 +2030,7 @@ MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0); MLX5E_DEFINE_STATS_GRP(eth_ext, 0); static MLX5E_DEFINE_STATS_GRP(tls, 0); static MLX5E_DEFINE_STATS_GRP(ptp, 0); +static MLX5E_DEFINE_STATS_GRP(qos, 0); /* The stats groups order is opposite to the update_stats() order calls */ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { @@ -1955,6 +2054,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { &MLX5E_STATS_GRP(channels), &MLX5E_STATS_GRP(per_port_buff_congest), &MLX5E_STATS_GRP(ptp), + &MLX5E_STATS_GRP(qos), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e41fc11f2ce7..93c41312fb03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -55,6 +55,8 @@ #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) + struct counter_desc { char format[ETH_GSTRING_LEN]; size_t offset; /* Byte offset */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 74f233eece54..da6a358a8a10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -106,28 +106,53 @@ return_txq: return priv->port_ptp_tc2realtxq[up]; } +static int mlx5e_select_htb_queue(struct mlx5e_priv *priv, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(priv->htb.defcls); + + if (!classid) + return 0; + + return mlx5e_get_txq_by_classid(priv, classid); +} + u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { struct mlx5e_priv *priv = netdev_priv(dev); + int num_tc_x_num_ch; int txq_ix; int up = 0; int ch_ix; - if (unlikely(priv->channels.port_ptp)) { - int num_tc_x_num_ch; + /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ + num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(dev->real_num_tx_queues > num_tc_x_num_ch)) { + /* Order maj_id before defcls - pairs with mlx5e_htb_root_add. 
*/ + u16 htb_maj_id = smp_load_acquire(&priv->htb.maj_id); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - mlx5e_use_ptpsq(skb)) - return mlx5e_select_ptpsq(dev, skb); + if (unlikely(htb_maj_id)) { + txq_ix = mlx5e_select_htb_queue(priv, skb, htb_maj_id); + if (txq_ix > 0) + return txq_ix; + } - /* Sync with mlx5e_update_num_tc_x_num_ch - avoid refetching. */ - num_tc_x_num_ch = READ_ONCE(priv->num_tc_x_num_ch); + if (unlikely(priv->channels.port_ptp)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + mlx5e_use_ptpsq(skb)) + return mlx5e_select_ptpsq(dev, skb); txq_ix = netdev_pick_tx(dev, skb, NULL); - /* Fix netdev_pick_tx() not to choose ptp_channel txqs. + /* Fix netdev_pick_tx() not to choose ptp_channel and HTB txqs. * If they are selected, switch to regular queues. - * Driver to select these queues only at mlx5e_select_ptpsq(). + * Driver to select these queues only at mlx5e_select_ptpsq() + * and mlx5e_select_htb_queue(). */ if (unlikely(txq_ix >= num_tc_x_num_ch)) txq_ix %= num_tc_x_num_ch; @@ -702,6 +727,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; + if (unlikely(!sq)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } /* May send SKBs and WQEs. */ if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a3cfe06d5116..d54da3797c30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -115,17 +115,21 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) napi); struct mlx5e_ch_stats *ch_stats = c->stats; struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_txqsq __rcu **qos_sqs; struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; bool aff_change = false; bool busy_xsk = false; bool busy = false; int work_done = 0; + u16 qos_sqs_size; bool xsk_open; int i; rcu_read_lock(); + qos_sqs = rcu_dereference(c->qos_sqs); + xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); ch_stats->poll++; @@ -133,6 +137,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (unlikely(qos_sqs)) { + smp_rmb(); /* Pairs with mlx5e_qos_alloc_queues. */ + qos_sqs_size = READ_ONCE(c->qos_sqs_size); + + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) + busy |= mlx5e_poll_tx_cq(&sq->cq, budget); + } + } + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) @@ -186,6 +202,16 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_handle_tx_dim(&c->sq[i]); mlx5e_cq_arm(&c->sq[i].cq); } + if (unlikely(qos_sqs)) { + for (i = 0; i < qos_sqs_size; i++) { + struct mlx5e_txqsq *sq = rcu_dereference(qos_sqs[i]); + + if (sq) { + mlx5e_handle_tx_dim(sq); + mlx5e_cq_arm(&sq->cq); + } + } + } mlx5e_handle_rx_dim(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c new file mode 100644 index 000000000000..0777be24a307 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. 
*/ + +#include "qos.h" + +#define MLX5_QOS_DEFAULT_DWRR_UID 0 + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, qos)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_sq_scheduling)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_bw_share)) + return false; + if (!MLX5_CAP_QOS(mdev, nic_rate_limit)) + return false; + return true; +} + +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev) +{ + return 1 << MLX5_CAP_QOS(mdev, log_max_qos_nic_queue_group); +} + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + void *attr; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id); +} + +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id) +{ + return mlx5_qos_create_inner_node(mdev, MLX5_QOS_DEFAULT_DWRR_UID, 0, 0, id); +} + +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 id) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; + u32 bitmask = 0; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_avg_bw); + + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, + sched_ctx, id, bitmask); +} + +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id) +{ + return mlx5_destroy_scheduling_element_cmd(mdev, SCHEDULING_HIERARCHY_NIC, id); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/qos.h new file mode 100644 index 000000000000..125e4e47e6f7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_QOS_H +#define __MLX5_QOS_H + +#include "mlx5_core.h" + +#define MLX5_DEBUG_QOS_MASK BIT(4) + +#define qos_err(mdev, fmt, ...) \ + mlx5_core_err(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_warn(mdev, fmt, ...) \ + mlx5_core_warn(mdev, "QoS: " fmt, ##__VA_ARGS__) +#define qos_dbg(mdev, fmt, ...) 
\ + mlx5_core_dbg_mask(mdev, MLX5_DEBUG_QOS_MASK, "QoS: " fmt, ##__VA_ARGS__) + +bool mlx5_qos_is_supported(struct mlx5_core_dev *mdev); +int mlx5_qos_max_leaf_nodes(struct mlx5_core_dev *mdev); + +int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, + u32 bw_share, u32 max_avg_bw, u32 *id); +int mlx5_qos_create_root_node(struct mlx5_core_dev *mdev, u32 *id); +int mlx5_qos_update_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 bw_share, + u32 max_avg_bw, u32 id); +int mlx5_qos_destroy_node(struct mlx5_core_dev *mdev, u32 id); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 685037e052af..52fdc34251ba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -84,6 +84,7 @@ struct mlxsw_core { struct mlxsw_thermal *thermal; struct mlxsw_core_port *ports; unsigned int max_ports; + atomic_t active_ports_count; bool fw_flash_in_progress; struct { struct devlink_health_reporter *fw_fatal; @@ -96,8 +97,36 @@ struct mlxsw_core { #define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 -static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) +static u64 mlxsw_ports_occ_get(void *priv) { + struct mlxsw_core *mlxsw_core = priv; + + return atomic_read(&mlxsw_core->active_ports_count); +} + +static int mlxsw_core_resources_ports_register(struct mlxsw_core *mlxsw_core) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_resource_size_params ports_num_params; + u32 max_ports; + + max_ports = mlxsw_core->max_ports - 1; + devlink_resource_size_params_init(&ports_num_params, max_ports, + max_ports, 1, + DEVLINK_RESOURCE_UNIT_ENTRY); + + return devlink_resource_register(devlink, + DEVLINK_RESOURCE_GENERIC_NAME_PORTS, + max_ports, MLXSW_CORE_RESOURCE_PORTS, + DEVLINK_RESOURCE_ID_PARENT_TOP, + &ports_num_params); +} + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core, bool reload) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + int err; + /* Switch ports are numbered from 1 to queried value */ if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, @@ -110,11 +139,30 @@ static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) if (!mlxsw_core->ports) return -ENOMEM; + if (!reload) { + err = mlxsw_core_resources_ports_register(mlxsw_core); + if (err) + goto err_resources_ports_register; + } + atomic_set(&mlxsw_core->active_ports_count, 0); + devlink_resource_occ_get_register(devlink, MLXSW_CORE_RESOURCE_PORTS, + mlxsw_ports_occ_get, mlxsw_core); + return 0; + +err_resources_ports_register: + kfree(mlxsw_core->ports); + return err; } -static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core) +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core, bool reload) { + struct devlink *devlink = priv_to_devlink(mlxsw_core); + + devlink_resource_occ_get_unregister(devlink, MLXSW_CORE_RESOURCE_PORTS); + if (!reload) + devlink_resources_unregister(priv_to_devlink(mlxsw_core), NULL); + kfree(mlxsw_core->ports); } @@ -1897,7 +1945,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_register_resources; } - err = mlxsw_ports_init(mlxsw_core); + err = mlxsw_ports_init(mlxsw_core, reload); if (err) goto err_ports_init; @@ -1986,7 +2034,7 @@ err_devlink_register: err_emad_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: - 
mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); err_ports_init: if (!reload) devlink_resources_unregister(devlink, NULL); @@ -2056,7 +2104,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); kfree(mlxsw_core->lag.mapping); - mlxsw_ports_fini(mlxsw_core); + mlxsw_ports_fini(mlxsw_core, reload); if (!reload) devlink_resources_unregister(devlink, NULL); mlxsw_core->bus->fini(mlxsw_core->bus_priv); @@ -2755,16 +2803,25 @@ int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port, const unsigned char *switch_id, unsigned char switch_id_len) { - return __mlxsw_core_port_init(mlxsw_core, local_port, - DEVLINK_PORT_FLAVOUR_PHYSICAL, - port_number, split, split_port_subnumber, - splittable, lanes, - switch_id, switch_id_len); + int err; + + err = __mlxsw_core_port_init(mlxsw_core, local_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + port_number, split, split_port_subnumber, + splittable, lanes, + switch_id, switch_id_len); + if (err) + return err; + + atomic_inc(&mlxsw_core->active_ports_count); + return 0; } EXPORT_SYMBOL(mlxsw_core_port_init); void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port) { + atomic_dec(&mlxsw_core->active_ports_count); + __mlxsw_core_port_fini(mlxsw_core, local_port); } EXPORT_SYMBOL(mlxsw_core_port_fini); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6b3ccbf6b238..8af7d9d03475 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -19,6 +19,11 @@ #include "cmd.h" #include "resources.h" +enum mlxsw_core_resource_id { + MLXSW_CORE_RESOURCE_PORTS = 1, + MLXSW_CORE_RESOURCE_MAX, +}; + struct mlxsw_core; struct mlxsw_core_port; struct mlxsw_driver; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a6956cfc9cb1..a3769f95a182 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -52,7 +52,7 @@ #define MLXSW_SP_RESOURCE_NAME_COUNTERS_RIF "rif" enum mlxsw_sp_resource_id { - MLXSW_SP_RESOURCE_KVD = 1, + MLXSW_SP_RESOURCE_KVD = MLXSW_CORE_RESOURCE_MAX, MLXSW_SP_RESOURCE_KVD_LINEAR, MLXSW_SP_RESOURCE_KVD_HASH_SINGLE, MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fb67d8f797ec..475e6f01ea10 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4850,10 +4850,8 @@ static void rtl_shutdown(struct pci_dev *pdev) rtl_rar_set(tp, tp->dev->perm_addr); if (system_state == SYSTEM_POWER_OFF) { - if (tp->saved_wolopts) { - rtl_wol_enable_rx(tp); + if (tp->saved_wolopts) rtl_wol_shutdown_quirk(tp); - } pci_wake_from_d3(pdev, tp->saved_wolopts); pci_set_power_state(pdev, PCI_D3hot); diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index d067da1ef070..967a634ee9ac 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -923,7 +923,7 @@ static int ofdpa_flow_tbl_bridge(struct ofdpa_port *ofdpa_port, struct ofdpa_flow_tbl_entry *entry; u32 priority; bool vlan_bridging = !!vlan_id; - bool dflt = !eth_dst || (eth_dst && eth_dst_mask); + bool dflt = !eth_dst || eth_dst_mask; bool wild = false; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); diff --git a/drivers/net/ethernet/sfc/efx_channels.c 
b/drivers/net/ethernet/sfc/efx_channels.c index a4a626e9cd9a..1bfeee283ea9 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -17,6 +17,7 @@ #include "rx_common.h" #include "nic.h" #include "sriov.h" +#include "workarounds.h" /* This is the first interrupt mode to try out of: * 0 => MSI-X @@ -137,6 +138,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, { unsigned int n_channels = parallelism; int vec_count; + int tx_per_ev; int n_xdp_tx; int n_xdp_ev; @@ -149,9 +151,9 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, * multiple tx queues, assuming tx and ev queues are both * maximum size. */ - + tx_per_ev = EFX_MAX_EVQ_SIZE / EFX_TXQ_MAX_ENT(efx); n_xdp_tx = num_possible_cpus(); - n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL); + n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, tx_per_ev); vec_count = pci_msix_vec_count(efx->pci_dev); if (vec_count < 0) diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 5b29f7d9d6ac..f79cf3c327c1 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -272,7 +272,7 @@ static void gsi_irq_ch_ctrl_disable(struct gsi *gsi) iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); } -static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_enable_one(struct gsi *gsi, u32 evt_ring_id) { bool enable_ieob = !gsi->ieob_enabled_bitmap; u32 val; @@ -286,11 +286,11 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) gsi_irq_type_enable(gsi, GSI_IEOB); } -static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) +static void gsi_irq_ieob_disable(struct gsi *gsi, u32 event_mask) { u32 val; - gsi->ieob_enabled_bitmap &= ~BIT(evt_ring_id); + gsi->ieob_enabled_bitmap &= ~event_mask; /* Disable the interrupt type if this was the last enabled channel */ if (!gsi->ieob_enabled_bitmap) @@ -300,6 +300,11 @@ static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); } +static void gsi_irq_ieob_disable_one(struct gsi *gsi, u32 evt_ring_id) +{ + gsi_irq_ieob_disable(gsi, BIT(evt_ring_id)); +} + /* Enable all GSI_interrupt types */ static void gsi_irq_enable(struct gsi *gsi) { @@ -766,13 +771,13 @@ static void gsi_channel_freeze(struct gsi_channel *channel) napi_disable(&channel->napi); - gsi_irq_ieob_disable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_disable_one(channel->gsi, channel->evt_ring_id); } /* Allow transactions to be used on the channel again. 
*/ static void gsi_channel_thaw(struct gsi_channel *channel) { - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); napi_enable(&channel->napi); } @@ -1200,6 +1205,7 @@ static void gsi_isr_ieob(struct gsi *gsi) u32 event_mask; event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET); + gsi_irq_ieob_disable(gsi, event_mask); iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET); while (event_mask) { @@ -1207,7 +1213,6 @@ static void gsi_isr_ieob(struct gsi *gsi) event_mask ^= BIT(evt_ring_id); - gsi_irq_ieob_disable(gsi, evt_ring_id); napi_schedule(&gsi->evt_ring[evt_ring_id].channel->napi); } } @@ -1452,7 +1457,7 @@ void gsi_channel_doorbell(struct gsi_channel *channel) } /* Consult hardware, move any newly completed transactions to completed list */ -static void gsi_channel_update(struct gsi_channel *channel) +static struct gsi_trans *gsi_channel_update(struct gsi_channel *channel) { u32 evt_ring_id = channel->evt_ring_id; struct gsi *gsi = channel->gsi; @@ -1471,7 +1476,7 @@ static void gsi_channel_update(struct gsi_channel *channel) offset = GSI_EV_CH_E_CNTXT_4_OFFSET(evt_ring_id); index = gsi_ring_index(ring, ioread32(gsi->virt + offset)); if (index == ring->index % ring->count) - return; + return NULL; /* Get the transaction for the latest completed event. Take a * reference to keep it from completing before we give the events @@ -1496,6 +1501,8 @@ static void gsi_channel_update(struct gsi_channel *channel) gsi_evt_ring_doorbell(channel->gsi, channel->evt_ring_id, index); gsi_trans_free(trans); + + return gsi_channel_trans_complete(channel); } /** @@ -1516,11 +1523,8 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) /* Get the first transaction from the completed list */ trans = gsi_channel_trans_complete(channel); - if (!trans) { - /* List is empty; see if there's more to do */ - gsi_channel_update(channel); - trans = gsi_channel_trans_complete(channel); - } + if (!trans) /* List is empty; see if there's more to do */ + trans = gsi_channel_update(channel); if (trans) gsi_trans_move_polled(trans); @@ -1543,23 +1547,20 @@ static struct gsi_trans *gsi_channel_poll_one(struct gsi_channel *channel) static int gsi_channel_poll(struct napi_struct *napi, int budget) { struct gsi_channel *channel; - int count = 0; + int count; channel = container_of(napi, struct gsi_channel, napi); - while (count < budget) { + for (count = 0; count < budget; count++) { struct gsi_trans *trans; - count++; trans = gsi_channel_poll_one(channel); if (!trans) break; gsi_trans_complete(trans); } - if (count < budget) { - napi_complete(&channel->napi); - gsi_irq_ieob_enable(channel->gsi, channel->evt_ring_id); - } + if (count < budget && napi_complete(napi)) + gsi_irq_ieob_enable_one(channel->gsi, channel->evt_ring_id); return count; } diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 6c2371084c55..c6c6a7f6909c 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -43,7 +43,6 @@ enum ipa_flag { * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device - * @modem_rproc: Remoteproc handle for modem subsystem * @smp2p: SMP2P information * @clock: IPA clocking information * @table_addr: DMA address of filter/route table content @@ -83,7 +82,6 @@ struct ipa { DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; - struct rproc *modem_rproc; struct notifier_block nb; void *notifier; struct ipa_smp2p *smp2p; 
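For reference, the gsi_channel_poll() rework above follows the standard NAPI completion idiom: consume at most `budget` completions, then re-arm the event interrupt only when napi_complete() confirms polling has really stopped. A minimal sketch of that idiom follows; it is not part of the patch, and all example_* names are hypothetical stand-ins.

struct example_channel {
	struct napi_struct napi;
	/* device-specific state would live here */
};

static bool example_process_one(struct example_channel *ch);	/* hypothetical */
static void example_irq_enable(struct example_channel *ch);	/* hypothetical */

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_channel *ch = container_of(napi, struct example_channel, napi);
	int count;

	/* Consume at most 'budget' completions. */
	for (count = 0; count < budget; count++) {
		if (!example_process_one(ch))
			break;	/* ring is drained */
	}

	/* Re-arm the interrupt only if NAPI really stopped polling. */
	if (count < budget && napi_complete(napi))
		example_irq_enable(ch);

	return count;
}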
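The mlx5_qos_* helpers added earlier in this diff (drivers/net/ethernet/mellanox/mlx5/core/qos.c) are thin wrappers around the NIC scheduling-element commands. Below is a hedged sketch, not part of the patch, of how a caller could mirror a small HTB tree with them: error unwinding is trimmed, identifiers are illustrative, and it assumes the usual mlx5 convention that max_avg_bw == 0 means "no ceiling".

static int example_build_qos_tree(struct mlx5_core_dev *mdev)
{
	u32 root_id, inner_id, leaf_id;
	int err;

	/* Root of the NIC scheduling hierarchy (a DWRR TSAR). */
	err = mlx5_qos_create_root_node(mdev, &root_id);
	if (err)
		return err;

	/* Inner node under the root: bw_share 1, no rate ceiling. */
	err = mlx5_qos_create_inner_node(mdev, root_id, 1, 0, &inner_id);
	if (err)
		goto err_destroy_root;

	/* Leaf queue group that SQs can later be attached to. */
	err = mlx5_qos_create_leaf_node(mdev, inner_id, 1, 0, &leaf_id);
	if (err)
		goto err_destroy_inner;

	return 0;

err_destroy_inner:
	mlx5_qos_destroy_node(mdev, inner_id);
err_destroy_root:
	mlx5_qos_destroy_node(mdev, root_id);
	return err;
}

Presumably the id returned for such a leaf is what the driver then feeds into the SQ context as qos_queue_group_id via mlx5e_create_sq_rdy(), tying a send queue to its HTB class.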
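The "Pairs with smp_store_release in mlx5e_open_qos_sq" comments in the en_stats.c hunks earlier describe a publish/consume scheme: the writer fills a new stats slot first and only then releases the increased count, so a reader that acquires the count is guaranteed to see fully initialized slots. A small illustrative sketch of that pattern, with made-up names that are not the driver's:

struct example_stat;					/* hypothetical payload */
static void example_consume(struct example_stat *s);	/* hypothetical */

struct example_stats_table {
	u16 count;			/* published with smp_store_release() */
	struct example_stat **slots;	/* slots[0 .. count-1] are valid */
};

static void example_publish(struct example_stats_table *t,
			    struct example_stat *s, u16 idx)
{
	WRITE_ONCE(t->slots[idx], s);		/* 1: fill the slot */
	smp_store_release(&t->count, idx + 1);	/* 2: then publish the count */
}

static void example_read_all(struct example_stats_table *t)
{
	u16 count = smp_load_acquire(&t->count);	/* pairs with the release above */
	u16 i;

	for (i = 0; i < count; i++)
		example_consume(READ_ONCE(t->slots[i]));
}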
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 84bb8ae92725..ab0fd5cb4927 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -15,7 +15,6 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_address.h> -#include <linux/remoteproc.h> #include <linux/qcom_scm.h> #include <linux/soc/qcom/mdt_loader.h> @@ -729,19 +728,6 @@ static const struct of_device_id ipa_match[] = { }; MODULE_DEVICE_TABLE(of, ipa_match); -static phandle of_property_read_phandle(const struct device_node *np, - const char *name) -{ - struct property *prop; - int len = 0; - - prop = of_find_property(np, name, &len); - if (!prop || len != sizeof(__be32)) - return 0; - - return be32_to_cpup(prop->value); -} - /* Check things that can be validated at build time. This just * groups these things BUILD_BUG_ON() calls don't clutter the rest * of the code. @@ -807,10 +793,8 @@ static int ipa_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct ipa_data *data; struct ipa_clock *clock; - struct rproc *rproc; bool modem_init; struct ipa *ipa; - phandle ph; int ret; ipa_validate_build(); @@ -829,25 +813,12 @@ static int ipa_probe(struct platform_device *pdev) if (!qcom_scm_is_available()) return -EPROBE_DEFER; - /* We rely on remoteproc to tell us about modem state changes */ - ph = of_property_read_phandle(dev->of_node, "modem-remoteproc"); - if (!ph) { - dev_err(dev, "DT missing \"modem-remoteproc\" property\n"); - return -EINVAL; - } - - rproc = rproc_get_by_phandle(ph); - if (!rproc) - return -EPROBE_DEFER; - /* The clock and interconnects might not be ready when we're * probed, so might return -EPROBE_DEFER. */ clock = ipa_clock_init(dev, data->clock_data); - if (IS_ERR(clock)) { - ret = PTR_ERR(clock); - goto err_rproc_put; - } + if (IS_ERR(clock)) + return PTR_ERR(clock); /* No more EPROBE_DEFER. 
Allocate and initialize the IPA structure */ ipa = kzalloc(sizeof(*ipa), GFP_KERNEL); @@ -858,7 +829,6 @@ static int ipa_probe(struct platform_device *pdev) ipa->pdev = pdev; dev_set_drvdata(dev, ipa); - ipa->modem_rproc = rproc; ipa->clock = clock; ipa->version = data->version; @@ -935,8 +905,6 @@ err_kfree_ipa: kfree(ipa); err_clock_exit: ipa_clock_exit(clock); -err_rproc_put: - rproc_put(rproc); return ret; } @@ -944,7 +912,6 @@ err_rproc_put: static int ipa_remove(struct platform_device *pdev) { struct ipa *ipa = dev_get_drvdata(&pdev->dev); - struct rproc *rproc = ipa->modem_rproc; struct ipa_clock *clock = ipa->clock; int ret; @@ -970,7 +937,6 @@ static int ipa_remove(struct platform_device *pdev) ipa_reg_exit(ipa); kfree(ipa); ipa_clock_exit(clock); - rproc_put(rproc); return 0; } diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c index 5f3a4cc92a88..a5a214d29849 100644 --- a/drivers/net/mhi_net.c +++ b/drivers/net/mhi_net.c @@ -248,6 +248,10 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } +static struct device_type wwan_type = { + .name = "wwan", +}; + static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { @@ -267,6 +271,7 @@ static int mhi_net_probe(struct mhi_device *mhi_dev, mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; SET_NETDEV_DEV(ndev, &mhi_dev->dev); + SET_NETDEV_DEVTYPE(ndev, &wwan_type); /* All MHI net channels have 128 ring elements (at least for now) */ mhi_netdev->rx_queue_sz = 128; diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 99ecd6c4c15a..821e85a97367 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -60,6 +60,9 @@ #define RTL_LPADV_5000FULL BIT(6) #define RTL_LPADV_2500FULL BIT(5) +#define RTL9000A_GINMR 0x14 +#define RTL9000A_GINMR_LINK_STATUS BIT(4) + #define RTLGEN_SPEED_MASK 0x0630 #define RTL_GENERIC_PHYID 0x001cc800 @@ -655,6 +658,122 @@ static int rtlgen_resume(struct phy_device *phydev) return ret; } +static int rtl9000a_config_init(struct phy_device *phydev) +{ + phydev->autoneg = AUTONEG_DISABLE; + phydev->speed = SPEED_100; + phydev->duplex = DUPLEX_FULL; + + return 0; +} + +static int rtl9000a_config_aneg(struct phy_device *phydev) +{ + int ret; + u16 ctl = 0; + + switch (phydev->master_slave_set) { + case MASTER_SLAVE_CFG_MASTER_FORCE: + ctl |= CTL1000_AS_MASTER; + break; + case MASTER_SLAVE_CFG_SLAVE_FORCE: + break; + case MASTER_SLAVE_CFG_UNKNOWN: + case MASTER_SLAVE_CFG_UNSUPPORTED: + return 0; + default: + phydev_warn(phydev, "Unsupported Master/Slave mode\n"); + return -EOPNOTSUPP; + } + + ret = phy_modify_changed(phydev, MII_CTRL1000, CTL1000_AS_MASTER, ctl); + if (ret == 1) + ret = genphy_soft_reset(phydev); + + return ret; +} + +static int rtl9000a_read_status(struct phy_device *phydev) +{ + int ret; + + phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN; + phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN; + + ret = genphy_update_link(phydev); + if (ret) + return ret; + + ret = phy_read(phydev, MII_CTRL1000); + if (ret < 0) + return ret; + if (ret & CTL1000_AS_MASTER) + phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE; + else + phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE; + + ret = phy_read(phydev, MII_STAT1000); + if (ret < 0) + return ret; + if (ret & LPA_1000MSRES) + phydev->master_slave_state = MASTER_SLAVE_STATE_MASTER; + else + phydev->master_slave_state = MASTER_SLAVE_STATE_SLAVE; + + return 0; +} + +static int rtl9000a_ack_interrupt(struct 
phy_device *phydev) +{ + int err; + + err = phy_read(phydev, RTL8211F_INSR); + + return (err < 0) ? err : 0; +} + +static int rtl9000a_config_intr(struct phy_device *phydev) +{ + u16 val; + int err; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + err = rtl9000a_ack_interrupt(phydev); + if (err) + return err; + + val = (u16)~RTL9000A_GINMR_LINK_STATUS; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + } else { + val = ~0; + err = phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); + if (err) + return err; + + err = rtl9000a_ack_interrupt(phydev); + } + + return phy_write_paged(phydev, 0xa42, RTL9000A_GINMR, val); +} + +static irqreturn_t rtl9000a_handle_interrupt(struct phy_device *phydev) +{ + int irq_status; + + irq_status = phy_read(phydev, RTL8211F_INSR); + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + + if (!(irq_status & RTL8211F_INER_LINK_STATUS)) + return IRQ_NONE; + + phy_trigger_machine(phydev); + + return IRQ_HANDLED; +} + static struct phy_driver realtek_drvs[] = { { PHY_ID_MATCH_EXACT(0x00008201), @@ -823,6 +942,19 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001ccb00), + .name = "RTL9000AA_RTL9000AN Ethernet", + .features = PHY_BASIC_T1_FEATURES, + .config_init = rtl9000a_config_init, + .config_aneg = rtl9000a_config_aneg, + .read_status = rtl9000a_read_status, + .config_intr = rtl9000a_config_intr, + .handle_interrupt = rtl9000a_handle_interrupt, + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_page = rtl821x_read_page, + .write_page = rtl821x_write_page, }, }; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1447da1d5729..b4c8080e6f87 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1539,11 +1539,11 @@ static void usbnet_bh (struct timer_list *t) } } -static void usbnet_bh_tasklet(unsigned long data) +static void usbnet_bh_tasklet(struct tasklet_struct *t) { - struct timer_list *t = (struct timer_list *)data; + struct usbnet *dev = from_tasklet(dev, t, bh); - usbnet_bh(t); + usbnet_bh(&dev->delay); } @@ -1673,8 +1673,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); skb_queue_head_init(&dev->rxq_pause); - dev->bh.func = usbnet_bh_tasklet; - dev->bh.data = (unsigned long)&dev->delay; + tasklet_setup(&dev->bh, usbnet_bh_tasklet); INIT_WORK (&dev->kevent, usbnet_deferred_kevent); init_usb_anchor(&dev->deferred); timer_setup(&dev->delay, usbnet_bh, 0); @@ -1964,12 +1963,12 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, buf, size, USB_CTRL_GET_TIMEOUT); if (err > 0 && err <= size) { - if (data) - memcpy(data, buf, err); - else - netdev_dbg(dev->net, - "Huh? Data requested but thrown away.\n"); - } + if (data) + memcpy(data, buf, err); + else + netdev_dbg(dev->net, + "Huh? Data requested but thrown away.\n"); + } kfree(buf); out: return err; diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index ad0abb1f0bae..adaa1a7147f9 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -155,7 +155,7 @@ static int fdp_nci_i2c_read(struct fdp_i2c_phy *phy, struct sk_buff **skb) /* * LRC check failed. This may due to transmission error or - * desynchronization between driver and FDP. Drop the paquet + * desynchronization between driver and FDP. 
Drop the packet * and force resynchronization */ if (lrc) { diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index c70f62fe321e..33978022ae47 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -169,7 +169,7 @@ /* Bits determining whether its a direct command or register R/W, * whether to use a continuous SPI transaction or not, and the actual - * direct cmd opcode or regster address. + * direct cmd opcode or register address. */ #define TRF7970A_CMD_BIT_CTRL BIT(7) #define TRF7970A_CMD_BIT_RW BIT(6) diff --git a/drivers/soc/fsl/qe/qe_common.c b/drivers/soc/fsl/qe/qe_common.c index 497a7e0fd027..654e9246ce6b 100644 --- a/drivers/soc/fsl/qe/qe_common.c +++ b/drivers/soc/fsl/qe/qe_common.c @@ -27,7 +27,7 @@ static struct gen_pool *muram_pool; static spinlock_t cpm_muram_lock; -static u8 __iomem *muram_vbase; +static void __iomem *muram_vbase; static phys_addr_t muram_pbase; struct muram_block { @@ -223,9 +223,9 @@ void __iomem *cpm_muram_addr(unsigned long offset) } EXPORT_SYMBOL(cpm_muram_addr); -unsigned long cpm_muram_offset(void __iomem *addr) +unsigned long cpm_muram_offset(const void __iomem *addr) { - return addr - (void __iomem *)muram_vbase; + return addr - muram_vbase; } EXPORT_SYMBOL(cpm_muram_offset); @@ -235,6 +235,18 @@ EXPORT_SYMBOL(cpm_muram_offset); */ dma_addr_t cpm_muram_dma(void __iomem *addr) { - return muram_pbase + ((u8 __iomem *)addr - muram_vbase); + return muram_pbase + (addr - muram_vbase); } EXPORT_SYMBOL(cpm_muram_dma); + +/* + * As cpm_muram_free, but takes the virtual address rather than the + * muram offset. + */ +void cpm_muram_free_addr(const void __iomem *addr) +{ + if (!addr) + return; + cpm_muram_free(cpm_muram_offset(addr)); +} +EXPORT_SYMBOL(cpm_muram_free_addr); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7faf6a37d5b2..ac4d83a1ab81 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -123,6 +123,7 @@ void unregister_candev(struct net_device *dev); int can_restart_now(struct net_device *dev); void can_bus_off(struct net_device *dev); +const char *can_get_state_str(const enum can_state state); void can_change_state(struct net_device *dev, struct can_frame *cf, enum can_state tx_state, enum can_state rx_state); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dda61d150a13..9d1f29f0c512 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -31,6 +31,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __u32 ra_defrtr_metric; __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; __s32 ignore_routes_with_linkdown; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 823411e288c0..71ae6aac3410 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -842,11 +842,16 @@ struct mlx5_ifc_qos_cap_bits { u8 reserved_at_4[0x1]; u8 packet_pacing_burst_bound[0x1]; u8 packet_pacing_typical_size[0x1]; - u8 reserved_at_7[0x4]; + u8 reserved_at_7[0x1]; + u8 nic_sq_scheduling[0x1]; + u8 nic_bw_share[0x1]; + u8 nic_rate_limit[0x1]; u8 packet_pacing_uid[0x1]; u8 reserved_at_c[0x14]; - u8 reserved_at_20[0x20]; + u8 reserved_at_20[0xb]; + u8 log_max_qos_nic_queue_group[0x5]; + u8 reserved_at_30[0x10]; u8 packet_pacing_max_rate[0x20]; @@ -3347,7 +3352,7 @@ struct mlx5_ifc_sqc_bits { u8 reserved_at_e0[0x10]; u8 packet_pacing_rate_limit_index[0x10]; u8 tis_lst_sz[0x10]; - u8 reserved_at_110[0x10]; + u8 qos_queue_group_id[0x10]; u8 reserved_at_120[0x40]; @@ -3362,6 +3367,7 @@ enum { 
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, }; enum { @@ -4805,6 +4811,7 @@ struct mlx5_ifc_query_scheduling_element_out_bits { enum { SCHEDULING_HIERARCHY_E_SWITCH = 0x2, + SCHEDULING_HIERARCHY_NIC = 0x3, }; struct mlx5_ifc_query_scheduling_element_in_bits { diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 934de56644e7..c06d6aaba9df 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -84,6 +84,7 @@ enum { NETIF_F_GRO_FRAGLIST_BIT, /* Fraglist GRO */ NETIF_F_HW_MACSEC_BIT, /* Offload MACsec operations */ + NETIF_F_GRO_UDP_FWD_BIT, /* Allow UDP GRO for forwarding */ /* * Add your fresh new feature above and remember to update @@ -157,6 +158,7 @@ enum { #define NETIF_F_GRO_FRAGLIST __NETIF_F(GRO_FRAGLIST) #define NETIF_F_GSO_FRAGLIST __NETIF_F(GSO_FRAGLIST) #define NETIF_F_HW_MACSEC __NETIF_F(HW_MACSEC) +#define NETIF_F_GRO_UDP_FWD __NETIF_F(GRO_UDP_FWD) /* Finds the next feature with the highest number of the range of start till 0. */ @@ -234,7 +236,7 @@ static inline int find_next_netdev_feature(u64 feature, unsigned long start) #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) /* Changeable features with no special hardware requirements that defaults to off. */ -#define NETIF_F_SOFT_FEATURES_OFF NETIF_F_GRO_FRAGLIST +#define NETIF_F_SOFT_FEATURES_OFF (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD) #define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ NETIF_F_HW_VLAN_CTAG_RX | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef517254367d..9e8572533d8e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -858,6 +858,7 @@ enum tc_setup_type { TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, + TC_SETUP_QDISC_HTB, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 186dad231e30..9313b5aaf45b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3859,7 +3859,7 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps); -void __skb_tstamp_tx(struct sk_buff *orig_skb, +void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af7..48d8a363319e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -496,7 +496,8 @@ static inline u32 tcp_saved_syn_len(const struct saved_syn *saved_syn) } struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb); + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb); static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss) { diff --git a/include/net/devlink.h b/include/net/devlink.h index f466819cc477..d12ed2854c34 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -380,6 +380,8 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 +#define DEVLINK_RESOURCE_GENERIC_NAME_PORTS "physical_ports" + #define __DEVLINK_PARAM_MAX_STRING_VALUE 32 enum devlink_param_type { DEVLINK_PARAM_TYPE_U8, diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 111d7771b208..c11f80f328f1 100644 
--- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -141,7 +141,6 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_EARLY_RETRANS 4 /* Early retransmit timer */ #define ICSK_TIME_LOSS_PROBE 5 /* Tail loss probe timer */ #define ICSK_TIME_REO_TIMEOUT 6 /* Reordering timer */ @@ -227,8 +226,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 || - what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE || - what == ICSK_TIME_REO_TIMEOUT) { + what == ICSK_TIME_LOSS_PROBE || what == ICSK_TIME_REO_TIMEOUT) { icsk->icsk_pending = what; icsk->icsk_timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 2a5277758379..f51a118bfce8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -174,7 +174,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct net_device *dev); struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, - struct net_device *dev, unsigned int pref); + struct net_device *dev, unsigned int pref, + u32 defrtr_usr_metric); void rt6_purge_dflt_routers(struct net *net); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f2a9c44171c..255e4f4b521f 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -783,6 +783,42 @@ struct tc_mq_qopt_offload { }; }; +enum tc_htb_command { + /* Root */ + TC_HTB_CREATE, /* Initialize HTB offload. */ + TC_HTB_DESTROY, /* Destroy HTB offload. */ + + /* Classes */ + /* Allocate qid and create leaf. */ + TC_HTB_LEAF_ALLOC_QUEUE, + /* Convert leaf to inner, preserve and return qid, create new leaf. */ + TC_HTB_LEAF_TO_INNER, + /* Delete leaf, while siblings remain. */ + TC_HTB_LEAF_DEL, + /* Delete leaf, convert parent to leaf, preserving qid. */ + TC_HTB_LEAF_DEL_LAST, + /* TC_HTB_LEAF_DEL_LAST, but delete driver data on hardware errors. */ + TC_HTB_LEAF_DEL_LAST_FORCE, + /* Modify parameters of a node. */ + TC_HTB_NODE_MODIFY, + + /* Class qdisc */ + TC_HTB_LEAF_QUERY_QUEUE, /* Query qid by classid. 
*/ +}; + +struct tc_htb_qopt_offload { + struct netlink_ext_ack *extack; + enum tc_htb_command command; + u16 classid; + u32 parent_classid; + u16 qid; + u16 moved_qid; + u64 rate; + u64 ceil; +}; + +#define TC_HTB_CLASSID_ROOT U32_MAX + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e7bee99aebce..070f01bf17eb 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -210,7 +210,8 @@ struct Qdisc_class_ops { int (*change)(struct Qdisc *, u32, u32, struct nlattr **, unsigned long *, struct netlink_ext_ack *); - int (*delete)(struct Qdisc *, unsigned long); + int (*delete)(struct Qdisc *, unsigned long, + struct netlink_ext_ack *); void (*walk)(struct Qdisc *, struct qdisc_walker * arg); /* Filter manipulation */ @@ -552,14 +553,20 @@ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc) return qdisc->dev_queue->dev; } -static inline void sch_tree_lock(const struct Qdisc *q) +static inline void sch_tree_lock(struct Qdisc *q) { - spin_lock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_lock_bh(qdisc_lock(q)); + else + spin_lock_bh(qdisc_root_sleeping_lock(q)); } -static inline void sch_tree_unlock(const struct Qdisc *q) +static inline void sch_tree_unlock(struct Qdisc *q) { - spin_unlock_bh(qdisc_root_sleeping_lock(q)); + if (q->flags & TCQ_F_MQROOT) + spin_unlock_bh(qdisc_lock(q)); + else + spin_unlock_bh(qdisc_root_sleeping_lock(q)); } extern struct Qdisc noop_qdisc; diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h index 3feddfec9f87..4925a1b59dc9 100644 --- a/include/soc/fsl/qe/qe.h +++ b/include/soc/fsl/qe/qe.h @@ -27,12 +27,6 @@ #define QE_NUM_OF_BRGS 16 #define QE_NUM_OF_PORTS 1024 -/* Memory partitions -*/ -#define MEM_PART_SYSTEM 0 -#define MEM_PART_SECONDARY 1 -#define MEM_PART_MURAM 2 - /* Clocks and BRGs */ enum qe_clock { QE_CLK_NONE = 0, @@ -102,8 +96,9 @@ s32 cpm_muram_alloc(unsigned long size, unsigned long align); void cpm_muram_free(s32 offset); s32 cpm_muram_alloc_fixed(unsigned long offset, unsigned long size); void __iomem *cpm_muram_addr(unsigned long offset); -unsigned long cpm_muram_offset(void __iomem *addr); +unsigned long cpm_muram_offset(const void __iomem *addr); dma_addr_t cpm_muram_dma(void __iomem *addr); +void cpm_muram_free_addr(const void __iomem *addr); #else static inline s32 cpm_muram_alloc(unsigned long size, unsigned long align) @@ -126,7 +121,7 @@ static inline void __iomem *cpm_muram_addr(unsigned long offset) return NULL; } -static inline unsigned long cpm_muram_offset(void __iomem *addr) +static inline unsigned long cpm_muram_offset(const void __iomem *addr) { return -ENOSYS; } @@ -135,6 +130,9 @@ static inline dma_addr_t cpm_muram_dma(void __iomem *addr) { return 0; } +static inline void cpm_muram_free_addr(const void __iomem *addr) +{ +} #endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */ /* QE PIO */ @@ -239,6 +237,7 @@ static inline int qe_alive_during_sleep(void) #define qe_muram_addr cpm_muram_addr #define qe_muram_offset cpm_muram_offset #define qe_muram_dma cpm_muram_dma +#define qe_muram_free_addr cpm_muram_free_addr #ifdef CONFIG_PPC32 #define qe_iowrite8(val, addr) out_8(addr, val) diff --git a/include/soc/fsl/qe/ucc_fast.h b/include/soc/fsl/qe/ucc_fast.h index dc4e79468094..9696a5b9b5d1 100644 --- a/include/soc/fsl/qe/ucc_fast.h +++ b/include/soc/fsl/qe/ucc_fast.h @@ -146,7 +146,6 @@ struct ucc_fast_info { resource_size_t regs; int irq; u32 uccm_mask; - int bd_mem_part; int 
brkpt_support; int grant_support; int tsa; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2bd0d8bbcdb2..eb8018c3a737 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -525,6 +525,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 13e8751bf24a..70603775fe91 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -189,6 +189,7 @@ enum { DEVCONF_ACCEPT_RA_RT_INFO_MIN_PLEN, DEVCONF_NDISC_TCLASS, DEVCONF_RPL_SEG_ENABLED, + DEVCONF_RA_DEFRTR_METRIC, DEVCONF_MAX }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 9e7c2c607845..79a699f106b1 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -434,6 +434,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h index 458179df9b27..1e05d3caa712 100644 --- a/include/uapi/linux/sysctl.h +++ b/include/uapi/linux/sysctl.h @@ -571,6 +571,7 @@ enum { NET_IPV6_ACCEPT_SOURCE_ROUTE=25, NET_IPV6_ACCEPT_RA_FROM_LOCAL=26, NET_IPV6_ACCEPT_RA_RT_INFO_MIN_PLEN=27, + NET_IPV6_RA_DEFRTR_METRIC=28, __NET_IPV6_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 768e93bd5b51..42fc5a640df4 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -314,6 +314,7 @@ enum { TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */ + TCP_NLA_TTL, /* TTL or hop limit of a packet received */ }; /* for TCP_MD5SIG socket option */ @@ -353,5 +354,9 @@ struct tcp_zerocopy_receive { __u64 copybuf_address; /* in: copybuf address (small reads) */ __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ __u32 flags; /* in: flags */ + __u64 msg_control; /* ancillary data */ + __u64 msg_controllen; + __u32 msg_flags; + /* __u32 hole; Next we must add >1 u32 otherwise length checks fail. */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 3d11fec3a8dc..64468c49791f 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_9P - depends on NET tristate "Plan 9 Resource Sharing Support (9P2000)" help If you say Y here, you will get experimental support for diff --git a/net/Makefile b/net/Makefile index d96b0aa8f39f..9ca9572188fe 100644 --- a/net/Makefile +++ b/net/Makefile @@ -6,20 +6,19 @@ # Rewritten to use lists instead of if-statements. 
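The three fields appended to struct tcp_zerocopy_receive above let the TCP_ZEROCOPY_RECEIVE getsockopt() hand back ancillary data alongside the received payload. The userspace sketch below is not part of the patch; it assumes uapi headers that already carry the new fields and omits error handling and the usual receive-region setup.

#include <linux/tcp.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

/* Illustrative only: point the new msg_control/msg_controllen fields at a
 * user buffer that the kernel can fill much like recvmsg() control data.
 */
static int zc_receive_with_cmsg(int fd, void *copybuf, int copybuf_len,
				void *cmsg_buf, __u64 cmsg_len)
{
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);

	memset(&zc, 0, sizeof(zc));
	zc.copybuf_address = (__u64)(unsigned long)copybuf;
	zc.copybuf_len = copybuf_len;
	zc.msg_control = (__u64)(unsigned long)cmsg_buf;	/* new field */
	zc.msg_controllen = cmsg_len;				/* new field */

	/* zc.msg_flags is filled in by the kernel on return */
	return getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
}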
# -obj-$(CONFIG_NET) := devres.o socket.o core/ +obj-y := devres.o socket.o core/ -tmp-$(CONFIG_COMPAT) := compat.o -obj-$(CONFIG_NET) += $(tmp-y) +obj-$(CONFIG_COMPAT) += compat.o # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ -obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ +obj-y += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX_SCM) += unix/ -obj-$(CONFIG_NET) += ipv6/ +obj-y += ipv6/ obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ @@ -56,16 +55,12 @@ obj-$(CONFIG_SMC) += smc/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ obj-$(CONFIG_CAIF) += caif/ -ifneq ($(CONFIG_DCB),) -obj-y += dcb/ -endif +obj-$(CONFIG_DCB) += dcb/ obj-$(CONFIG_6LOWPAN) += 6lowpan/ obj-$(CONFIG_IEEE802154) += ieee802154/ obj-$(CONFIG_MAC802154) += mac802154/ -ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o -endif obj-$(CONFIG_DNS_RESOLVER) += dns_resolver/ obj-$(CONFIG_CEPH_LIB) += ceph/ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ @@ -77,12 +72,8 @@ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ obj-$(CONFIG_NET_NSH) += nsh/ obj-$(CONFIG_HSR) += hsr/ -ifneq ($(CONFIG_NET_SWITCHDEV),) -obj-y += switchdev/ -endif -ifneq ($(CONFIG_NET_L3_MASTER_DEV),) -obj-y += l3mdev/ -endif +obj-$(CONFIG_NET_SWITCHDEV) += switchdev/ +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev/ obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 993afd5ff7bb..43ae3dcbbbeb 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -9,7 +9,6 @@ config BATMAN_ADV tristate "B.A.T.M.A.N. Advanced Meshing Protocol" - depends on NET select LIBCRC32C help B.A.T.M.A.N. 
(better approach to mobile ad-hoc networking) is diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 64e669acd42f..400c5130dc0a 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -5,7 +5,7 @@ menuconfig BT tristate "Bluetooth subsystem support" - depends on NET && !S390 + depends on !S390 depends on RFKILL || !RFKILL select CRC16 select CRYPTO diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index 8ad0233ce497..3d4a21462458 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig BPFILTER bool "BPF based packet filtering framework (BPFILTER)" - depends on NET && BPF && INET + depends on BPF && INET select USERMODE_DRIVER help This builds experimental bpfilter framework that is aiming to diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 4702702a74d3..7fb9a021873b 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -18,7 +18,7 @@ br_netfilter-y := br_netfilter_hooks.o br_netfilter-$(subst m,y,$(CONFIG_IPV6)) += br_netfilter_ipv6.o obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o br_multicast_eht.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_options.o diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 257ac4e25f6d..6f672eb7ff33 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -33,6 +33,7 @@ #endif #include "br_private.h" +#include "br_private_mcast_eht.h" static const struct rhashtable_params br_mdb_rht_params = { .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode), @@ -441,7 +442,8 @@ static void br_multicast_fwd_src_add(struct net_bridge_group_src *src) br_multicast_sg_add_exclude_ports(star_mp, sg); } -static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) +static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge_port_group *p, *pg = src->pg; struct net_bridge_port_group __rcu **pp; @@ -466,6 +468,8 @@ static void br_multicast_fwd_src_remove(struct net_bridge_group_src *src) (p->flags & MDB_PG_FLAGS_PERMANENT)) break; + if (fastleave) + p->flags |= MDB_PG_FLAGS_FAST_LEAVE; br_multicast_del_pg(mp, p, pp); break; } @@ -559,11 +563,12 @@ static void br_multicast_destroy_group_src(struct net_bridge_mcast_gc *gc) kfree_rcu(src, rcu); } -static void br_multicast_del_group_src(struct net_bridge_group_src *src) +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave) { struct net_bridge *br = src->pg->key.port->br; - br_multicast_fwd_src_remove(src); + br_multicast_fwd_src_remove(src, fastleave); hlist_del_init_rcu(&src->node); src->pg->src_ents--; hlist_add_head(&src->mcast_gc.gc_node, &br->mcast_gc_list); @@ -593,8 +598,9 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp, rcu_assign_pointer(*pp, pg->next); hlist_del_init(&pg->mglist); + br_multicast_eht_clean_sets(pg); hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); br_mdb_notify(br->dev, mp, pg, RTM_DELMDB); if (!br_multicast_is_star_g(&mp->addr)) { rhashtable_remove_fast(&br->sg_port_tbl, &pg->rhnode, @@ -651,7 +657,7 @@ static void br_multicast_port_group_expired(struct timer_list *t) pg->filter_mode = MCAST_INCLUDE; hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { if 
(!timer_pending(&src_ent->timer)) { - br_multicast_del_group_src(src_ent); + br_multicast_del_group_src(src_ent, false); changed = true; } } @@ -1078,7 +1084,7 @@ static void br_multicast_group_src_expired(struct timer_list *t) pg = src->pg; if (pg->filter_mode == MCAST_INCLUDE) { - br_multicast_del_group_src(src); + br_multicast_del_group_src(src, false); if (!hlist_empty(&pg->src_list)) goto out; br_multicast_find_del_pg(br, pg); @@ -1090,7 +1096,7 @@ out: spin_unlock(&br->multicast_lock); } -static struct net_bridge_group_src * +struct net_bridge_group_src * br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip) { struct net_bridge_group_src *ent; @@ -1172,6 +1178,8 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->flags = flags; p->filter_mode = filter_mode; p->rt_protocol = rt_protocol; + p->eht_host_tree = RB_ROOT; + p->eht_set_tree = RB_ROOT; p->mcast_gc.destroy = br_multicast_destroy_port_group; INIT_HLIST_HEAD(&p->src_list); @@ -1292,7 +1300,7 @@ static int br_multicast_add_group(struct net_bridge *br, pg = __br_multicast_add_group(br, port, group, src, filter_mode, igmpv2_mldv1, false); /* NULL is considered valid for host joined groups */ - err = IS_ERR(pg) ? PTR_ERR(pg) : 0; + err = PTR_ERR_OR_ZERO(pg); spin_unlock(&br->multicast_lock); return err; @@ -1600,6 +1608,7 @@ static void br_mc_disabled_update(struct net_device *dev, bool value) int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; + port->multicast_eht_hosts_limit = BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT; timer_setup(&port->multicast_router_timer, br_multicast_router_expired, 0); @@ -1700,7 +1709,7 @@ static int __grp_src_delete_marked(struct net_bridge_port_group *pg) hlist_for_each_entry_safe(ent, tmp, &pg->src_list, node) if (ent->flags & BR_SGRP_F_DELETE) { - br_multicast_del_group_src(ent); + br_multicast_del_group_src(ent, false); deleted++; } @@ -1799,8 +1808,9 @@ static void __grp_send_query_and_rexmit(struct net_bridge_port_group *pg) * INCLUDE (A) ALLOW (B) INCLUDE (A+B) (B)=GMI * EXCLUDE (X,Y) ALLOW (A) EXCLUDE (X+A,Y-A) (A)=GMI */ -static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1812,7 +1822,7 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -1822,9 +1832,11 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + return changed; } @@ -1833,8 +1845,9 @@ static bool br_multicast_isinc_allow(struct net_bridge_port_group *pg, * Delete (A-B) * Group Timer=GMI */ -static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t 
addr_size, + int grec_type) { struct net_bridge_group_src *ent; struct br_ip src_ip; @@ -1846,7 +1859,7 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) ent->flags &= ~BR_SGRP_F_DELETE; @@ -1854,9 +1867,10 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, ent = br_multicast_new_group_src(pg, &src_ip); if (ent) br_multicast_fwd_src_handle(ent); - srcs += src_size; } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); } @@ -1866,8 +1880,9 @@ static void __grp_src_isexc_incl(struct net_bridge_port_group *pg, * Delete (Y-A) * Group Timer=GMI */ -static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; struct net_bridge_group_src *ent; @@ -1882,7 +1897,7 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -1894,29 +1909,34 @@ static bool __grp_src_isexc_excl(struct net_bridge_port_group *pg, changed = true; } } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; return changed; } -static bool br_multicast_isexc(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_isexc(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_isexc_incl(pg, srcs, nsrcs, src_size); + __grp_src_isexc_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_isexc_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_isexc_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -1930,8 +1950,9 @@ static bool br_multicast_isexc(struct net_bridge_port_group *pg, * INCLUDE (A) TO_IN (B) INCLUDE (A+B) (B)=GMI * Send Q(G,A-B) */ -static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1946,7 +1967,7 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= 
~BR_SGRP_F_SEND; @@ -1958,9 +1979,11 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -1972,8 +1995,9 @@ static bool __grp_src_toin_incl(struct net_bridge_port_group *pg, * Send Q(G,X-A) * Send Q(G) */ -static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; u32 src_idx, to_send = pg->src_ents; @@ -1989,7 +2013,7 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { if (timer_pending(&ent->timer)) { @@ -2003,9 +2027,11 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, } if (ent) __grp_src_mod_timer(ent, now + br_multicast_gmi(br)); - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2014,20 +2040,32 @@ static bool __grp_src_toin_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toin(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_toin(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - changed = __grp_src_toin_incl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_toin_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toin_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } + if (br_multicast_eht_should_del_pg(pg)) { + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } @@ -2037,8 +2075,9 @@ static bool br_multicast_toin(struct net_bridge_port_group *pg, * Send Q(G,A*B) * Group Timer=GMI */ -static void __grp_src_toex_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static void __grp_src_toex_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2050,7 +2089,7 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags = (ent->flags & ~BR_SGRP_F_DELETE) | @@ -2061,9 +2100,10 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, } if (ent) br_multicast_fwd_src_handle(ent); - 
srcs += src_size; } + br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type); + __grp_src_delete_marked(pg); if (to_send) __grp_src_query_marked_and_rexmit(pg); @@ -2076,8 +2116,9 @@ static void __grp_src_toex_incl(struct net_bridge_port_group *pg, * Send Q(G,A-Y) * Group Timer=GMI */ -static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2090,7 +2131,7 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags &= ~BR_SGRP_F_DELETE; @@ -2105,9 +2146,11 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (__grp_src_delete_marked(pg)) changed = true; if (to_send) @@ -2116,20 +2159,23 @@ static bool __grp_src_toex_excl(struct net_bridge_port_group *pg, return changed; } -static bool br_multicast_toex(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_toex(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, + int grec_type) { struct net_bridge *br = pg->key.port->br; bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_toex_incl(pg, srcs, nsrcs, src_size); + __grp_src_toex_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); br_multicast_star_g_handle_mode(pg, MCAST_EXCLUDE); changed = true; break; case MCAST_EXCLUDE: - changed = __grp_src_toex_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_toex_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } @@ -2142,11 +2188,12 @@ static bool br_multicast_toex(struct net_bridge_port_group *pg, /* State Msg type New state Actions * INCLUDE (A) BLOCK (B) INCLUDE (A) Send Q(G,A*B) */ -static void __grp_src_block_incl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_block_incl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; + bool changed = false; struct br_ip src_ip; hlist_for_each_entry(ent, &pg->src_list, node) @@ -2155,28 +2202,29 @@ static void __grp_src_block_incl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (ent) { ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); - if (pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) - br_multicast_find_del_pg(pg->key.port->br, pg); + return changed; } /* State Msg type New state Actions * EXCLUDE (X,Y) BLOCK (A) EXCLUDE (X+(A-Y),Y) 
(A-X-Y)=Group Timer * Send Q(G,A-Y) */ -static bool __grp_src_block_excl(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool __grp_src_block_excl(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { struct net_bridge_group_src *ent; u32 src_idx, to_send = 0; @@ -2189,7 +2237,7 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, memset(&src_ip, 0, sizeof(src_ip)); src_ip.proto = pg->key.addr.proto; for (src_idx = 0; src_idx < nsrcs; src_idx++) { - memcpy(&src_ip.src, srcs, src_size); + memcpy(&src_ip.src, srcs + (src_idx * addr_size), addr_size); ent = br_multicast_find_group_src(pg, &src_ip); if (!ent) { ent = br_multicast_new_group_src(pg, &src_ip); @@ -2202,29 +2250,44 @@ static bool __grp_src_block_excl(struct net_bridge_port_group *pg, ent->flags |= BR_SGRP_F_SEND; to_send++; } - srcs += src_size; } + if (br_multicast_eht_handle(pg, h_addr, srcs, nsrcs, addr_size, grec_type)) + changed = true; + if (to_send) __grp_src_query_marked_and_rexmit(pg); return changed; } -static bool br_multicast_block(struct net_bridge_port_group *pg, - void *srcs, u32 nsrcs, size_t src_size) +static bool br_multicast_block(struct net_bridge_port_group *pg, void *h_addr, + void *srcs, u32 nsrcs, size_t addr_size, int grec_type) { bool changed = false; switch (pg->filter_mode) { case MCAST_INCLUDE: - __grp_src_block_incl(pg, srcs, nsrcs, src_size); + changed = __grp_src_block_incl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; case MCAST_EXCLUDE: - changed = __grp_src_block_excl(pg, srcs, nsrcs, src_size); + changed = __grp_src_block_excl(pg, h_addr, srcs, nsrcs, addr_size, + grec_type); break; } + if ((pg->filter_mode == MCAST_INCLUDE && hlist_empty(&pg->src_list)) || + br_multicast_eht_should_del_pg(pg)) { + if (br_multicast_eht_should_del_pg(pg)) + pg->flags |= MDB_PG_FLAGS_FAST_LEAVE; + br_multicast_find_del_pg(pg->key.port->br, pg); + /* a notification has already been sent and we shouldn't + * access pg after the delete so we have to return false + */ + changed = false; + } + return changed; } @@ -2257,8 +2320,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct igmpv3_report *ih; struct igmpv3_grec *grec; int i, len, num, type; + __be32 group, *h_addr; bool changed = false; - __be32 group; int err = 0; u16 nsrcs; @@ -2318,32 +2381,33 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; - /* reload grec */ + /* reload grec and host addr */ grec = (void *)(skb->data + len - sizeof(*grec) - (nsrcs * 4)); + h_addr = &ip_hdr(skb)->saddr; switch (type) { case IGMPV3_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, - nsrcs, sizeof(__be32)); + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isinc_allow(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_isexc(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toin(pg, h_addr, 
grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_toex(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; case IGMPV3_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, - sizeof(__be32)); + changed = br_multicast_block(pg, h_addr, grec->grec_src, + nsrcs, sizeof(__be32), type); break; } if (changed) @@ -2367,6 +2431,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, unsigned int nsrcs_offset; const unsigned char *src; struct icmp6hdr *icmp6h; + struct in6_addr *h_addr; struct mld2_grec *grec; unsigned int grec_len; bool changed = false; @@ -2445,31 +2510,43 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, pg = br_multicast_find_port(mdst, port, src); if (!pg || (pg->flags & MDB_PG_FLAGS_PERMANENT)) goto unlock_continue; + h_addr = &ipv6_hdr(skb)->saddr; switch (grec->grec_type) { case MLD2_ALLOW_NEW_SOURCES: - changed = br_multicast_isinc_allow(pg, grec->grec_src, - nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_INCLUDE: - changed = br_multicast_isinc_allow(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isinc_allow(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_MODE_IS_EXCLUDE: - changed = br_multicast_isexc(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_isexc(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_INCLUDE: - changed = br_multicast_toin(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_toin(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_CHANGE_TO_EXCLUDE: - changed = br_multicast_toex(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_toex(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; case MLD2_BLOCK_OLD_SOURCES: - changed = br_multicast_block(pg, grec->grec_src, nsrcs, - sizeof(struct in6_addr)); + changed = br_multicast_block(pg, h_addr, + grec->grec_src, nsrcs, + sizeof(struct in6_addr), + grec->grec_type); break; } if (changed) diff --git a/net/bridge/br_multicast_eht.c b/net/bridge/br_multicast_eht.c new file mode 100644 index 000000000000..fea38b9a7268 --- /dev/null +++ b/net/bridge/br_multicast_eht.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/if_ether.h> +#include <linux/igmp.h> +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/netdevice.h> +#include <linux/netfilter_bridge.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/inetdevice.h> +#include <linux/mroute.h> +#include <net/ip.h> +#include <net/switchdev.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/ip6_checksum.h> +#include <net/addrconf.h> +#endif + +#include "br_private.h" +#include "br_private_mcast_eht.h" + +static bool 
br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr); +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src); + +static struct net_bridge_group_eht_host * +br_multicast_eht_host_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = pg->eht_host_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static int br_multicast_eht_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_host *eht_host; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return MCAST_INCLUDE; + + return eht_host->filter_mode; +} + +static struct net_bridge_group_eht_set_entry * +br_multicast_eht_set_entry_lookup(struct net_bridge_group_eht_set *eht_set, + union net_bridge_eht_addr *h_addr) +{ + struct rb_node *node = eht_set->entry_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static struct net_bridge_group_eht_set * +br_multicast_eht_set_lookup(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node *node = pg->eht_set_tree.rb_node; + + while (node) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + if (result < 0) + node = node->rb_left; + else if (result > 0) + node = node->rb_right; + else + return this; + } + + return NULL; +} + +static void __eht_destroy_host(struct net_bridge_group_eht_host *eht_host) +{ + WARN_ON(!hlist_empty(&eht_host->set_entries)); + + br_multicast_eht_hosts_dec(eht_host->pg); + + rb_erase(&eht_host->rb_node, &eht_host->pg->eht_host_tree); + RB_CLEAR_NODE(&eht_host->rb_node); + kfree(eht_host); +} + +static void br_multicast_destroy_eht_set_entry(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set_entry *set_h; + + set_h = container_of(gc, struct net_bridge_group_eht_set_entry, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&set_h->rb_node)); + + del_timer_sync(&set_h->timer); + kfree(set_h); +} + +static void br_multicast_destroy_eht_set(struct net_bridge_mcast_gc *gc) +{ + struct net_bridge_group_eht_set *eht_set; + + eht_set = container_of(gc, struct net_bridge_group_eht_set, mcast_gc); + WARN_ON(!RB_EMPTY_NODE(&eht_set->rb_node)); + WARN_ON(!RB_EMPTY_ROOT(&eht_set->entry_tree)); + + del_timer_sync(&eht_set->timer); + kfree(eht_set); +} + +static void __eht_del_set_entry(struct net_bridge_group_eht_set_entry *set_h) +{ + struct net_bridge_group_eht_host *eht_host = set_h->h_parent; + union net_bridge_eht_addr zero_addr; + + rb_erase(&set_h->rb_node, &set_h->eht_set->entry_tree); + RB_CLEAR_NODE(&set_h->rb_node); + 
hlist_del_init(&set_h->host_list); + memset(&zero_addr, 0, sizeof(zero_addr)); + if (memcmp(&set_h->h_addr, &zero_addr, sizeof(zero_addr))) + eht_host->num_entries--; + hlist_add_head(&set_h->mcast_gc.gc_node, &set_h->br->mcast_gc_list); + queue_work(system_long_wq, &set_h->br->mcast_gc_work); + + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +} + +static void br_multicast_del_eht_set(struct net_bridge_group_eht_set *eht_set) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct rb_node *node; + + while ((node = rb_first(&eht_set->entry_tree))) { + set_h = rb_entry(node, struct net_bridge_group_eht_set_entry, + rb_node); + __eht_del_set_entry(set_h); + } + + rb_erase(&eht_set->rb_node, &eht_set->pg->eht_set_tree); + RB_CLEAR_NODE(&eht_set->rb_node); + hlist_add_head(&eht_set->mcast_gc.gc_node, &eht_set->br->mcast_gc_list); + queue_work(system_long_wq, &eht_set->br->mcast_gc_work); +} + +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg) +{ + struct net_bridge_group_eht_set *eht_set; + struct rb_node *node; + + while ((node = rb_first(&pg->eht_set_tree))) { + eht_set = rb_entry(node, struct net_bridge_group_eht_set, + rb_node); + br_multicast_del_eht_set(eht_set); + } +} + +static void br_multicast_eht_set_entry_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set_entry *set_h = from_timer(set_h, t, timer); + struct net_bridge *br = set_h->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&set_h->rb_node) || timer_pending(&set_h->timer)) + goto out; + + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +out: + spin_unlock(&br->multicast_lock); +} + +static void br_multicast_eht_set_expired(struct timer_list *t) +{ + struct net_bridge_group_eht_set *eht_set = from_timer(eht_set, t, + timer); + struct net_bridge *br = eht_set->br; + + spin_lock(&br->multicast_lock); + if (RB_EMPTY_NODE(&eht_set->rb_node) || timer_pending(&eht_set->timer)) + goto out; + + br_multicast_del_eht_set(eht_set); +out: + spin_unlock(&br->multicast_lock); +} + +static struct net_bridge_group_eht_host * +__eht_lookup_create_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + unsigned char filter_mode) +{ + struct rb_node **link = &pg->eht_host_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_host *eht_host; + + while (*link) { + struct net_bridge_group_eht_host *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_host, + rb_node); + result = memcmp(h_addr, &this->h_addr, sizeof(*h_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + if (br_multicast_eht_hosts_over_limit(pg)) + return NULL; + + eht_host = kzalloc(sizeof(*eht_host), GFP_ATOMIC); + if (!eht_host) + return NULL; + + memcpy(&eht_host->h_addr, h_addr, sizeof(*h_addr)); + INIT_HLIST_HEAD(&eht_host->set_entries); + eht_host->pg = pg; + eht_host->filter_mode = filter_mode; + + rb_link_node(&eht_host->rb_node, parent, link); + rb_insert_color(&eht_host->rb_node, &pg->eht_host_tree); + + br_multicast_eht_hosts_inc(pg); + + return eht_host; +} + +static struct net_bridge_group_eht_set_entry * +__eht_lookup_create_set_entry(struct net_bridge *br, + struct net_bridge_group_eht_set *eht_set, + struct net_bridge_group_eht_host *eht_host, + bool allow_zero_src) +{ + struct rb_node **link = &eht_set->entry_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set_entry *set_h; + + 
while (*link) { + struct net_bridge_group_eht_set_entry *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set_entry, + rb_node); + result = memcmp(&eht_host->h_addr, &this->h_addr, + sizeof(union net_bridge_eht_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + /* always allow auto-created zero entry */ + if (!allow_zero_src && eht_host->num_entries >= PG_SRC_ENT_LIMIT) + return NULL; + + set_h = kzalloc(sizeof(*set_h), GFP_ATOMIC); + if (!set_h) + return NULL; + + memcpy(&set_h->h_addr, &eht_host->h_addr, + sizeof(union net_bridge_eht_addr)); + set_h->mcast_gc.destroy = br_multicast_destroy_eht_set_entry; + set_h->eht_set = eht_set; + set_h->h_parent = eht_host; + set_h->br = br; + timer_setup(&set_h->timer, br_multicast_eht_set_entry_expired, 0); + + hlist_add_head(&set_h->host_list, &eht_host->set_entries); + rb_link_node(&set_h->rb_node, parent, link); + rb_insert_color(&set_h->rb_node, &eht_set->entry_tree); + /* we must not count the auto-created zero entry otherwise we won't be + * able to track the full list of PG_SRC_ENT_LIMIT entries + */ + if (!allow_zero_src) + eht_host->num_entries++; + + return set_h; +} + +static struct net_bridge_group_eht_set * +__eht_lookup_create_set(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr) +{ + struct rb_node **link = &pg->eht_set_tree.rb_node, *parent = NULL; + struct net_bridge_group_eht_set *eht_set; + + while (*link) { + struct net_bridge_group_eht_set *this; + int result; + + this = rb_entry(*link, struct net_bridge_group_eht_set, + rb_node); + result = memcmp(src_addr, &this->src_addr, sizeof(*src_addr)); + parent = *link; + if (result < 0) + link = &((*link)->rb_left); + else if (result > 0) + link = &((*link)->rb_right); + else + return this; + } + + eht_set = kzalloc(sizeof(*eht_set), GFP_ATOMIC); + if (!eht_set) + return NULL; + + memcpy(&eht_set->src_addr, src_addr, sizeof(*src_addr)); + eht_set->mcast_gc.destroy = br_multicast_destroy_eht_set; + eht_set->pg = pg; + eht_set->br = pg->key.port->br; + eht_set->entry_tree = RB_ROOT; + timer_setup(&eht_set->timer, br_multicast_eht_set_expired, 0); + + rb_link_node(&eht_set->rb_node, parent, link); + rb_insert_color(&eht_set->rb_node, &pg->eht_set_tree); + + return eht_set; +} + +static void br_multicast_ip_src_to_eht_addr(const struct br_ip *src, + union net_bridge_eht_addr *dest) +{ + switch (src->proto) { + case htons(ETH_P_IP): + dest->ip4 = src->src.ip4; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + memcpy(&dest->ip6, &src->src.ip6, sizeof(struct in6_addr)); + break; +#endif + } +} + +static void br_eht_convert_host_filter_mode(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + int filter_mode) +{ + struct net_bridge_group_eht_host *eht_host; + union net_bridge_eht_addr zero_addr; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (eht_host) + eht_host->filter_mode = filter_mode; + + memset(&zero_addr, 0, sizeof(zero_addr)); + switch (filter_mode) { + case MCAST_INCLUDE: + br_multicast_del_eht_set_entry(pg, &zero_addr, h_addr); + break; + case MCAST_EXCLUDE: + br_multicast_create_eht_set_entry(pg, &zero_addr, h_addr, + MCAST_EXCLUDE, + true); + break; + } +} + +static void br_multicast_create_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr, + int filter_mode, + bool allow_zero_src) +{ + struct 
net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; + union net_bridge_eht_addr zero_addr; + + memset(&zero_addr, 0, sizeof(zero_addr)); + if (!allow_zero_src && !memcmp(src_addr, &zero_addr, sizeof(zero_addr))) + return; + + eht_set = __eht_lookup_create_set(pg, src_addr); + if (!eht_set) + return; + + eht_host = __eht_lookup_create_host(pg, h_addr, filter_mode); + if (!eht_host) + goto fail_host; + + set_h = __eht_lookup_create_set_entry(br, eht_set, eht_host, + allow_zero_src); + if (!set_h) + goto fail_set_entry; + + mod_timer(&set_h->timer, jiffies + br_multicast_gmi(br)); + mod_timer(&eht_set->timer, jiffies + br_multicast_gmi(br)); + + return; + +fail_set_entry: + if (hlist_empty(&eht_host->set_entries)) + __eht_destroy_host(eht_host); +fail_host: + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) + br_multicast_del_eht_set(eht_set); +} + +static bool br_multicast_del_eht_set_entry(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *src_addr, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_set *eht_set; + bool set_deleted = false; + + eht_set = br_multicast_eht_set_lookup(pg, src_addr); + if (!eht_set) + goto out; + + set_h = br_multicast_eht_set_entry_lookup(eht_set, h_addr); + if (!set_h) + goto out; + + __eht_del_set_entry(set_h); + + if (RB_EMPTY_ROOT(&eht_set->entry_tree)) { + br_multicast_del_eht_set(eht_set); + set_deleted = true; + } + +out: + return set_deleted; +} + +static void br_multicast_del_eht_host(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr) +{ + struct net_bridge_group_eht_set_entry *set_h; + struct net_bridge_group_eht_host *eht_host; + struct hlist_node *tmp; + + eht_host = br_multicast_eht_host_lookup(pg, h_addr); + if (!eht_host) + return; + + hlist_for_each_entry_safe(set_h, tmp, &eht_host->set_entries, host_list) + br_multicast_del_eht_set_entry(set_h->eht_set->pg, + &set_h->eht_set->src_addr, + &set_h->h_addr); +} + +static void __eht_allow_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } +} + +static bool __eht_allow_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_INCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + } + 
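Throughout these handlers the per-report source list arrives as a packed array, and the rewritten loops address entry i as srcs + (i * addr_size) instead of bumping the srcs pointer. A small standalone illustration of that access pattern follows; it is plain userspace C with made-up sample addresses, not bridge code.

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* three packed IPv4 sources, laid out as in an IGMPv3 group record */
	unsigned char srcs[] = {
		192, 0, 2, 1,
		192, 0, 2, 2,
		192, 0, 2, 3,
	};
	const size_t addr_size = 4;
	const unsigned int nsrcs = sizeof(srcs) / addr_size;
	char buf[INET_ADDRSTRLEN];

	for (unsigned int i = 0; i < nsrcs; i++) {
		struct in_addr a;

		/* entry i starts at srcs + (i * addr_size) */
		memcpy(&a, srcs + (i * addr_size), addr_size);
		printf("src[%u] = %s\n", i,
		       inet_ntop(AF_INET, &a, buf, sizeof(buf)));
	}
	return 0;
}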
+ return changed; +} + +static bool br_multicast_eht_allow(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + __eht_allow_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_allow_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +static bool __eht_block_incl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + bool changed = false; + struct br_ip src_ip; + u32 src_idx; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + + return changed; +} + +static bool __eht_block_excl(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false, host_excl = false; + union net_bridge_eht_addr eht_src_addr; + struct net_bridge_group_src *src_ent; + struct br_ip src_ip; + u32 src_idx; + + host_excl = !!(br_multicast_eht_host_filter_mode(pg, h_addr) == MCAST_EXCLUDE); + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + memset(&src_ip, 0, sizeof(src_ip)); + src_ip.proto = pg->key.addr.proto; + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + if (host_excl) { + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + MCAST_EXCLUDE, + false); + } else { + if (!br_multicast_del_eht_set_entry(pg, &eht_src_addr, + h_addr)) + continue; + memcpy(&src_ip, srcs + (src_idx * addr_size), addr_size); + src_ent = br_multicast_find_group_src(pg, &src_ip); + if (!src_ent) + continue; + br_multicast_del_group_src(src_ent, true); + changed = true; + } + } + + return changed; +} + +static bool br_multicast_eht_block(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size) +{ + bool changed = false; + + switch (br_multicast_eht_host_filter_mode(pg, h_addr)) { + case MCAST_INCLUDE: + changed = __eht_block_incl(pg, h_addr, srcs, nsrcs, addr_size); + break; + case MCAST_EXCLUDE: + changed = __eht_block_excl(pg, h_addr, srcs, nsrcs, addr_size); + break; + } + + return changed; +} + +/* flush_entries is true when changing mode */ +static bool __eht_inc_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + unsigned char filter_mode, + bool to_report) +{ + bool changed = false, flush_entries = to_report; + union net_bridge_eht_addr eht_src_addr; + u32 src_idx; + + if (br_multicast_eht_host_filter_mode(pg, h_addr) != filter_mode) + flush_entries = true; + + memset(&eht_src_addr, 0, sizeof(eht_src_addr)); + /* if we're changing mode del host and its entries */ + if (flush_entries) + br_multicast_del_eht_host(pg, h_addr); + for (src_idx = 0; src_idx < nsrcs; src_idx++) { + 
memcpy(&eht_src_addr, srcs + (src_idx * addr_size), addr_size); + br_multicast_create_eht_set_entry(pg, &eht_src_addr, h_addr, + filter_mode, false); + } + /* we can be missing sets only if we've deleted some entries */ + if (flush_entries) { + struct net_bridge *br = pg->key.port->br; + struct net_bridge_group_eht_set *eht_set; + struct net_bridge_group_src *src_ent; + struct hlist_node *tmp; + + hlist_for_each_entry_safe(src_ent, tmp, &pg->src_list, node) { + br_multicast_ip_src_to_eht_addr(&src_ent->addr, + &eht_src_addr); + if (!br_multicast_eht_set_lookup(pg, &eht_src_addr)) { + br_multicast_del_group_src(src_ent, true); + changed = true; + continue; + } + /* this is an optimization for TO_INCLUDE where we lower + * the set's timeout to LMQT to catch timeout hosts: + * - host A (timing out): set entries X, Y + * - host B: set entry Z (new from current TO_INCLUDE) + * sends BLOCK Z after LMQT but host A's EHT + * entries still exist (unless lowered to LMQT + * so they can timeout with the S,Gs) + * => we wait another LMQT, when we can just delete the + * group immediately + */ + if (!(src_ent->flags & BR_SGRP_F_SEND) || + filter_mode != MCAST_INCLUDE || + !to_report) + continue; + eht_set = br_multicast_eht_set_lookup(pg, + &eht_src_addr); + if (!eht_set) + continue; + mod_timer(&eht_set->timer, jiffies + br_multicast_lmqt(br)); + } + } + + return changed; +} + +static bool br_multicast_eht_inc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_INCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_INCLUDE); + + return changed; +} + +static bool br_multicast_eht_exc(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + bool to_report) +{ + bool changed; + + changed = __eht_inc_exc(pg, h_addr, srcs, nsrcs, addr_size, + MCAST_EXCLUDE, to_report); + br_eht_convert_host_filter_mode(pg, h_addr, MCAST_EXCLUDE); + + return changed; +} + +static bool __eht_ip4_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false, to_report = false; + + switch (grec_type) { + case IGMPV3_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, sizeof(__be32)); + break; + case IGMPV3_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(__be32)); + break; + case IGMPV3_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; + case IGMPV3_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case IGMPV3_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(__be32), to_report); + break; + } + + return changed; +} + +#if IS_ENABLED(CONFIG_IPV6) +static bool __eht_ip6_handle(struct net_bridge_port_group *pg, + union net_bridge_eht_addr *h_addr, + void *srcs, + u32 nsrcs, + int grec_type) +{ + bool changed = false, to_report = false; + + switch (grec_type) { + case MLD2_ALLOW_NEW_SOURCES: + br_multicast_eht_allow(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_BLOCK_OLD_SOURCES: + changed = br_multicast_eht_block(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr)); + break; + case MLD2_CHANGE_TO_INCLUDE: + to_report = true; + fallthrough; + 
case MLD2_MODE_IS_INCLUDE: + changed = br_multicast_eht_inc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; + case MLD2_CHANGE_TO_EXCLUDE: + to_report = true; + fallthrough; + case MLD2_MODE_IS_EXCLUDE: + changed = br_multicast_eht_exc(pg, h_addr, srcs, nsrcs, + sizeof(struct in6_addr), + to_report); + break; + } + + return changed; +} +#endif + +/* true means an entry was deleted */ +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type) +{ + bool eht_enabled = !!(pg->key.port->flags & BR_MULTICAST_FAST_LEAVE); + union net_bridge_eht_addr eht_host_addr; + bool changed = false; + + if (!eht_enabled) + goto out; + + memset(&eht_host_addr, 0, sizeof(eht_host_addr)); + memcpy(&eht_host_addr, h_addr, addr_size); + if (addr_size == sizeof(__be32)) + changed = __eht_ip4_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#if IS_ENABLED(CONFIG_IPV6) + else + changed = __eht_ip6_handle(pg, &eht_host_addr, srcs, nsrcs, + grec_type); +#endif + +out: + return changed; +} + +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit) +{ + struct net_bridge *br = p->br; + + if (!eht_hosts_limit) + return -EINVAL; + + spin_lock_bh(&br->multicast_lock); + p->multicast_eht_hosts_limit = eht_hosts_limit; + spin_unlock_bh(&br->multicast_lock); + + return 0; +} diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 762f273802cd..bd3962da345a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -18,6 +18,7 @@ #include "br_private_stp.h" #include "br_private_cfm.h" #include "br_private_tunnel.h" +#include "br_private_mcast_eht.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -199,6 +200,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_IN_OPEN */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */ + + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */ + 0; } @@ -283,7 +286,11 @@ static int br_port_fill_attrs(struct sk_buff *skb, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, - p->multicast_router)) + p->multicast_router) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + p->multicast_eht_hosts_limit) || + nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, + p->multicast_eht_hosts_cnt)) return -EMSGSIZE; #endif @@ -820,6 +827,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 }, [IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 }, + [IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 }, }; /* Change the state of the port and notify spanning tree */ @@ -955,6 +963,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) if (err) return err; } + + if (tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]) { + u32 hlimit; + + hlimit = nla_get_u32(tb[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT]); + err = br_multicast_eht_set_hosts_limit(p, hlimit); + if (err) + return err; + } #endif if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d62c6e1af64a..d242ba668e47 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -252,6 +252,8 @@ struct net_bridge_port_group { struct 
timer_list timer; struct timer_list rexmit_timer; struct hlist_node mglist; + struct rb_root eht_set_tree; + struct rb_root eht_host_tree; struct rhash_head rhnode; struct net_bridge_mcast_gc mcast_gc; @@ -308,6 +310,8 @@ struct net_bridge_port { #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ + u32 multicast_eht_hosts_limit; + u32 multicast_eht_hosts_cnt; unsigned char multicast_router; struct bridge_mcast_stats __percpu *mcast_stats; struct timer_list multicast_router_timer; @@ -846,6 +850,10 @@ void br_multicast_star_g_handle_mode(struct net_bridge_port_group *pg, u8 filter_mode); void br_multicast_sg_add_exclude_ports(struct net_bridge_mdb_entry *star_mp, struct net_bridge_port_group *sg); +struct net_bridge_group_src * +br_multicast_find_group_src(struct net_bridge_port_group *pg, struct br_ip *ip); +void br_multicast_del_group_src(struct net_bridge_group_src *src, + bool fastleave); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_private_mcast_eht.h b/net/bridge/br_private_mcast_eht.h new file mode 100644 index 000000000000..f89049f4892c --- /dev/null +++ b/net/bridge/br_private_mcast_eht.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (c) 2020, Nikolay Aleksandrov <nikolay@nvidia.com> + */ +#ifndef _BR_PRIVATE_MCAST_EHT_H_ +#define _BR_PRIVATE_MCAST_EHT_H_ + +#define BR_MCAST_DEFAULT_EHT_HOSTS_LIMIT 512 + +union net_bridge_eht_addr { + __be32 ip4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr ip6; +#endif +}; + +/* single host's list of set entries and filter_mode */ +struct net_bridge_group_eht_host { + struct rb_node rb_node; + + union net_bridge_eht_addr h_addr; + struct hlist_head set_entries; + unsigned int num_entries; + unsigned char filter_mode; + struct net_bridge_port_group *pg; +}; + +/* (host, src entry) added to a per-src set and host's list */ +struct net_bridge_group_eht_set_entry { + struct rb_node rb_node; + struct hlist_node host_list; + + union net_bridge_eht_addr h_addr; + struct timer_list timer; + struct net_bridge *br; + struct net_bridge_group_eht_set *eht_set; + struct net_bridge_group_eht_host *h_parent; + struct net_bridge_mcast_gc mcast_gc; +}; + +/* per-src set */ +struct net_bridge_group_eht_set { + struct rb_node rb_node; + + union net_bridge_eht_addr src_addr; + struct rb_root entry_tree; + struct timer_list timer; + struct net_bridge_port_group *pg; + struct net_bridge *br; + struct net_bridge_mcast_gc mcast_gc; +}; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING +void br_multicast_eht_clean_sets(struct net_bridge_port_group *pg); +bool br_multicast_eht_handle(struct net_bridge_port_group *pg, + void *h_addr, + void *srcs, + u32 nsrcs, + size_t addr_size, + int grec_type); +int br_multicast_eht_set_hosts_limit(struct net_bridge_port *p, + u32 eht_hosts_limit); + +static inline bool +br_multicast_eht_should_del_pg(const struct net_bridge_port_group *pg) +{ + return !!((pg->key.port->flags & BR_MULTICAST_FAST_LEAVE) && + RB_EMPTY_ROOT(&pg->eht_host_tree)); +} + +static inline bool +br_multicast_eht_hosts_over_limit(const struct net_bridge_port_group *pg) +{ + const struct net_bridge_port *p = pg->key.port; + + return !!(p->multicast_eht_hosts_cnt >= p->multicast_eht_hosts_limit); +} + +static inline void br_multicast_eht_hosts_inc(struct net_bridge_port_group *pg) +{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt++; +} + +static inline void br_multicast_eht_hosts_dec(struct net_bridge_port_group *pg) 
+{ + struct net_bridge_port *p = pg->key.port; + + p->multicast_eht_hosts_cnt--; +} +#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */ + +#endif /* _BR_PRIVATE_MCAST_EHT_H_ */ diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7a59cdddd3ce..b66305fae26b 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -16,6 +16,7 @@ #include <linux/sched/signal.h> #include "br_private.h" +#include "br_private_mcast_eht.h" struct brport_attribute { struct attribute attr; @@ -245,6 +246,29 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); +static ssize_t show_multicast_eht_hosts_limit(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_limit); +} + +static int store_multicast_eht_hosts_limit(struct net_bridge_port *p, + unsigned long v) +{ + return br_multicast_eht_set_hosts_limit(p, v); +} +static BRPORT_ATTR(multicast_eht_hosts_limit, 0644, + show_multicast_eht_hosts_limit, + store_multicast_eht_hosts_limit); + +static ssize_t show_multicast_eht_hosts_cnt(struct net_bridge_port *p, + char *buf) +{ + return sprintf(buf, "%u\n", p->multicast_eht_hosts_cnt); +} +static BRPORT_ATTR(multicast_eht_hosts_cnt, 0444, show_multicast_eht_hosts_cnt, + NULL); + BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); BRPORT_ATTR_FLAG(multicast_to_unicast, BR_MULTICAST_TO_UNICAST); #endif @@ -274,6 +298,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, &brport_attr_multicast_to_unicast, + &brport_attr_multicast_eht_hosts_limit, + &brport_attr_multicast_eht_hosts_cnt, #endif &brport_attr_proxyarp, &brport_attr_proxyarp_wifi, diff --git a/net/can/Kconfig b/net/can/Kconfig index 7c9958df91d3..a9ac5ffab286 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -4,7 +4,6 @@ # menuconfig CAN - depends on NET tristate "CAN bus subsystem support" help Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial diff --git a/net/can/gw.c b/net/can/gw.c index 8598d9da0e5f..ba4124805602 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -225,7 +225,7 @@ static void mod_store_ccdlc(struct canfd_frame *cf) if (ccf->len <= CAN_MAX_DLEN) return; - /* potentially broken values are catched in can_can_gw_rcv() */ + /* potentially broken values are caught in can_can_gw_rcv() */ if (ccf->len > CAN_MAX_RAW_DLC) return; diff --git a/net/core/dev.c b/net/core/dev.c index d9ce02e95992..6df3f1bcdc68 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4084,7 +4084,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_reset_mac_header(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP)) - __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED); + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); /* Disable soft irqs for various locks below. Also * stops preemption for RCU. diff --git a/net/core/devlink.c b/net/core/devlink.c index 738d4344d679..72ea79879762 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -8617,6 +8617,10 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister); * @resource_id: resource's id * @parent_resource_id: resource's parent id * @size_params: size parameters + * + * Generic resources should reuse the same names across drivers. 
+ * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst */ int devlink_resource_register(struct devlink *devlink, const char *resource_name, diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 105978604ffd..3fba429f1f57 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg) struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - BUG_ON(smp_processor_id() != cpu); + WARN_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3d6ab194d0f5..c313aaf2bce1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -55,7 +55,7 @@ #include <net/net_namespace.h> #define RTNL_MAX_TYPE 50 -#define RTNL_SLAVE_MAX_TYPE 36 +#define RTNL_SLAVE_MAX_TYPE 40 struct rtnl_link { rtnl_doit_func doit; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 145503d3f06b..2af12f7e170c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4721,6 +4721,7 @@ err: EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); void __skb_tstamp_tx(struct sk_buff *orig_skb, + const struct sk_buff *ack_skb, struct skb_shared_hwtstamps *hwtstamps, struct sock *sk, int tstype) { @@ -4743,7 +4744,8 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) && sk->sk_protocol == IPPROTO_TCP && sk->sk_type == SOCK_STREAM) { - skb = tcp_get_timestamping_opt_stats(sk, orig_skb); + skb = tcp_get_timestamping_opt_stats(sk, orig_skb, + ack_skb); opt_stats = true; } else #endif @@ -4772,7 +4774,7 @@ EXPORT_SYMBOL_GPL(__skb_tstamp_tx); void skb_tstamp_tx(struct sk_buff *orig_skb, struct skb_shared_hwtstamps *hwtstamps) { - return __skb_tstamp_tx(orig_skb, hwtstamps, orig_skb->sk, + return __skb_tstamp_tx(orig_skb, NULL, hwtstamps, orig_skb->sk, SCM_TSTAMP_SND); } EXPORT_SYMBOL_GPL(skb_tstamp_tx); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d86d8d11cfe4..4567de519603 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -309,7 +309,6 @@ proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, #endif static struct ctl_table net_core_table[] = { -#ifdef CONFIG_NET { .procname = "wmem_max", .data = &sysctl_wmem_max, @@ -507,7 +506,6 @@ static struct ctl_table net_core_table[] = { .proc_handler = set_default_qdisc }, #endif -#endif /* CONFIG_NET */ { .procname = "netdev_budget", .data = &netdev_budget, diff --git a/net/dcb/Makefile b/net/dcb/Makefile index 3016e5a7716a..2c0fa16ee2a9 100644 --- a/net/dcb/Makefile +++ b/net/dcb/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o +obj-y += dcbnl.o dcbevent.o diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig index 255df9b6e9e8..155b06163409 100644 --- a/net/dns_resolver/Kconfig +++ b/net/dns_resolver/Kconfig @@ -4,7 +4,7 @@ # config DNS_RESOLVER tristate "DNS Resolver support" - depends on NET && KEYS + depends on KEYS help Saying Y here will include support for the DNS Resolver key type which can be used to make upcalls to perform DNS lookups in diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 24036e3055a1..181220101a6e 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -68,6 +68,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list", [NETIF_F_HW_MACSEC_BIT] = 
"macsec-hw-offload", + [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding", }; const char diff --git a/net/ife/Kconfig b/net/ife/Kconfig index bcf650564db4..de36a5b91e50 100644 --- a/net/ife/Kconfig +++ b/net/ife/Kconfig @@ -4,7 +4,6 @@ # menuconfig NET_IFE - depends on NET tristate "Inter-FE based on IETF ForCES InterFE LFB" default n help diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 856ae516ac18..e1a17c6b473c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,6 +280,12 @@ #include <asm/ioctls.h> #include <net/busy_poll.h> +/* Track pending CMSGs. */ +enum { + TCP_CMSG_INQ = 1, + TCP_CMSG_TS = 2 +}; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -1739,6 +1745,20 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_set_rcvlowat); +static void tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping_internal *tss) +{ + if (skb->tstamp) + tss->ts[0] = ktime_to_timespec64(skb->tstamp); + else + tss->ts[0] = (struct timespec64) {0}; + + if (skb_hwtstamps(skb)->hwtstamp) + tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); + else + tss->ts[2] = (struct timespec64) {0}; +} + #ifdef CONFIG_MMU static const struct vm_operations_struct tcp_vm_ops = { }; @@ -1842,13 +1862,13 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, struct scm_timestamping_internal *tss, int *cmsg_flags); static int receive_fallback_to_copy(struct sock *sk, - struct tcp_zerocopy_receive *zc, int inq) + struct tcp_zerocopy_receive *zc, int inq, + struct scm_timestamping_internal *tss) { unsigned long copy_address = (unsigned long)zc->copybuf_address; - struct scm_timestamping_internal tss_unused; - int err, cmsg_flags_unused; struct msghdr msg = {}; struct iovec iov; + int err; zc->length = 0; zc->recv_skip_hint = 0; @@ -1862,7 +1882,7 @@ static int receive_fallback_to_copy(struct sock *sk, return err; err = tcp_recvmsg_locked(sk, &msg, inq, /*nonblock=*/1, /*flags=*/0, - &tss_unused, &cmsg_flags_unused); + tss, &zc->msg_flags); if (err < 0) return err; @@ -1903,21 +1923,27 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, return (__s32)copylen; } -static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc, - struct sock *sk, - struct sk_buff *skb, - u32 *seq, - s32 copybuf_len) +static int tcp_zc_handle_leftover(struct tcp_zerocopy_receive *zc, + struct sock *sk, + struct sk_buff *skb, + u32 *seq, + s32 copybuf_len, + struct scm_timestamping_internal *tss) { u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); if (!copylen) return 0; /* skb is null if inq < PAGE_SIZE. 
*/ - if (skb) + if (skb) { offset = *seq - TCP_SKB_CB(skb)->seq; - else + } else { skb = tcp_recv_skb(sk, *seq, &offset); + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } + } zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, seq); @@ -2004,9 +2030,37 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, err); } +static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping_internal *tss); +static void tcp_zc_finalize_rx_tstamp(struct sock *sk, + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) +{ + unsigned long msg_control_addr; + struct msghdr cmsg_dummy; + + msg_control_addr = (unsigned long)zc->msg_control; + cmsg_dummy.msg_control = (void *)msg_control_addr; + cmsg_dummy.msg_controllen = + (__kernel_size_t)zc->msg_controllen; + cmsg_dummy.msg_flags = in_compat_syscall() + ? MSG_CMSG_COMPAT : 0; + zc->msg_flags = 0; + if (zc->msg_control == msg_control_addr && + zc->msg_controllen == cmsg_dummy.msg_controllen) { + tcp_recv_timestamp(&cmsg_dummy, sk, tss); + zc->msg_control = (__u64) + ((uintptr_t)cmsg_dummy.msg_control); + zc->msg_controllen = + (__u64)cmsg_dummy.msg_controllen; + zc->msg_flags = (__u32)cmsg_dummy.msg_flags; + } +} + #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32 static int tcp_zerocopy_receive(struct sock *sk, - struct tcp_zerocopy_receive *zc) + struct tcp_zerocopy_receive *zc, + struct scm_timestamping_internal *tss) { u32 length = 0, offset, vma_len, avail_len, copylen = 0; unsigned long address = (unsigned long)zc->address; @@ -2023,6 +2077,7 @@ static int tcp_zerocopy_receive(struct sock *sk, int ret; zc->copybuf_len = 0; + zc->msg_flags = 0; if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; @@ -2033,7 +2088,7 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); if (inq && inq <= copybuf_len) - return receive_fallback_to_copy(sk, zc, inq); + return receive_fallback_to_copy(sk, zc, inq, tss); if (inq < PAGE_SIZE) { zc->length = 0; @@ -2078,6 +2133,11 @@ static int tcp_zerocopy_receive(struct sock *sk, } else { skb = tcp_recv_skb(sk, seq, &offset); } + + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + zc->msg_flags |= TCP_CMSG_TS; + } zc->recv_skip_hint = skb->len - offset; frags = skb_advance_to_frag(skb, offset, &offset_frag); if (!frags || offset_frag) @@ -2120,8 +2180,7 @@ out: mmap_read_unlock(current->mm); /* Try to copy straggler data. 
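	 * Timestamps gathered into tss, either in the main loop above or while
	 * handling the leftover below, are only turned into control messages
	 * later, by tcp_zc_finalize_rx_tstamp(), once
	 * getsockopt(TCP_ZEROCOPY_RECEIVE) sees TCP_CMSG_TS in zc->msg_flags.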
*/ if (!ret) - copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq, - copybuf_len); + copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss); if (length + copylen) { WRITE_ONCE(tp->copied_seq, seq); @@ -2142,20 +2201,6 @@ out: } #endif -static void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) -{ - if (skb->tstamp) - tss->ts[0] = ktime_to_timespec64(skb->tstamp); - else - tss->ts[0] = (struct timespec64) {0}; - - if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); - else - tss->ts[2] = (struct timespec64) {0}; -} - /* Similar to __sock_recv_timestamp, but does not require an skb */ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, struct scm_timestamping_internal *tss) @@ -2272,7 +2317,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (tp->recvmsg_inq) - *cmsg_flags = 1; + *cmsg_flags = TCP_CMSG_INQ; timeo = sock_rcvtimeo(sk, nonblock); /* Urgent data needs to be handled specially. */ @@ -2453,7 +2498,7 @@ skip_copy: if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, tss); - *cmsg_flags |= 2; + *cmsg_flags |= TCP_CMSG_TS; } if (used + offset < skb->len) @@ -2513,9 +2558,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, release_sock(sk); if (cmsg_flags && ret >= 0) { - if (cmsg_flags & 2) + if (cmsg_flags & TCP_CMSG_TS) tcp_recv_timestamp(msg, sk, &tss); - if (cmsg_flags & 1) { + if (cmsg_flags & TCP_CMSG_INQ) { inq = tcp_inq_hint(sk); put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq); } @@ -3767,11 +3812,24 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_EDT */ + nla_total_size(sizeof(u8)) + /* TCP_NLA_TTL */ 0; } +/* Returns TTL or hop limit of an incoming packet from skb. 
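 * For example, an ACK received over IPv4 with ip_hdr(skb)->ttl == 57 yields
 * 57, an IPv6 ACK yields ipv6_hdr(skb)->hop_limit, and any other protocol
 * yields 0.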
*/ +static u8 tcp_skb_ttl_or_hop_limit(const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return ip_hdr(skb)->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ipv6_hdr(skb)->hop_limit; + else + return 0; +} + struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, - const struct sk_buff *orig_skb) + const struct sk_buff *orig_skb, + const struct sk_buff *ack_skb) { const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; @@ -3827,6 +3885,9 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk, max_t(int, 0, tp->write_seq - tp->snd_nxt)); nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, TCP_NLA_PAD); + if (ack_skb) + nla_put_u8(stats, TCP_NLA_TTL, + tcp_skb_ttl_or_hop_limit(ack_skb)); return stats; } @@ -4083,6 +4144,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { + struct scm_timestamping_internal tss; struct tcp_zerocopy_receive zc = {}; int err; @@ -4098,11 +4160,18 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); - err = tcp_zerocopy_receive(sk, &zc); + err = tcp_zerocopy_receive(sk, &zc, &tss); release_sock(sk); - if (len >= offsetofend(struct tcp_zerocopy_receive, err)) - goto zerocopy_rcv_sk_err; + if (len >= offsetofend(struct tcp_zerocopy_receive, msg_flags)) + goto zerocopy_rcv_cmsg; switch (len) { + case offsetofend(struct tcp_zerocopy_receive, msg_flags): + goto zerocopy_rcv_cmsg; + case offsetofend(struct tcp_zerocopy_receive, msg_controllen): + case offsetofend(struct tcp_zerocopy_receive, msg_control): + case offsetofend(struct tcp_zerocopy_receive, flags): + case offsetofend(struct tcp_zerocopy_receive, copybuf_len): + case offsetofend(struct tcp_zerocopy_receive, copybuf_address): case offsetofend(struct tcp_zerocopy_receive, err): goto zerocopy_rcv_sk_err; case offsetofend(struct tcp_zerocopy_receive, inq): @@ -4111,6 +4180,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level, default: goto zerocopy_rcv_out; } +zerocopy_rcv_cmsg: + if (zc.msg_flags & TCP_CMSG_TS) + tcp_zc_finalize_rx_tstamp(sk, &zc, &tss); + else + zc.msg_flags = 0; zerocopy_rcv_sk_err: if (!err) zc.err = sock_error(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7dfca0a38cd..d4f66aba9fd8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3145,7 +3145,7 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) } static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, - u32 prior_snd_una) + const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct skb_shared_info *shinfo; @@ -3157,7 +3157,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, if (!before(shinfo->tskey, prior_snd_una) && before(shinfo->tskey, tcp_sk(sk)->snd_una)) { tcp_skb_tsorted_save(skb) { - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + __skb_tstamp_tx(skb, ack_skb, NULL, sk, SCM_TSTAMP_ACK); } tcp_skb_tsorted_restore(skb); } } @@ -3166,8 +3166,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. 
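 * The ACK skb is now passed down as well, so that tcp_ack_tstamp() can
 * forward it to __skb_tstamp_tx(); with SOF_TIMESTAMPING_OPT_STATS this lets
 * tcp_get_timestamping_opt_stats() report the ACK's TTL or hop limit as
 * TCP_NLA_TTL.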
*/ -static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, - u32 prior_snd_una, +static int tcp_clean_rtx_queue(struct sock *sk, const struct sk_buff *ack_skb, + u32 prior_fack, u32 prior_snd_una, struct tcp_sacktag_state *sack, bool ece_ack) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3256,7 +3256,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (!fully_acked) break; - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) @@ -3274,7 +3274,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->snd_up = tp->snd_una; if (skb) { - tcp_ack_tstamp(sk, skb, prior_snd_una); + tcp_ack_tstamp(sk, skb, ack_skb, prior_snd_una); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) flag |= FLAG_SACK_RENEGING; } @@ -3809,8 +3809,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state, - flag & FLAG_ECE); + flag |= tcp_clean_rtx_queue(sk, skb, prior_fack, prior_snd_una, + &sack_state, flag & FLAG_ECE); tcp_rack_update_reo_wnd(sk, &rs); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1168d186cc43..41249705d9e9 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -460,7 +460,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, if (skb->dev->features & NETIF_F_GRO_FRAGLIST) NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; - if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { pp = call_gro_receive(udp_gro_receive_segment, head, skb); return pp; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9edc5bb2d531..f2337fb756ac 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -205,6 +205,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -260,6 +261,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_desync_factor = MAX_DESYNC_FACTOR, .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, + .ra_defrtr_metric = IP6_RT_PRIO_USER, .accept_ra_from_local = 0, .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, @@ -5476,6 +5478,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_RA_DEFRTR_METRIC] = cnf->ra_defrtr_metric; array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF @@ -6669,6 +6672,14 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = proc_dointvec, }, { + .procname = "ra_defrtr_metric", + .data = &ipv6_devconf.ra_defrtr_metric, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = (void *)SYSCTL_ONE, + }, + { .procname = "accept_ra_min_hop_limit", .data = &ipv6_devconf.accept_ra_min_hop_limit, .maxlen = sizeof(int), diff 
--git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76717478f173..c467c6419893 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1173,6 +1173,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; struct fib6_info *rt = NULL; + u32 defrtr_usr_metric; struct net *net; int lifetime; struct ndisc_options ndopts; @@ -1303,18 +1304,21 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } } - if (rt && lifetime == 0) { + /* Set default route metric as specified by user */ + defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; + /* delete the route if lifetime is 0 or if metric needs change */ + if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } - ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", - rt, lifetime, skb->dev->name); + ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", + rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, - skb->dev, pref); + skb->dev, pref, defrtr_usr_metric); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 188e114b29b4..41d8f801b75f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4252,11 +4252,12 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, struct fib6_info *rt6_add_dflt_router(struct net *net, const struct in6_addr *gwaddr, struct net_device *dev, - unsigned int pref) + unsigned int pref, + u32 defrtr_usr_metric) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, - .fc_metric = IP6_RT_PRIO_USER, + .fc_metric = defrtr_usr_metric, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile index 59755a9e2f9b..9e7da0acc58c 100644 --- a/net/l3mdev/Makefile +++ b/net/l3mdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the L3 device API # -obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o +obj-y += l3mdev.o diff --git a/net/llc/Kconfig b/net/llc/Kconfig index b0e646ac47eb..7f79f5e134f9 100644 --- a/net/llc/Kconfig +++ b/net/llc/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config LLC tristate - depends on NET config LLC2 tristate "ANSI/IEEE 802.2 LLC type 2 Support" diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 9b1f6298bbdb..83976b9ee99b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -60,15 +60,20 @@ static bool addresses_equal(const struct mptcp_addr_info *a, { bool addr_equals = false; - if (a->family != b->family) - return false; - - if (a->family == AF_INET) - addr_equals = a->addr.s_addr == b->addr.s_addr; + if (a->family == b->family) { + if (a->family == AF_INET) + addr_equals = a->addr.s_addr == b->addr.s_addr; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else - addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + else + addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); + } else if (a->family == AF_INET) { + if (ipv6_addr_v4mapped(&b->addr6)) + addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; + } else if (b->family == AF_INET) { + if (ipv6_addr_v4mapped(&a->addr6)) + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; #endif + } if (!addr_equals) return false; @@ -137,6 +142,7 @@ select_local_address(const struct pm_nl_pernet *pernet, struct mptcp_sock *msk) { struct 
mptcp_pm_addr_entry *entry, *ret = NULL; + struct sock *sk = (struct sock *)msk; rcu_read_lock(); __mptcp_flush_join_list(msk); @@ -144,11 +150,20 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!(entry->addr.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue; + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + /* avoid any address already in use by subflows and * pending join */ - if (entry->addr.family == ((struct sock *)msk)->sk_family && - !lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { + if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) { ret = entry; break; } @@ -310,7 +325,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { - struct mptcp_addr_info remote = { 0 }; struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *local; struct pm_nl_pernet *pernet; @@ -344,13 +358,14 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check if should create a new subflow */ if (msk->pm.local_addr_used < msk->pm.local_addr_max && msk->pm.subflows < msk->pm.subflows_max) { - remote_address((struct sock_common *)sk, &remote); - local = select_local_address(pernet, msk); if (local) { + struct mptcp_addr_info remote = { 0 }; + msk->pm.local_addr_used++; msk->pm.subflows++; check_work_pending(msk); + remote_address((struct sock_common *)sk, &remote); spin_unlock_bh(&msk->pm.lock); __mptcp_subflow_connect(sk, &local->addr, &remote); spin_lock_bh(&msk->pm.lock); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f998a077c7dd..a033bf9c26ee 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -45,6 +45,9 @@ static struct percpu_counter mptcp_sockets_allocated; static void __mptcp_destroy_sock(struct sock *sk); static void __mptcp_check_send_data_fin(struct sock *sk); +DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); +static struct net_device mptcp_napi_dev; + /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not * completed yet or has failed, return the subflow socket. * Otherwise return NULL. 
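The per-CPU mptcp_delegated_actions together with the dummy mptcp_napi_dev declared just above form a small deferred-work mechanism: a subflow action that cannot run right away is queued on the local CPU's list and a NAPI instance is scheduled to drain that list in softirq context. A minimal sketch of the same queue-and-poll pattern, using hypothetical deferred_* names rather than the MPTCP structures, might look like this:

#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/percpu.h>

/* Sketch only: hypothetical names, not the MPTCP implementation. */
struct deferred_actions {
	struct napi_struct napi;
	struct list_head head;		/* protected by local BH disable */
};

static DEFINE_PER_CPU(struct deferred_actions, deferred_actions);
static struct net_device deferred_napi_dev;

struct deferred_item {
	struct list_head node;
};

/* Caller runs with BH disabled, so the per-CPU list needs no extra lock. */
static void deferred_schedule(struct deferred_item *item)
{
	struct deferred_actions *da = this_cpu_ptr(&deferred_actions);
	bool kick = list_empty(&da->head);

	list_add_tail(&item->node, &da->head);
	if (kick)
		napi_schedule(&da->napi);
}

static int deferred_poll(struct napi_struct *napi, int budget)
{
	struct deferred_actions *da =
		container_of(napi, struct deferred_actions, napi);
	int done = 0;

	while (!list_empty(&da->head)) {
		struct deferred_item *item;

		item = list_first_entry(&da->head, struct deferred_item, node);
		list_del_init(&item->node);
		/* process the deferred action for 'item' here */
		if (++done == budget)
			return budget;
	}

	/* report 0 work, as mptcp_napi_poll() does, before going idle */
	napi_complete_done(napi, 0);
	return done;
}

static void __init deferred_init(void)
{
	int cpu;

	init_dummy_netdev(&deferred_napi_dev);
	for_each_possible_cpu(cpu) {
		struct deferred_actions *da = per_cpu_ptr(&deferred_actions, cpu);

		INIT_LIST_HEAD(&da->head);
		netif_tx_napi_add(&deferred_napi_dev, &da->napi, deferred_poll,
				  NAPI_POLL_WEIGHT);
		napi_enable(&da->napi);
	}
}

The dummy net_device exists only so the per-CPU NAPI instances have a device to attach to; scheduling and budget handling then come from the ordinary softirq machinery.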
@@ -114,11 +117,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; - - /* accept() will wait on first subflow sk_wq, and we always wakes up - * via msk->sk_socket - */ - RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + mptcp_sock_graft(msk->first, sk->sk_socket); return 0; } @@ -734,10 +733,14 @@ wake: void __mptcp_flush_join_list(struct mptcp_sock *msk) { + struct mptcp_subflow_context *subflow; + if (likely(list_empty(&msk->join_list))) return; spin_lock_bh(&msk->join_list_lock); + list_for_each_entry(subflow, &msk->join_list, node) + mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow)); list_splice_tail_init(&msk->join_list, &msk->conn_list); spin_unlock_bh(&msk->join_list_lock); } @@ -1037,13 +1040,6 @@ out: __mptcp_update_wmem(sk); sk_mem_reclaim_partial(sk); } - - if (sk_stream_is_writeable(sk)) { - /* pairs with memory barrier in mptcp_poll */ - smp_mb(); - if (test_and_clear_bit(MPTCP_NOSPACE, &msk->flags)) - sk_stream_write_space(sk); - } } if (snd_una == READ_ONCE(msk->snd_nxt)) { @@ -1362,8 +1358,7 @@ struct subflow_send_info { u64 ratio; }; -static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, - u32 *sndbuf) +static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; @@ -1374,24 +1369,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, sock_owned_by_me((struct sock *)msk); - *sndbuf = 0; if (__mptcp_check_fallback(msk)) { if (!msk->first) return NULL; - *sndbuf = msk->first->sk_sndbuf; return sk_stream_memory_free(msk->first) ? msk->first : NULL; } /* re-use last subflow, if the burst allow that */ if (msk->last_snd && msk->snd_burst > 0 && sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { - mptcp_for_each_subflow(msk, subflow) { - ssk = mptcp_subflow_tcp_sock(subflow); - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); - } + mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) return msk->last_snd; - } /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < 2; ++i) { @@ -1404,8 +1392,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, continue; nr_active += !subflow->backup; - *sndbuf = max(tcp_sk(ssk)->snd_wnd, *sndbuf); - if (!sk_stream_memory_free(subflow->tcp_sock)) + if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) continue; pace = READ_ONCE(ssk->sk_pacing_rate); @@ -1431,9 +1418,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk, if (send_info[0].ssk) { msk->last_snd = send_info[0].ssk; msk->snd_burst = min_t(int, MPTCP_SEND_BURST_SIZE, - sk_stream_wspace(msk->last_snd)); + tcp_sk(msk->last_snd)->snd_wnd); return msk->last_snd; } + return NULL; } @@ -1454,7 +1442,6 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) }; struct mptcp_data_frag *dfrag; int len, copied = 0; - u32 sndbuf; while ((dfrag = mptcp_send_head(sk))) { info.sent = dfrag->already_sent; @@ -1465,12 +1452,7 @@ static void mptcp_push_pending(struct sock *sk, unsigned int flags) prev_ssk = ssk; __mptcp_flush_join_list(msk); - ssk = mptcp_subflow_get_send(msk, &sndbuf); - - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, sndbuf); + ssk = mptcp_subflow_get_send(msk); /* try to keep the subflow socket lock across * 
consecutive xmit on the same socket @@ -1527,7 +1509,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info; struct mptcp_data_frag *dfrag; + struct sock *xmit_ssk; int len, copied = 0; + bool first = true; info.flags = 0; while ((dfrag = mptcp_send_head(sk))) { @@ -1537,10 +1521,17 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) while (len > 0) { int ret = 0; - /* do auto tuning */ - if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && - ssk->sk_sndbuf > READ_ONCE(sk->sk_sndbuf)) - WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + /* the caller already invoked the packet scheduler, + * check for a different subflow usage only after + * spooling the first chunk of data + */ + xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk)); + if (!xmit_ssk) + goto out; + if (xmit_ssk != ssk) { + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); + goto out; + } if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { __mptcp_update_wmem(sk); @@ -1560,6 +1551,7 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) msk->tx_pending_data -= ret; copied += ret; len -= ret; + first = false; } WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); } @@ -1579,6 +1571,15 @@ out: } } +static void mptcp_set_nospace(struct sock *sk) +{ + /* enable autotune */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + + /* will be cleared on avail space */ + set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1680,7 +1681,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) continue; wait_for_memory: - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) @@ -2116,9 +2117,6 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { - bool dispose_socket = false; - struct socket *sock; - list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2126,11 +2124,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* if we are invoked by the msk cleanup code, the subflow is * already orphaned */ - sock = ssk->sk_socket; - if (sock) { - dispose_socket = sock != sk->sk_socket; + if (ssk->sk_socket) sock_orphan(ssk); - } subflow->disposable = 1; @@ -2148,8 +2143,6 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, __sock_put(ssk); } release_sock(ssk); - if (dispose_socket) - iput(SOCK_INODE(sock)); sock_put(ssk); } @@ -2536,6 +2529,12 @@ static void __mptcp_destroy_sock(struct sock *sk) pr_debug("msk=%p", msk); + /* dispose the ancillatory tcp socket, if any */ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } + /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). 
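	 * __mptcp_flush_join_list() takes the same lock and, while splicing
	 * joined subflows onto conn_list, now also propagates their sndbuf
	 * to the msk via mptcp_propagate_sndbuf().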
*/ @@ -2586,20 +2585,10 @@ cleanup: inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow, dispose_socket; - struct socket *sock; + bool slow = lock_sock_fast(ssk); - slow = lock_sock_fast(ssk); - sock = ssk->sk_socket; - dispose_socket = sock && sock != sk->sk_socket; sock_orphan(ssk); unlock_sock_fast(ssk, slow); - - /* for the outgoing subflows we additionally need to free - * the associated socket - */ - if (dispose_socket) - iput(SOCK_INODE(sock)); } sock_orphan(sk); @@ -2928,10 +2917,16 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk) if (!mptcp_send_head(sk)) return; - if (!sock_owned_by_user(sk)) - __mptcp_subflow_push_pending(sk, ssk); - else + if (!sock_owned_by_user(sk)) { + struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk)); + + if (xmit_ssk == ssk) + __mptcp_subflow_push_pending(sk, ssk); + else if (xmit_ssk) + mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk)); + } else { set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + } } #define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED) @@ -2979,6 +2974,20 @@ static void mptcp_release_cb(struct sock *sk) } } +void mptcp_subflow_process_delegated(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + + mptcp_data_lock(sk); + if (!sock_owned_by_user(sk)) + __mptcp_subflow_push_pending(sk, ssk); + else + set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags); + mptcp_data_unlock(sk); + mptcp_subflow_delegated_done(subflow); +} + static int mptcp_hash(struct sock *sk) { /* should never be called, @@ -3041,7 +3050,7 @@ void mptcp_finish_connect(struct sock *ssk) mptcp_rcv_space_init(msk, ssk); } -static void mptcp_sock_graft(struct sock *sk, struct socket *parent) +void mptcp_sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); @@ -3284,6 +3293,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); + mptcp_propagate_sndbuf(newsk, msk->first); /* set ssk->sk_socket of accept()ed flows to mptcp socket. * This is needed so NOSPACE flag can be set from tcp stack. @@ -3324,7 +3334,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; - set_bit(MPTCP_NOSPACE, &msk->flags); + mptcp_set_nospace(sk); smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ if (sk_stream_is_writeable(sk)) return EPOLLOUT | EPOLLWRNORM; @@ -3388,13 +3398,58 @@ static struct inet_protosw mptcp_protosw = { .flags = INET_PROTOSW_ICSK, }; +static int mptcp_napi_poll(struct napi_struct *napi, int budget) +{ + struct mptcp_delegated_action *delegated; + struct mptcp_subflow_context *subflow; + int work_done = 0; + + delegated = container_of(napi, struct mptcp_delegated_action, napi); + while ((subflow = mptcp_subflow_delegated_next(delegated)) != NULL) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + bh_lock_sock_nested(ssk); + if (!sock_owned_by_user(ssk) && + mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + /* ... elsewhere tcp_release_cb_override already processed + * the action or will do at next release_sock(). + * In both case must dequeue the subflow here - on the same + * CPU that scheduled it. 
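	 * The sock_put() below drops the reference taken by
	 * mptcp_subflow_delegate() when the subflow was queued on this list.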
+ */ + bh_unlock_sock(ssk); + sock_put(ssk); + + if (++work_done == budget) + return budget; + } + + /* always provide a 0 'work_done' argument, so that napi_complete_done + * will not try accessing the NULL napi->dev ptr + */ + napi_complete_done(napi, 0); + return work_done; +} + void __init mptcp_proto_init(void) { + struct mptcp_delegated_action *delegated; + int cpu; + mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL)) panic("Failed to allocate MPTCP pcpu counter\n"); + init_dummy_netdev(&mptcp_napi_dev); + for_each_possible_cpu(cpu) { + delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu); + INIT_LIST_HEAD(&delegated->head); + netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&delegated->napi); + } + mptcp_subflow_init(); mptcp_pm_init(); mptcp_token_init(); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d6400ad2d615..1460705aaad0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -378,6 +378,15 @@ enum mptcp_data_avail { MPTCP_SUBFLOW_OOO_DATA }; +struct mptcp_delegated_action { + struct napi_struct napi; + struct list_head head; +}; + +DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); + +#define MPTCP_DELEGATE_SEND 0 + /* MPTCP subflow context */ struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ @@ -415,6 +424,9 @@ struct mptcp_subflow_context { u8 local_id; u8 remote_id; + long delegated_status; + struct list_head delegated_node; /* link into delegated_action, protected by local BH */ + struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ const struct inet_connection_sock_af_ops *icsk_af_ops; @@ -463,6 +475,61 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk, spin_unlock_bh(&msk->join_list_lock); } +void mptcp_subflow_process_delegated(struct sock *ssk); + +static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow) +{ + struct mptcp_delegated_action *delegated; + bool schedule; + + /* The implied barrier pairs with mptcp_subflow_delegated_done(), and + * ensures the below list check sees list updates done prior to status + * bit changes + */ + if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) { + /* still on delegated list from previous scheduling */ + if (!list_empty(&subflow->delegated_node)) + return; + + /* the caller held the subflow bh socket lock */ + lockdep_assert_in_softirq(); + + delegated = this_cpu_ptr(&mptcp_delegated_actions); + schedule = list_empty(&delegated->head); + list_add_tail(&subflow->delegated_node, &delegated->head); + sock_hold(mptcp_subflow_tcp_sock(subflow)); + if (schedule) + napi_schedule(&delegated->napi); + } +} + +static inline struct mptcp_subflow_context * +mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) +{ + struct mptcp_subflow_context *ret; + + if (list_empty(&delegated->head)) + return NULL; + + ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); + list_del_init(&ret->delegated_node); + return ret; +} + +static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow) +{ + return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + +static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow) +{ + /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before + * touching the status bit + */ + 
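	/* Without this ordering a remote CPU could see the bit already clear
	 * while still observing a stale, non-empty delegated_node, and skip
	 * re-queueing the subflow in mptcp_subflow_delegate().
	 */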
smp_wmb(); + clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); +} + int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, @@ -473,6 +540,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_sock_graft(struct sock *sk, struct socket *parent); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -521,6 +589,25 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk) READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt); } +static inline bool mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk) +{ + if ((sk->sk_userlocks & SOCK_SNDBUF_LOCK) || ssk->sk_sndbuf <= READ_ONCE(sk->sk_sndbuf)) + return false; + + WRITE_ONCE(sk->sk_sndbuf, ssk->sk_sndbuf); + return true; +} + +static inline void mptcp_write_space(struct sock *sk) +{ + if (sk_stream_is_writeable(sk)) { + /* pairs with memory barrier in mptcp_poll */ + smp_mb(); + if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags)) + sk_stream_write_space(sk); + } +} + void mptcp_destroy_common(struct mptcp_sock *msk); void __init mptcp_token_init(void); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 278cbe3e539e..586156281e5a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -18,12 +18,15 @@ #include <net/tcp.h> #if IS_ENABLED(CONFIG_MPTCP_IPV6) #include <net/ip6_route.h> +#include <net/transp_v6.h> #endif #include <net/mptcp.h> #include <uapi/linux/mptcp.h> #include "protocol.h" #include "mib.h" +static void mptcp_subflow_ops_undo_override(struct sock *ssk); + static void SUBFLOW_REQ_INC_STATS(struct request_sock *req, enum linux_mptcp_mib_field field) { @@ -343,6 +346,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (subflow->conn_finished) return; + mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -427,6 +431,7 @@ drop: static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops; static struct inet_connection_sock_af_ops subflow_v6_specific; static struct inet_connection_sock_af_ops subflow_v6m_specific; +static struct proto tcpv6_prot_override; static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) { @@ -508,6 +513,8 @@ static void subflow_ulp_fallback(struct sock *sk, icsk->icsk_ulp_ops = NULL; rcu_assign_pointer(icsk->icsk_ulp_data, NULL); tcp_sk(sk)->is_mptcp = 0; + + mptcp_subflow_ops_undo_override(sk); } static void subflow_drop_ctx(struct sock *ssk) @@ -681,6 +688,7 @@ dispose_child: } static struct inet_connection_sock_af_ops subflow_specific; +static struct proto tcp_prot_override; enum mapping_status { MAPPING_OK, @@ -1040,7 +1048,10 @@ static void subflow_data_ready(struct sock *sk) static void subflow_write_space(struct sock *ssk) { - /* we take action in __mptcp_clean_una() */ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); } static struct inet_connection_sock_af_ops * @@ -1074,21 +1085,31 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped) #endif static void mptcp_info2sockaddr(const struct mptcp_addr_info *info, - struct sockaddr_storage *addr) + struct sockaddr_storage *addr, + unsigned 
short family) { memset(addr, 0, sizeof(*addr)); - addr->ss_family = info->family; + addr->ss_family = family; if (addr->ss_family == AF_INET) { struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; - in_addr->sin_addr = info->addr; + if (info->family == AF_INET) + in_addr->sin_addr = info->addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + else if (ipv6_addr_v4mapped(&info->addr6)) + in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3]; +#endif in_addr->sin_port = info->port; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) else if (addr->ss_family == AF_INET6) { struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr; - in6_addr->sin6_addr = info->addr6; + if (info->family == AF_INET) + ipv6_addr_set_v4mapped(info->addr.s_addr, + &in6_addr->sin6_addr); + else + in6_addr->sin6_addr = info->addr6; in6_addr->sin6_port = info->port; } #endif @@ -1132,11 +1153,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; - mptcp_info2sockaddr(loc, &addr); + mptcp_info2sockaddr(loc, &addr, ssk->sk_family); addrlen = sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (loc->family == AF_INET6) + if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif ssk->sk_bound_dev_if = loc->ifindex; @@ -1152,13 +1173,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, subflow->remote_id = remote_id; subflow->request_join = 1; subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - mptcp_info2sockaddr(remote, &addr); + mptcp_info2sockaddr(remote, &addr, ssk->sk_family); mptcp_add_pending_subflow(msk, subflow); err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK); if (err && err != -EINPROGRESS) goto failed_unlink; + /* discard the subflow socket */ + mptcp_sock_graft(ssk, sk->sk_socket); + iput(SOCK_INODE(sf)); return err; failed_unlink: @@ -1196,6 +1220,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child) #endif /* CONFIG_SOCK_CGROUP_DATA */ } +static void mptcp_subflow_ops_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot) + ssk->sk_prot = &tcpv6_prot_override; + else +#endif + ssk->sk_prot = &tcp_prot_override; +} + +static void mptcp_subflow_ops_undo_override(struct sock *ssk) +{ +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (ssk->sk_prot == &tcpv6_prot_override) + ssk->sk_prot = &tcpv6_prot; + else +#endif + ssk->sk_prot = &tcp_prot; +} int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) { struct mptcp_subflow_context *subflow; @@ -1251,6 +1294,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) *new_sock = sf; sock_hold(sk); subflow->conn = sk; + mptcp_subflow_ops_override(sf->sk); return 0; } @@ -1267,6 +1311,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, rcu_assign_pointer(icsk->icsk_ulp_data, ctx); INIT_LIST_HEAD(&ctx->node); + INIT_LIST_HEAD(&ctx->delegated_node); pr_debug("subflow=%p", ctx); @@ -1299,6 +1344,7 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); if (subflow_simultaneous_connect(sk)) { + mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(mptcp_sk(parent), sk); pr_fallback(mptcp_sk(parent)); @@ -1378,6 +1424,7 @@ static void subflow_ulp_release(struct sock *ssk) sock_put(sk); } + mptcp_subflow_ops_undo_override(ssk); if (release) kfree_rcu(ctx, rcu); } @@ -1431,6 
+1478,16 @@ static void subflow_ulp_clone(const struct request_sock *req, } } +static void tcp_release_cb_override(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + + if (mptcp_subflow_has_delegated_action(subflow)) + mptcp_subflow_process_delegated(ssk); + + tcp_release_cb(ssk); +} + static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = { .name = "mptcp", .owner = THIS_MODULE, @@ -1471,6 +1528,9 @@ void __init mptcp_subflow_init(void) subflow_specific.syn_recv_sock = subflow_syn_recv_sock; subflow_specific.sk_rx_dst_set = subflow_finish_connect; + tcp_prot_override = tcp_prot; + tcp_prot_override.release_cb = tcp_release_cb_override; + #if IS_ENABLED(CONFIG_MPTCP_IPV6) subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops; subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req; @@ -1486,6 +1546,9 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.net_frag_header_len = 0; + + tcpv6_prot_override = tcpv6_prot; + tcpv6_prot_override.release_cb = tcp_release_cb_override; #endif mptcp_diag_subflow_init(&subflow_ulp_ops); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49fbef0d99be..1a92063c73a4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Core Netfilter Configuration" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER config NETFILTER_INGRESS bool "Netfilter ingress support" diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index eb0e329f9b8d..c39a1e35c104 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -4,7 +4,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER + depends on INET && NETFILTER depends on (NF_CONNTRACK || NF_CONNTRACK=n) help IP Virtual Server support will let you build a high-performance diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 96b91674dd37..466a0279b93e 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -4,7 +4,6 @@ # menuconfig NFC - depends on NET depends on RFKILL || !RFKILL tristate "NFC subsystem support" default n diff --git a/net/psample/Kconfig b/net/psample/Kconfig index 028f514a9c60..be0b839209ba 100644 --- a/net/psample/Kconfig +++ b/net/psample/Kconfig @@ -4,7 +4,6 @@ # menuconfig PSAMPLE - depends on NET tristate "Packet-sampling netlink channel" default n help diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6fe4e5cc807c..e2e4353db8a7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1866,7 +1866,8 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl) + struct Qdisc *q, unsigned long cl, + struct netlink_ext_ack *extack) { u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; @@ -1885,7 +1886,7 @@ static int tclass_del_notify(struct net *net, return -EINVAL; } - err = cops->delete(q, cl); + err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; @@ -2088,7 +2089,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = tclass_del_notify(net, cops, skb, n, q, cl); + err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 007bd2d9f1ff..d0c9a57398fc 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -320,7 +320,8 @@ err_out: return error; } -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) +static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)arg; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 53d45e029c36..320b3d31fa97 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1675,7 +1675,8 @@ failure: return err; } -static int cbq_delete(struct Qdisc *sch, unsigned long arg) +static int cbq_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index dde564670ad8..fc1e47069593 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -146,7 +146,8 @@ static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) kfree(cl); } -static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +static int drr_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl = (struct drr_class *)arg; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 2b88710994d7..cd2748e2d4a2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -150,7 +150,8 @@ errout: return err; } -static int dsmark_delete(struct Qdisc *sch, unsigned long arg) +static int dsmark_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct dsmark_qdisc_data *p = qdisc_priv(sch); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d1902fca9844..bf0034c66e35 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1090,7 +1090,8 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) } static int -hfsc_delete_class(struct Qdisc *sch, unsigned long arg) +hfsc_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index cd70dbcbd72f..dff3adf5a915 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -114,6 +114,7 @@ struct htb_class { * Written often fields */ struct gnet_stats_basic_packed bstats; + struct gnet_stats_basic_packed bstats_bias; struct tc_htb_xstats xstats; /* our special stats */ /* token bucket parameters */ @@ -174,6 +175,11 @@ struct htb_sched { int row_mask[TC_HTB_MAXDEPTH]; struct htb_level hlevel[TC_HTB_MAXDEPTH]; + + struct Qdisc **direct_qdiscs; + unsigned int num_direct_qdiscs; + + bool offload; }; /* find class in global hash table using given handle */ @@ -957,7 +963,7 @@ static void 
htb_reset(struct Qdisc *sch) if (cl->level) memset(&cl->inner, 0, sizeof(cl->inner)); else { - if (cl->leaf.q) + if (cl->leaf.q && !q->offload) qdisc_reset(cl->leaf.q); } cl->prio_activity = 0; @@ -980,6 +986,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, [TCA_HTB_RATE64] = { .type = NLA_U64 }, [TCA_HTB_CEIL64] = { .type = NLA_U64 }, + [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, }; static void htb_work_func(struct work_struct *work) @@ -992,12 +999,27 @@ static void htb_work_func(struct work_struct *work) rcu_read_unlock(); } +static void htb_set_lockdep_class_child(struct Qdisc *q) +{ + static struct lock_class_key child_key; + + lockdep_set_class(qdisc_lock(q), &child_key); +} + +static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) +{ + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); +} + static int htb_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; + unsigned int ntx; int err; qdisc_watchdog_init(&q->watchdog, sch); @@ -1022,9 +1044,26 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, if (gopt->version != HTB_VER >> 16) return -EINVAL; + q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); + + if (q->offload) { + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + q->num_direct_qdiscs = dev->real_num_tx_queues; + q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, + sizeof(*q->direct_qdiscs), + GFP_KERNEL); + if (!q->direct_qdiscs) + return -ENOMEM; + } + err = qdisc_class_hash_init(&q->clhash); if (err < 0) - return err; + goto err_free_direct_qdiscs; qdisc_skb_head_init(&q->direct_queue); @@ -1037,7 +1076,107 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt, q->rate2quantum = 1; q->defcls = gopt->defcls; + if (!q->offload) + return 0; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *qdisc; + + qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, 0), extack); + if (!qdisc) { + err = -ENOMEM; + goto err_free_qdiscs; + } + + htb_set_lockdep_class_child(qdisc); + q->direct_qdiscs[ntx] = qdisc; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + } + + sch->flags |= TCQ_F_MQROOT; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_CREATE, + .parent_classid = TC_H_MAJ(sch->handle) >> 16, + .classid = TC_H_MIN(q->defcls), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + goto err_free_qdiscs; + return 0; + +err_free_qdiscs: + /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */ + q->offload = false; + + for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx]; + ntx++) + qdisc_put(q->direct_qdiscs[ntx]); + + qdisc_class_hash_destroy(&q->clhash); + /* Prevent use-after-free and double-free when htb_destroy gets called. 
+ */ + q->clhash.hash = NULL; + q->clhash.hashsize = 0; + +err_free_direct_qdiscs: + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; + return err; +} + +static void htb_attach_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct htb_sched *q = qdisc_priv(sch); + unsigned int ntx; + + for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { + struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; + + old = dev_graft_qdisc(qdisc->dev_queue, qdisc); + qdisc_put(old); + qdisc_hash_add(qdisc, false); + } + for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); + + qdisc_put(old); + } + + kfree(q->direct_qdiscs); + q->direct_qdiscs = NULL; +} + +static void htb_attach_software(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + /* Resemble qdisc_graft behavior. */ + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); + struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); + + qdisc_refcount_inc(sch); + + qdisc_put(old); + } +} + +static void htb_attach(struct Qdisc *sch) +{ + struct htb_sched *q = qdisc_priv(sch); + + if (q->offload) + htb_attach_offload(sch); + else + htb_attach_software(sch); } static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -1046,6 +1185,11 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_htb_glob gopt; + if (q->offload) + sch->flags |= TCQ_F_OFFLOADED; + else + sch->flags &= ~TCQ_F_OFFLOADED; + sch->qstats.overlimits = q->overlimits; /* Its safe to not acquire qdisc lock. As we hold RTNL, * no change can happen on the qdisc parameters. 
@@ -1063,6 +1207,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; return nla_nest_end(skb, nest); @@ -1075,6 +1221,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct nlattr *nest; struct tc_htb_opt opt; @@ -1101,6 +1248,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, opt.level = cl->level; if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) goto nla_put_failure; + if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) + goto nla_put_failure; if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, TCA_HTB_PAD)) @@ -1117,10 +1266,39 @@ nla_put_failure: return -1; } +static void htb_offload_aggregate_stats(struct htb_sched *q, + struct htb_class *cl) +{ + struct htb_class *c; + unsigned int i; + + memset(&cl->bstats, 0, sizeof(cl->bstats)); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { + struct htb_class *p = c; + + while (p && p->level < cl->level) + p = p->parent; + + if (p != cl) + continue; + + cl->bstats.bytes += c->bstats_bias.bytes; + cl->bstats.packets += c->bstats_bias.packets; + if (c->level == 0) { + cl->bstats.bytes += c->leaf.q->bstats.bytes; + cl->bstats.packets += c->leaf.q->bstats.packets; + } + } + } +} + static int htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); struct gnet_stats_queue qs = { .drops = cl->drops, .overlimits = cl->overlimits, @@ -1135,6 +1313,19 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), INT_MIN, INT_MAX); + if (q->offload) { + if (!cl->level) { + if (cl->leaf.q) + cl->bstats = cl->leaf.q->bstats; + else + memset(&cl->bstats, 0, sizeof(cl->bstats)); + cl->bstats.bytes += cl->bstats_bias.bytes; + cl->bstats.packets += cl->bstats_bias.packets; + } else { + htb_offload_aggregate_stats(q, cl); + } + } + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || @@ -1144,19 +1335,97 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); } +static struct netdev_queue * +htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; + int err; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_QUERY_QUEUE, + .classid = TC_H_MIN(tcm->tcm_parent), + }; + err = htb_offload(dev, &offload_opt); + if (err || offload_opt.qid >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, offload_opt.qid); +} + +static struct Qdisc * +htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) +{ + struct net_device *dev = dev_queue->dev; + struct Qdisc *old_q; + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + old_q = dev_graft_qdisc(dev_queue, new_q); + if (new_q) + new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; + if (dev->flags & IFF_UP) + 
dev_activate(dev); + + return old_q; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +{ + struct netdev_queue *queue_old, *queue_new; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + + queue_old = netdev_get_tx_queue(dev, qid_old); + queue_new = netdev_get_tx_queue(dev, qid_new); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + qdisc->dev_queue = queue_new; + qdisc = dev_graft_qdisc(queue_new, qdisc); + if (dev->flags & IFF_UP) + dev_activate(dev); + + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); +} + static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { + struct netdev_queue *dev_queue = sch->dev_queue; struct htb_class *cl = (struct htb_class *)arg; + struct htb_sched *q = qdisc_priv(sch); + struct Qdisc *old_q; if (cl->level) return -EINVAL; - if (new == NULL && - (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack)) == NULL) - return -ENOBUFS; + + if (q->offload) { + dev_queue = new->dev_queue; + WARN_ON(dev_queue != cl->leaf.q->dev_queue); + } + + if (!new) { + new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, + cl->common.classid, extack); + if (!new) + return -ENOBUFS; + } + + if (q->offload) { + htb_set_lockdep_class_child(new); + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new); + old_q = htb_graft_helper(dev_queue, new); + } *old = qdisc_replace(sch, new, &cl->leaf.q); + + if (q->offload) { + WARN_ON(old_q != *old); + qdisc_put(old_q); + } + return 0; } @@ -1184,9 +1453,10 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, +static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, struct Qdisc *new_q) { + struct htb_sched *q = qdisc_priv(sch); struct htb_class *parent = cl->parent; WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); @@ -1204,6 +1474,76 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->cmode = HTB_CAN_SEND; } +static void htb_parent_to_leaf_offload(struct Qdisc *sch, + struct netdev_queue *dev_queue, + struct Qdisc *new_q) +{ + struct Qdisc *old_q; + + /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ + qdisc_refcount_inc(new_q); + old_q = htb_graft_helper(dev_queue, new_q); + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); +} + +static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, + bool last_child, bool destroying, + struct netlink_ext_ack *extack) +{ + struct tc_htb_qopt_offload offload_opt; + struct Qdisc *q = cl->leaf.q; + struct Qdisc *old = NULL; + int err; + + if (cl->level) + return -EINVAL; + + WARN_ON(!q); + if (!destroying) { + /* On destroy of HTB, two cases are possible: + * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. + * 2. q is a noop qdisc (for nodes that were inner), + * q->dev_queue is noop_netdev_queue. + */ + old = htb_graft_helper(q->dev_queue, NULL); + WARN_ON(!old); + WARN_ON(old != q); + } + + if (cl->parent) { + cl->parent->bstats_bias.bytes += q->bstats.bytes; + cl->parent->bstats_bias.packets += q->bstats.packets; + } + + offload_opt = (struct tc_htb_qopt_offload) { + .command = !last_child ? TC_HTB_LEAF_DEL : + destroying ? 
TC_HTB_LEAF_DEL_LAST_FORCE : + TC_HTB_LEAF_DEL_LAST, + .classid = cl->common.classid, + .extack = extack, + }; + err = htb_offload(qdisc_dev(sch), &offload_opt); + + if (!err || destroying) + qdisc_put(old); + else + htb_graft_helper(q->dev_queue, old); + + if (last_child) + return err; + + if (!err && offload_opt.moved_qid != 0) { + if (destroying) + q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), + offload_opt.qid); + else + htb_offload_move_qdisc(sch, offload_opt.moved_qid, + offload_opt.qid); + } + + return err; +} + static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { @@ -1217,8 +1557,11 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) static void htb_destroy(struct Qdisc *sch) { + struct net_device *dev = qdisc_dev(sch); + struct tc_htb_qopt_offload offload_opt; struct htb_sched *q = qdisc_priv(sch); struct hlist_node *next; + bool nonempty, changed; struct htb_class *cl; unsigned int i; @@ -1237,21 +1580,68 @@ static void htb_destroy(struct Qdisc *sch) cl->block = NULL; } } - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], - common.hnode) - htb_destroy_class(sch, cl); - } + + do { + nonempty = false; + changed = false; + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], + common.hnode) { + bool last_child; + + if (!q->offload) { + htb_destroy_class(sch, cl); + continue; + } + + nonempty = true; + + if (cl->level) + continue; + + changed = true; + + last_child = htb_parent_last_child(cl); + htb_destroy_class_offload(sch, cl, last_child, + true, NULL); + qdisc_class_hash_remove(&q->clhash, + &cl->common); + if (cl->parent) + cl->parent->children--; + if (last_child) + htb_parent_to_leaf(sch, cl, NULL); + htb_destroy_class(sch, cl); + } + } + } while (changed); + WARN_ON(nonempty); + qdisc_class_hash_destroy(&q->clhash); __qdisc_reset_queue(&q->direct_queue); + + if (!q->offload) + return; + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_DESTROY, + }; + htb_offload(dev, &offload_opt); + + if (!q->direct_qdiscs) + return; + for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) + qdisc_put(q->direct_qdiscs[i]); + kfree(q->direct_qdiscs); } -static int htb_delete(struct Qdisc *sch, unsigned long arg) +static int htb_delete(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct Qdisc *new_q = NULL; int last_child = 0; + int err; /* TODO: why don't allow to delete subtree ? references ? does * tc subsys guarantee us that in htb_destroy it holds no class @@ -1260,11 +1650,28 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (cl->children || cl->filter_cnt) return -EBUSY; - if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!cl->level && htb_parent_last_child(cl)) + last_child = 1; + + if (q->offload) { + err = htb_destroy_class_offload(sch, cl, last_child, false, + extack); + if (err) + return err; + } + + if (last_child) { + struct netdev_queue *dev_queue; + + dev_queue = q->offload ? 
cl->leaf.q->dev_queue : sch->dev_queue; + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); - last_child = 1; + if (q->offload) { + if (new_q) + htb_set_lockdep_class_child(new_q); + htb_parent_to_leaf_offload(sch, dev_queue, new_q); + } } sch_tree_lock(sch); @@ -1285,7 +1692,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) &q->hlevel[cl->level].wait_pq); if (last_child) - htb_parent_to_leaf(q, cl, new_q); + htb_parent_to_leaf(sch, cl, new_q); sch_tree_unlock(sch); @@ -1300,9 +1707,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; + struct tc_htb_qopt_offload offload_opt; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HTB_MAX + 1]; struct Qdisc *parent_qdisc = NULL; + struct netdev_queue *dev_queue; struct tc_htb_opt *hopt; u64 rate64, ceil64; int warn = 0; @@ -1335,8 +1744,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], NULL)); + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + if (!cl) { /* new class */ - struct Qdisc *new_q; + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *new_q, *old_q; int prio; struct { struct nlattr nla; @@ -1379,11 +1792,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NULL, qdisc_root_sleeping_running(sch), tca[TCA_RATE] ? : &est.nla); - if (err) { - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } + if (err) + goto err_block_put; } cl->children = 0; @@ -1392,12 +1802,76 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) RB_CLEAR_NODE(&cl->node[prio]); + cl->common.classid = classid; + + /* Make sure nothing interrupts us in between of two + * ndo_setup_tc calls. + */ + ASSERT_RTNL(); + /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) * so that can't be used inside of sch_tree_lock * -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + if (!q->offload) { + dev_queue = sch->dev_queue; + } else if (!(parent && !parent->level)) { + /* Assign a dev_queue to this classid. */ + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_ALLOC_QUEUE, + .classid = cl->common.classid, + .parent_classid = parent ? + TC_H_MIN(parent->common.classid) : + TC_HTB_CLASSID_ROOT, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", + err); + goto err_kill_estimator; + } + dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); + } else { /* First child. 
*/ + dev_queue = parent->leaf.q->dev_queue; + old_q = htb_graft_helper(dev_queue, NULL); + WARN_ON(old_q != parent->leaf.q); + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_LEAF_TO_INNER, + .classid = cl->common.classid, + .parent_classid = + TC_H_MIN(parent->common.classid), + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) { + pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", + err); + htb_graft_helper(dev_queue, old_q); + goto err_kill_estimator; + } + parent->bstats_bias.bytes += old_q->bstats.bytes; + parent->bstats_bias.packets += old_q->bstats.packets; + qdisc_put(old_q); + } + new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, classid, NULL); + if (q->offload) { + if (new_q) { + htb_set_lockdep_class_child(new_q); + /* One ref for cl->leaf.q, the other for + * dev_queue->qdisc. + */ + qdisc_refcount_inc(new_q); + } + old_q = htb_graft_helper(dev_queue, new_q); + /* No qdisc_put needed. */ + WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); + } sch_tree_lock(sch); if (parent && !parent->level) { /* turn parent into inner node */ @@ -1415,10 +1889,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, : TC_HTB_MAXDEPTH) - 1; memset(&parent->inner, 0, sizeof(parent->inner)); } + /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? new_q : &noop_qdisc; - cl->common.classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ @@ -1444,12 +1918,30 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (err) return err; } - sch_tree_lock(sch); - } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + if (q->offload) { + struct net_device *dev = qdisc_dev(sch); + + offload_opt = (struct tc_htb_qopt_offload) { + .command = TC_HTB_NODE_MODIFY, + .classid = cl->common.classid, + .rate = max_t(u64, hopt->rate.rate, rate64), + .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .extack = extack, + }; + err = htb_offload(dev, &offload_opt); + if (err) + /* Estimator was replaced, and rollback may fail + * as well, so we don't try to recover it, and + * the estimator won't work properly with the + * offload anyway, because bstats are updated + * only when the stats are queried. + */ + return err; + } - ceil64 = tb[TCA_HTB_CEIL64] ?
nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + sch_tree_lock(sch); + } psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); @@ -1492,6 +1984,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, *arg = (unsigned long)cl; return 0; +err_kill_estimator: + gen_kill_estimator(&cl->rate_est); +err_block_put: + tcf_block_put(cl->block); + kfree(cl); failure: return err; } @@ -1557,6 +2054,7 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) } static const struct Qdisc_class_ops htb_class_ops = { + .select_queue = htb_select_queue, .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, @@ -1579,6 +2077,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, .init = htb_init, + .attach = htb_attach, .reset = htb_reset, .destroy = htb_destroy, .dump = htb_dump, diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 6335230a971e..1db9d4a2ef5e 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -529,7 +529,8 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) kfree(cl); } -static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) +static int qfq_delete_class(struct Qdisc *sch, unsigned long arg, + struct netlink_ext_ack *extack) { struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index da047a37a3bf..dde829d4b9f8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -649,7 +649,8 @@ static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return -ENOSYS; } -static int sfb_delete(struct Qdisc *sch, unsigned long cl) +static int sfb_delete(struct Qdisc *sch, unsigned long cl, + struct netlink_ext_ack *extack) { return -ENOSYS; } diff --git a/net/switchdev/Makefile b/net/switchdev/Makefile index bd69a3136e76..c5561d7f3a7c 100644 --- a/net/switchdev/Makefile +++ b/net/switchdev/Makefile @@ -3,4 +3,4 @@ # Makefile for the Switch device API # -obj-$(CONFIG_NET_SWITCHDEV) += switchdev.o +obj-y += switchdev.o diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 2aca86021df5..e9263280a2d4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -117,10 +117,6 @@ struct sk_buff *tipc_msg_create(uint user, uint type, msg_set_origport(msg, oport); msg_set_destport(msg, dport); msg_set_errcode(msg, errcode); - if (hdr_sz > SHORT_H_SIZE) { - msg_set_orignode(msg, onode); - msg_set_destnode(msg, dnode); - } return buf; } diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h index 0d18b1d1fbbc..5c903abc9fa5 100644 --- a/tools/include/uapi/linux/pkt_sched.h +++ b/tools/include/uapi/linux/pkt_sched.h @@ -414,6 +414,7 @@ enum { TCA_HTB_RATE64, TCA_HTB_CEIL64, TCA_HTB_PAD, + TCA_HTB_OFFLOAD, __TCA_HTB_MAX, }; diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh new file mode 100644 index 000000000000..f813ffefc07e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for physical ports resource. The test splits each splittable port +# to its width and checks that eventually the number of physical ports equals +# the maximum number of physical ports. 
+ +PORT_NUM_NETIFS=0 + +port_setup_prepare() +{ + : +} + +port_cleanup() +{ + pre_cleanup + + for port in "${unsplit[@]}"; do + devlink port unsplit $port + check_err $? "Did not unsplit $port" + done +} + +split_all_ports() +{ + local should_fail=$1; shift + local -a unsplit + + # Loop over the splittable netdevs and create tuples of netdev along + # with its width. For example: + # '$netdev1 $count1 $netdev2 $count2...', when: + # $netdev1-2 are splittable netdevs in the device, and + # $count1-2 are the netdevs width respectively. + while read netdev count <<<$( + devlink -j port show | + jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"' + ) + [[ ! -z $netdev ]] + do + devlink port split $netdev count $count + check_err $? "Did not split $netdev into $count" + unsplit+=( "${netdev}s0" ) + done +} + +port_test() +{ + local max_ports=$1; shift + local should_fail=$1; shift + + split_all_ports $should_fail + + occ=$(devlink -j resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="physical_ports") |.["occ"]') + + [[ $occ -eq $max_ports ]] + if [[ $should_fail -eq 0 ]]; then + check_err $? "Mismatch ports number: Expected $max_ports, got $occ." + else + check_err_fail $should_fail $? "Reached more ports than expected" + fi + +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh + +port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index d7cf33a3f18d..4a1c9328555f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -28,7 +28,7 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh new file mode 100644 index 000000000000..0b71dfbbb447 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_scale.sh +
+port_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get physical_ports) + + if ((!
should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 43f662401bc3..087a884f66cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,7 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police" +ALL_TESTS="router tc_flower mirror_gre tc_police port" for current_test in ${TESTS:-$ALL_TESTS}; do source ${current_test}_scale.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index fa5fa425d148..25f198bec0b2 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -22,6 +22,7 @@ TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh +TEST_PROGS += unicast_extensions.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f74cd993b168..be34b9ccbd20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -790,6 +790,81 @@ chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 +# subflow IPv4-mapped to IPv4-mapped +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow IPv4-mapped" 1 1 1 + +# signal address IPv4-mapped with IPv4-mapped sk +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address IPv4-mapped" 1 1 1 +chk_add_nr 1 1 + +# subflow v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "single subflow v4-map-v6" 1 1 1 + +# signal address v4-map-v6 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 "::ffff:10.0.1.1" +chk_join_nr "signal address v4-map-v6" 1 1 1 +chk_add_nr 1 1 + +# subflow v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add "::ffff:10.0.3.2" flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow v6-map-v4" 1 1 1 + +# signal address v6-map-v4 +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add "::ffff:10.0.2.1" flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address v6-map-v4" 1 1 1 +chk_add_nr 1 1 + +# no subflow IPv6 to v4 address +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6" 0 0 0 + +# no 
subflow IPv6 to v4 address even if v6 has a valid v4 at the end +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add dead:beef:2::10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN with diff families v4-v6-2" 0 0 0 + +# no subflow IPv4 to v6 address, no need to slow down too then +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 dead:beef:1::1 +chk_join_nr "no JOIN with diff families v6-v4" 0 0 0 + # single subflow, backup reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 026384c189c9..a62d2fa1275c 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=450 +timeout=600 diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh new file mode 100755 index 000000000000..dbf0421986df --- /dev/null +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -0,0 +1,228 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +# Thanks to David Ahern for help and advice on nettest modifications. +# +# Self-tests for IPv4 address extensions: the kernel's ability to accept +# certain traditionally unused or unallocated IPv4 addresses. For each kind +# of address, we test for interface assignment, ping, TCP, and forwarding. +# Must be run as root (to manipulate network namespaces and virtual +# interfaces). +# +# Things we test for here: +# +# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid. +# +# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned. +# +# * Currently the kernel DOES NOT accept unicast use of the lowest +# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24). +# This is treated as a second broadcast address, for compatibility +# with 4.2BSD (!). +# +# * Currently the kernel DOES NOT accept unicast use of any of 127/8. +# +# * Currently the kernel DOES NOT accept unicast use of any of 224/4. +# +# These tests provide an easy way to flip the expected result of any +# of these behaviors for testing kernel patches that change them. + +# nettest can be run from PATH or from same directory as this selftest +if ! which nettest >/dev/null; then + PATH=$PWD:$PATH + if ! which nettest >/dev/null; then + echo "'nettest' command not found; skipping tests" + exit 0 + fi +fi + +result=0 + +hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; } +show_output(){ exec >&3 2>&4; } + +show_result(){ + if [ $1 -eq 0 ]; then + printf "TEST: %-60s [ OK ]\n" "${2}" + else + printf "TEST: %-60s [FAIL]\n" "${2}" + result=1 + fi +} + +_do_segmenttest(){ + # Perform a simple set of link tests between a pair of + # IP addresses on a shared (virtual) segment, using + # ping and nettest. + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # + # Caller must set up foo-ns and bar-ns namespaces + # containing linked veth devices foo and bar, + # respectively. 
+ + ip -n foo-ns address add $1/$3 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n bar-ns address add $2/$3 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $2 || return 1 + + return 0 +} + +_do_route_test(){ + # Perform a simple set of gateway tests. + # + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # host gateway host + # + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + + # Caller must set up foo-ns, bar-ns, and router-ns + # containing linked veth devices foo-foo1, bar1-bar + # (foo in foo-ns, foo1 and bar1 in router-ns, and + # bar in bar-ns). + + ip -n foo-ns address add $1/$5 dev foo || return 1 + ip -n foo-ns link set foo up || return 1 + ip -n foo-ns route add default via $2 || return 1 + ip -n bar-ns address add $4/$5 dev bar || return 1 + ip -n bar-ns link set bar up || return 1 + ip -n bar-ns route add default via $3 || return 1 + ip -n router-ns address add $2/$5 dev foo1 || return 1 + ip -n router-ns link set foo1 up || return 1 + ip -n router-ns address add $3/$5 dev bar1 || return 1 + ip -n router-ns link set bar1 up || return 1 + + echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward + + ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 + ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 + ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 + + nettest -B -N bar-ns -O foo-ns -r $1 || return 1 + nettest -B -N foo-ns -O bar-ns -r $4 || return 1 + + return 0 +} + +segmenttest(){ + # Sets up veth link and tries to connect over it. + # Arguments: ip_a ip_b prefix_len test_description + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip link add foo netns foo-ns type veth peer name bar netns bar-ns + + test_result=0 + _do_segmenttest "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + + show_result $test_result "$4" +} + +route_test(){ + # Sets up a simple gateway and tries to connect through it. + # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Returns success or failure. 
+ + hide_output + ip netns add foo-ns + ip netns add bar-ns + ip netns add router-ns + ip link add foo netns foo-ns type veth peer name foo1 netns router-ns + ip link add bar netns bar-ns type veth peer name bar1 netns router-ns + + test_result=0 + _do_route_test "$@" || test_result=1 + + ip netns pids foo-ns | xargs -r kill -9 + ip netns pids bar-ns | xargs -r kill -9 + ip netns pids router-ns | xargs -r kill -9 + ip netns del foo-ns + ip netns del bar-ns + ip netns del router-ns + + show_output + + # inverted tests will expect failure instead of success + [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result` + show_result $test_result "$6" +} + +echo "###########################################################################" +echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)" +echo "###########################################################################" +# +# Test support for 240/4 +segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)" +segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)" +# +# Test support for 0/8 +segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)" +segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)" +# +# Even 255.255/16 is OK! +segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)" +# +# Or 255.255.255/24 +segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)" +# +# Routing between different networks +route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" +route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" +# +# ============================================== +# ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ============================================== +expect_failure=true +# It should still not be possible to use 0.0.0.0 or 255.255.255.255 +# as a unicast address. Thus, these tests expect failure. 
+segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)" +segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)" +# +# Test support for not having all of 127 be loopback +# Currently Linux does not allow this, so this should fail too +segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" +# +# Test support for lowest address +# Currently Linux does not allow this, so this should fail too +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" +# +# Routing using lowest address as a gateway/endpoint +# Currently Linux does not allow this, so this should fail too +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" +# +# Test support for unicast use of class D +# Currently Linux does not allow this, so this should fail too +segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" +# +# Routing using class D as a gateway +route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)" +# +# Routing using 127/8 +# Currently Linux does not allow this, so this should fail too +route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)" +# +unset expect_failure +# ===================================================== +# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE ===== +# ===================================================== +exit ${result}
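
The unicast_extensions.sh selftest added above can be exercised on its own; a minimal invocation sketch (it assumes the script is run as root from a kernel tree in which the net selftests, including the nettest helper the script looks for, have been built):

    make -C tools/testing/selftests/net        # builds nettest among other helpers
    cd tools/testing/selftests/net
    sudo ./unicast_extensions.sh

For the TCA_HTB_OFFLOAD flag and the TC_SETUP_QDISC_HTB path added to sch_htb.c above, a hedged userspace sketch follows (it assumes an iproute2 build that accepts an "offload" keyword for htb and a NIC driver whose ndo_setup_tc handles TC_SETUP_QDISC_HTB; the device name and rates are placeholders, not taken from this series):

    # Install HTB as the root qdisc in offload mode; the per-TXQ pfifo qdiscs
    # handle direct traffic, and leaf classes are then created in hardware
    # via TC_HTB_LEAF_ALLOC_QUEUE.
    tc qdisc replace dev eth0 root handle 1: htb default 10 offload
    tc class add dev eth0 parent 1: classid 1:10 htb rate 100mbit ceil 200mbit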