diff options
114 files changed, 4648 insertions, 749 deletions
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt index 49484db81583..c78f3187dfea 100644 --- a/Documentation/devicetree/bindings/net/marvell-pp2.txt +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -44,7 +44,7 @@ Optional properties (port): - interrupt-names: if more than a single interrupt for rx is given, must be the name associated to the interrupts listed. Valid names are: "tx-cpu0", "tx-cpu1", "tx-cpu2", "tx-cpu3", - "rx-shared". + "rx-shared", "link". - marvell,system-controller: a phandle to the system controller. Example for marvell,armada-375-pp2: diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt new file mode 100644 index 000000000000..bfcf80341657 --- /dev/null +++ b/Documentation/devicetree/bindings/phy/phy-mvebu-comphy.txt @@ -0,0 +1,43 @@ +mvebu comphy driver +------------------- + +A comphy controller can be found on Marvell Armada 7k/8k on the CP110. It +provides a number of shared PHYs used by various interfaces (network, sata, +usb, PCIe...). + +Required properties: + +- compatible: should be "marvell,comphy-cp110" +- reg: should contain the comphy register location and length. +- marvell,system-controller: should contain a phandle to the + system controller node. +- #address-cells: should be 1. +- #size-cells: should be 0. + +A sub-node is required for each comphy lane provided by the comphy. + +Required properties (child nodes): + +- reg: comphy lane number. +- #phy-cells : from the generic phy bindings, must be 1. Defines the + input port to use for a given comphy lane. + +Example: + + cpm_comphy: phy@120000 { + compatible = "marvell,comphy-cp110"; + reg = <0x120000 0x6000>; + marvell,system-controller = <&cpm_syscon0>; + #address-cells = <1>; + #size-cells = <0>; + + cpm_comphy0: phy@0 { + reg = <0>; + #phy-cells = <1>; + }; + + cpm_comphy1: phy@1 { + reg = <1>; + #phy-cells = <1>; + }; + }; diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt index fa8d86356791..93560fb1170a 100644 --- a/Documentation/networking/netvsc.txt +++ b/Documentation/networking/netvsc.txt @@ -32,9 +32,9 @@ Features hashing. Using L3 hashing is recommended in this case. For example, for UDP over IPv4 on eth0: - To include UDP port numbers in hasing: + To include UDP port numbers in hashing: ethtool -N eth0 rx-flow-hash udp4 sdfn - To exclude UDP port numbers in hasing: + To exclude UDP port numbers in hashing: ethtool -N eth0 rx-flow-hash udp4 sd To show UDP hash level: ethtool -n eth0 rx-flow-hash udp4 diff --git a/Documentation/networking/rmnet.txt b/Documentation/networking/rmnet.txt new file mode 100644 index 000000000000..6b341eaf2062 --- /dev/null +++ b/Documentation/networking/rmnet.txt @@ -0,0 +1,82 @@ +1. Introduction + +rmnet driver is used for supporting the Multiplexing and aggregation +Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm +Technologies, Inc. modems. + +This driver can be used to register onto any physical network device in +IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator. + +Multiplexing allows for creation of logical netdevices (rmnet devices) to +handle multiple private data networks (PDN) like a default internet, tethering, +multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends +packets with MAP headers to rmnet. Based on the multiplexer id, rmnet +routes to the appropriate PDN after removing the MAP header. + +Aggregation is required to achieve high data rates. This involves hardware +sending aggregated bunch of MAP frames. rmnet driver will de-aggregate +these MAP frames and send them to appropriate PDN's. + +2. Packet format + +a. MAP packet (data / control) + +MAP header has the same endianness of the IP packet. + +Packet format - + +Bit 0 1 2-7 8 - 15 16 - 31 +Function Command / Data Reserved Pad Multiplexer ID Payload length +Bit 32 - x +Function Raw Bytes + +Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command +or data packet. Control packet is used for transport level flow control. Data +packets are standard IP packets. + +Reserved bits are usually zeroed out and to be ignored by receiver. + +Padding is number of bytes to be added for 4 byte alignment if required by +hardware. + +Multiplexer ID is to indicate the PDN on which data has to be sent. + +Payload length includes the padding length but does not include MAP header +length. + +b. MAP packet (command specific) + +Bit 0 1 2-7 8 - 15 16 - 31 +Function Command Reserved Pad Multiplexer ID Payload length +Bit 32 - 39 40 - 45 46 - 47 48 - 63 +Function Command name Reserved Command Type Reserved +Bit 64 - 95 +Function Transaction ID +Bit 96 - 127 +Function Command data + +Command 1 indicates disabling flow while 2 is enabling flow + +Command types - +0 for MAP command request +1 is to acknowledge the receipt of a command +2 is for unsupported commands +3 is for error during processing of commands + +c. Aggregation + +Aggregation is multiple MAP packets (can be data or command) delivered to +rmnet in a single linear skb. rmnet will process the individual +packets and either ACK the MAP command or deliver the IP packet to the +network stack as needed + +MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... +MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... + +3. Userspace configuration + +rmnet userspace configuration is done through netlink library librmnetctl +and command line utility rmnetcli. Utility is hosted in codeaurora forum git. +The driver uses rtnl_link_ops for communication. + +https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8194696e2805..8c9573660d51 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -287,7 +287,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */ offsetof(struct bpf_array, map.max_entries)); EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */ -#define OFFSET1 47 /* number of bytes to jump */ +#define OFFSET1 43 /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -296,21 +296,20 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 36 +#define OFFSET2 32 EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */ /* prog = array->ptrs[index]; */ - EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ + EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) * goto out; */ - EMIT4(0x48, 0x83, 0xF8, 0x00); /* cmp rax, 0 */ + EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 10 EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 61a88b64bd39..4f3845a58126 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2674,7 +2674,7 @@ static int bcm_enetsw_set_ringparam(struct net_device *dev, return 0; } -static struct ethtool_ops bcm_enetsw_ethtool_ops = { +static const struct ethtool_ops bcm_enetsw_ethtool_ops = { .get_strings = bcm_enetsw_get_strings, .get_sset_count = bcm_enetsw_get_sset_count, .get_ethtool_stats = bcm_enetsw_get_ethtool_stats, diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index f6c0bad78cd4..e8b290473ee2 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1305,6 +1305,26 @@ static int cn23xx_sriov_config(struct octeon_device *oct) int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) { + u32 data32; + u64 BAR0, BAR1; + + pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_0, &data32); + BAR0 = (u64)(data32 & ~0xf); + pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_1, &data32); + BAR0 |= ((u64)data32 << 32); + pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_2, &data32); + BAR1 = (u64)(data32 & ~0xf); + pci_read_config_dword(oct->pci_dev, PCI_BASE_ADDRESS_3, &data32); + BAR1 |= ((u64)data32 << 32); + + if (!BAR0 || !BAR1) { + if (!BAR0) + dev_err(&oct->pci_dev->dev, "device BAR0 unassigned\n"); + if (!BAR1) + dev_err(&oct->pci_dev->dev, "device BAR1 unassigned\n"); + return 1; + } + if (octeon_map_pci_barx(oct, 0, 0)) return 1; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index a2add8bb1945..f32d719c4f77 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -150,7 +150,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev) } phydev = mdiobus_get_phy(mdio_bus, mac->phy_addr); - if (!phydev || IS_ERR(phydev)) { + if (!phydev) { dev_err(mdio_bus->parent, "Failed to get phy device\n"); mdiobus_unregister(mdio_bus); return -EIO; diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index fea9ae5b70ba..f37c05fed5bc 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -28,6 +28,7 @@ #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/phy.h> +#include <linux/phy/phy.h> #include <linux/clk.h> #include <linux/hrtimer.h> #include <linux/ktime.h> @@ -347,16 +348,24 @@ #define MVPP2_GMAC_FLOW_CTRL_AUTONEG BIT(11) #define MVPP2_GMAC_CONFIG_FULL_DUPLEX BIT(12) #define MVPP2_GMAC_AN_DUPLEX_EN BIT(13) +#define MVPP2_GMAC_STATUS0 0x10 +#define MVPP2_GMAC_STATUS0_LINK_UP BIT(0) #define MVPP2_GMAC_PORT_FIFO_CFG_1_REG 0x1c #define MVPP2_GMAC_TX_FIFO_MIN_TH_OFFS 6 #define MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK 0x1fc0 #define MVPP2_GMAC_TX_FIFO_MIN_TH_MASK(v) (((v) << 6) & \ MVPP2_GMAC_TX_FIFO_MIN_TH_ALL_MASK) +#define MVPP22_GMAC_INT_STAT 0x20 +#define MVPP22_GMAC_INT_STAT_LINK BIT(1) +#define MVPP22_GMAC_INT_MASK 0x24 +#define MVPP22_GMAC_INT_MASK_LINK_STAT BIT(1) #define MVPP22_GMAC_CTRL_4_REG 0x90 #define MVPP22_CTRL4_EXT_PIN_GMII_SEL BIT(0) #define MVPP22_CTRL4_DP_CLK_SEL BIT(5) #define MVPP22_CTRL4_SYNC_BYPASS_DIS BIT(6) #define MVPP22_CTRL4_QSGMII_BYPASS_ACTIVE BIT(7) +#define MVPP22_GMAC_INT_SUM_MASK 0xa4 +#define MVPP22_GMAC_INT_SUM_MASK_LINK_STAT BIT(1) /* Per-port XGMAC registers. PPv2.2 only, only for GOP port 0, * relative to port->base. @@ -369,11 +378,19 @@ #define MVPP22_XLG_CTRL1_REG 0x104 #define MVPP22_XLG_CTRL1_FRAMESIZELIMIT_OFFS 0 #define MVPP22_XLG_CTRL1_FRAMESIZELIMIT_MASK 0x1fff +#define MVPP22_XLG_STATUS 0x10c +#define MVPP22_XLG_STATUS_LINK_UP BIT(0) +#define MVPP22_XLG_INT_STAT 0x114 +#define MVPP22_XLG_INT_STAT_LINK BIT(1) +#define MVPP22_XLG_INT_MASK 0x118 +#define MVPP22_XLG_INT_MASK_LINK BIT(1) #define MVPP22_XLG_CTRL3_REG 0x11c #define MVPP22_XLG_CTRL3_MACMODESELECT_MASK (7 << 13) #define MVPP22_XLG_CTRL3_MACMODESELECT_GMAC (0 << 13) #define MVPP22_XLG_CTRL3_MACMODESELECT_10G (1 << 13) - +#define MVPP22_XLG_EXT_INT_MASK 0x15c +#define MVPP22_XLG_EXT_INT_MASK_XLG BIT(1) +#define MVPP22_XLG_EXT_INT_MASK_GIG BIT(2) #define MVPP22_XLG_CTRL4_REG 0x184 #define MVPP22_XLG_CTRL4_FWD_FC BIT(5) #define MVPP22_XLG_CTRL4_FWD_PFC BIT(6) @@ -836,6 +853,8 @@ struct mvpp2_port { */ int gop_id; + int link_irq; + struct mvpp2 *priv; /* Per-port registers' base address */ @@ -861,6 +880,7 @@ struct mvpp2_port { phy_interface_t phy_interface; struct device_node *phy_node; + struct phy *comphy; unsigned int link; unsigned int duplex; unsigned int speed; @@ -4420,6 +4440,94 @@ invalid_conf: return -EINVAL; } +static void mvpp22_gop_unmask_irq(struct mvpp2_port *port) +{ + u32 val; + + if (phy_interface_mode_is_rgmii(port->phy_interface) || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + /* Enable the GMAC link status irq for this port */ + val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK); + val |= MVPP22_GMAC_INT_SUM_MASK_LINK_STAT; + writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK); + } + + if (port->gop_id == 0) { + /* Enable the XLG/GIG irqs for this port */ + val = readl(port->base + MVPP22_XLG_EXT_INT_MASK); + if (port->phy_interface == PHY_INTERFACE_MODE_10GKR) + val |= MVPP22_XLG_EXT_INT_MASK_XLG; + else + val |= MVPP22_XLG_EXT_INT_MASK_GIG; + writel(val, port->base + MVPP22_XLG_EXT_INT_MASK); + } +} + +static void mvpp22_gop_mask_irq(struct mvpp2_port *port) +{ + u32 val; + + if (port->gop_id == 0) { + val = readl(port->base + MVPP22_XLG_EXT_INT_MASK); + val &= ~(MVPP22_XLG_EXT_INT_MASK_XLG | + MVPP22_XLG_EXT_INT_MASK_GIG); + writel(val, port->base + MVPP22_XLG_EXT_INT_MASK); + } + + if (phy_interface_mode_is_rgmii(port->phy_interface) || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + val = readl(port->base + MVPP22_GMAC_INT_SUM_MASK); + val &= ~MVPP22_GMAC_INT_SUM_MASK_LINK_STAT; + writel(val, port->base + MVPP22_GMAC_INT_SUM_MASK); + } +} + +static void mvpp22_gop_setup_irq(struct mvpp2_port *port) +{ + u32 val; + + if (phy_interface_mode_is_rgmii(port->phy_interface) || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + val = readl(port->base + MVPP22_GMAC_INT_MASK); + val |= MVPP22_GMAC_INT_MASK_LINK_STAT; + writel(val, port->base + MVPP22_GMAC_INT_MASK); + } + + if (port->gop_id == 0) { + val = readl(port->base + MVPP22_XLG_INT_MASK); + val |= MVPP22_XLG_INT_MASK_LINK; + writel(val, port->base + MVPP22_XLG_INT_MASK); + } + + mvpp22_gop_unmask_irq(port); +} + +static int mvpp22_comphy_init(struct mvpp2_port *port) +{ + enum phy_mode mode; + int ret; + + if (!port->comphy) + return 0; + + switch (port->phy_interface) { + case PHY_INTERFACE_MODE_SGMII: + mode = PHY_MODE_SGMII; + break; + case PHY_INTERFACE_MODE_10GKR: + mode = PHY_MODE_10GKR; + break; + default: + return -EINVAL; + } + + ret = phy_set_mode(port->comphy, mode); + if (ret) + return ret; + + return phy_power_on(port->comphy); +} + static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) { u32 val; @@ -4435,10 +4543,7 @@ static void mvpp2_port_mii_gmac_configure_mode(struct mvpp2_port *port) val |= MVPP2_GMAC_DISABLE_PADDING; val &= ~MVPP2_GMAC_FLOW_CTRL_MASK; writel(val, port->base + MVPP2_GMAC_CTRL_2_REG); - } else if (port->phy_interface == PHY_INTERFACE_MODE_RGMII || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) { + } else if (phy_interface_mode_is_rgmii(port->phy_interface)) { val = readl(port->base + MVPP22_GMAC_CTRL_4_REG); val |= MVPP22_CTRL4_EXT_PIN_GMII_SEL | MVPP22_CTRL4_SYNC_BYPASS_DIS | @@ -4484,10 +4589,7 @@ static void mvpp2_port_mii_gmac_configure(struct mvpp2_port *port) val = readl(port->base + MVPP2_GMAC_CTRL_2_REG); if (port->phy_interface == PHY_INTERFACE_MODE_SGMII) { val |= MVPP2_GMAC_INBAND_AN_MASK | MVPP2_GMAC_PCS_ENABLE_MASK; - } else if (port->phy_interface == PHY_INTERFACE_MODE_RGMII || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) { + } else if (phy_interface_mode_is_rgmii(port->phy_interface)) { val &= ~MVPP2_GMAC_PCS_ENABLE_MASK; val |= MVPP2_GMAC_PORT_RGMII_MASK; } @@ -4547,10 +4649,7 @@ static void mvpp2_port_mii_set(struct mvpp2_port *port) if (port->priv->hw_version == MVPP22) mvpp22_port_mii_set(port); - if (port->phy_interface == PHY_INTERFACE_MODE_RGMII || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID || - port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID || + if (phy_interface_mode_is_rgmii(port->phy_interface) || port->phy_interface == PHY_INTERFACE_MODE_SGMII) mvpp2_port_mii_gmac_configure(port); else if (port->phy_interface == PHY_INTERFACE_MODE_10GKR) @@ -5707,63 +5806,161 @@ static irqreturn_t mvpp2_isr(int irq, void *dev_id) return IRQ_HANDLED; } +/* Per-port interrupt for link status changes */ +static irqreturn_t mvpp2_link_status_isr(int irq, void *dev_id) +{ + struct mvpp2_port *port = (struct mvpp2_port *)dev_id; + struct net_device *dev = port->dev; + bool event = false, link = false; + u32 val; + + mvpp22_gop_mask_irq(port); + + if (port->gop_id == 0 && + port->phy_interface == PHY_INTERFACE_MODE_10GKR) { + val = readl(port->base + MVPP22_XLG_INT_STAT); + if (val & MVPP22_XLG_INT_STAT_LINK) { + event = true; + val = readl(port->base + MVPP22_XLG_STATUS); + if (val & MVPP22_XLG_STATUS_LINK_UP) + link = true; + } + } else if (phy_interface_mode_is_rgmii(port->phy_interface) || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + val = readl(port->base + MVPP22_GMAC_INT_STAT); + if (val & MVPP22_GMAC_INT_STAT_LINK) { + event = true; + val = readl(port->base + MVPP2_GMAC_STATUS0); + if (val & MVPP2_GMAC_STATUS0_LINK_UP) + link = true; + } + } + + if (!netif_running(dev) || !event) + goto handled; + + if (link) { + mvpp2_interrupts_enable(port); + + mvpp2_egress_enable(port); + mvpp2_ingress_enable(port); + netif_carrier_on(dev); + netif_tx_wake_all_queues(dev); + } else { + netif_tx_stop_all_queues(dev); + netif_carrier_off(dev); + mvpp2_ingress_disable(port); + mvpp2_egress_disable(port); + + mvpp2_interrupts_disable(port); + } + +handled: + mvpp22_gop_unmask_irq(port); + return IRQ_HANDLED; +} + +static void mvpp2_gmac_set_autoneg(struct mvpp2_port *port, + struct phy_device *phydev) +{ + u32 val; + + if (port->phy_interface != PHY_INTERFACE_MODE_RGMII && + port->phy_interface != PHY_INTERFACE_MODE_RGMII_ID && + port->phy_interface != PHY_INTERFACE_MODE_RGMII_RXID && + port->phy_interface != PHY_INTERFACE_MODE_RGMII_TXID && + port->phy_interface != PHY_INTERFACE_MODE_SGMII) + return; + + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | + MVPP2_GMAC_CONFIG_GMII_SPEED | + MVPP2_GMAC_CONFIG_FULL_DUPLEX | + MVPP2_GMAC_AN_SPEED_EN | + MVPP2_GMAC_AN_DUPLEX_EN); + + if (phydev->duplex) + val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; + + if (phydev->speed == SPEED_1000) + val |= MVPP2_GMAC_CONFIG_GMII_SPEED; + else if (phydev->speed == SPEED_100) + val |= MVPP2_GMAC_CONFIG_MII_SPEED; + + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); +} + /* Adjust link */ static void mvpp2_link_event(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); struct phy_device *phydev = dev->phydev; - int status_change = 0; + bool link_reconfigured = false; u32 val; if (phydev->link) { + if (port->phy_interface != phydev->interface && port->comphy) { + /* disable current port for reconfiguration */ + mvpp2_interrupts_disable(port); + netif_carrier_off(port->dev); + mvpp2_port_disable(port); + phy_power_off(port->comphy); + + /* comphy reconfiguration */ + port->phy_interface = phydev->interface; + mvpp22_comphy_init(port); + + /* gop/mac reconfiguration */ + mvpp22_gop_init(port); + mvpp2_port_mii_set(port); + + link_reconfigured = true; + } + if ((port->speed != phydev->speed) || (port->duplex != phydev->duplex)) { - u32 val; - - val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - val &= ~(MVPP2_GMAC_CONFIG_MII_SPEED | - MVPP2_GMAC_CONFIG_GMII_SPEED | - MVPP2_GMAC_CONFIG_FULL_DUPLEX | - MVPP2_GMAC_AN_SPEED_EN | - MVPP2_GMAC_AN_DUPLEX_EN); - - if (phydev->duplex) - val |= MVPP2_GMAC_CONFIG_FULL_DUPLEX; - - if (phydev->speed == SPEED_1000) - val |= MVPP2_GMAC_CONFIG_GMII_SPEED; - else if (phydev->speed == SPEED_100) - val |= MVPP2_GMAC_CONFIG_MII_SPEED; - - writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + mvpp2_gmac_set_autoneg(port, phydev); port->duplex = phydev->duplex; port->speed = phydev->speed; } } - if (phydev->link != port->link) { - if (!phydev->link) { - port->duplex = -1; - port->speed = 0; - } - + if (phydev->link != port->link || link_reconfigured) { port->link = phydev->link; - status_change = 1; - } - if (status_change) { if (phydev->link) { - val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); - val |= (MVPP2_GMAC_FORCE_LINK_PASS | - MVPP2_GMAC_FORCE_LINK_DOWN); - writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + if (port->phy_interface == PHY_INTERFACE_MODE_RGMII || + port->phy_interface == PHY_INTERFACE_MODE_RGMII_ID || + port->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID || + port->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID || + port->phy_interface == PHY_INTERFACE_MODE_SGMII) { + val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG); + val |= (MVPP2_GMAC_FORCE_LINK_PASS | + MVPP2_GMAC_FORCE_LINK_DOWN); + writel(val, port->base + MVPP2_GMAC_AUTONEG_CONFIG); + } + + mvpp2_interrupts_enable(port); + mvpp2_port_enable(port); + mvpp2_egress_enable(port); mvpp2_ingress_enable(port); + netif_carrier_on(dev); + netif_tx_wake_all_queues(dev); } else { + port->duplex = -1; + port->speed = 0; + + netif_tx_stop_all_queues(dev); + netif_carrier_off(dev); mvpp2_ingress_disable(port); mvpp2_egress_disable(port); + + mvpp2_port_disable(port); + mvpp2_interrupts_disable(port); } + phy_print_status(phydev); } } @@ -6404,12 +6601,15 @@ static void mvpp2_start_dev(struct mvpp2_port *port) /* Enable interrupts on all CPUs */ mvpp2_interrupts_enable(port); - if (port->priv->hw_version == MVPP22) + if (port->priv->hw_version == MVPP22) { + mvpp22_comphy_init(port); mvpp22_gop_init(port); + } mvpp2_port_mii_set(port); mvpp2_port_enable(port); - phy_start(ndev->phydev); + if (ndev->phydev) + phy_start(ndev->phydev); netif_tx_start_all_queues(port->dev); } @@ -6435,7 +6635,9 @@ static void mvpp2_stop_dev(struct mvpp2_port *port) mvpp2_egress_disable(port); mvpp2_port_disable(port); - phy_stop(ndev->phydev); + if (ndev->phydev) + phy_stop(ndev->phydev); + phy_power_off(port->comphy); } static int mvpp2_check_ringparam_valid(struct net_device *dev, @@ -6491,6 +6693,10 @@ static int mvpp2_phy_connect(struct mvpp2_port *port) { struct phy_device *phy_dev; + /* No PHY is attached */ + if (!port->phy_node) + return 0; + phy_dev = of_phy_connect(port->dev, port->phy_node, mvpp2_link_event, 0, port->phy_interface); if (!phy_dev) { @@ -6511,6 +6717,9 @@ static void mvpp2_phy_disconnect(struct mvpp2_port *port) { struct net_device *ndev = port->dev; + if (!ndev->phydev) + return; + phy_disconnect(ndev->phydev); } @@ -6557,6 +6766,7 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port) static int mvpp2_open(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2 *priv = port->priv; unsigned char mac_bcast[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; int err; @@ -6602,12 +6812,24 @@ static int mvpp2_open(struct net_device *dev) goto err_cleanup_txqs; } + if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) { + err = request_irq(port->link_irq, mvpp2_link_status_isr, 0, + dev->name, port); + if (err) { + netdev_err(port->dev, "cannot request link IRQ %d\n", + port->link_irq); + goto err_free_irq; + } + + mvpp22_gop_setup_irq(port); + } + /* In default link is down */ netif_carrier_off(port->dev); err = mvpp2_phy_connect(port); if (err < 0) - goto err_free_irq; + goto err_free_link_irq; /* Unmask interrupts on all CPUs */ on_each_cpu(mvpp2_interrupts_unmask, port, 1); @@ -6617,6 +6839,9 @@ static int mvpp2_open(struct net_device *dev) return 0; +err_free_link_irq: + if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) + free_irq(port->link_irq, port); err_free_irq: mvpp2_irqs_deinit(port); err_cleanup_txqs: @@ -6630,6 +6855,7 @@ static int mvpp2_stop(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); struct mvpp2_port_pcpu *port_pcpu; + struct mvpp2 *priv = port->priv; int cpu; mvpp2_stop_dev(port); @@ -6639,6 +6865,9 @@ static int mvpp2_stop(struct net_device *dev) on_each_cpu(mvpp2_interrupts_mask, port, 1); mvpp2_shared_interrupt_mask_unmask(port, true); + if (priv->hw_version == MVPP22 && !port->phy_node && port->link_irq) + free_irq(port->link_irq, port); + mvpp2_irqs_deinit(port); if (!port->has_tx_irqs) { for_each_present_cpu(cpu) { @@ -7242,6 +7471,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, struct mvpp2 *priv) { struct device_node *phy_node; + struct phy *comphy; struct mvpp2_port *port; struct mvpp2_port_pcpu *port_pcpu; struct net_device *dev; @@ -7272,12 +7502,6 @@ static int mvpp2_port_probe(struct platform_device *pdev, return -ENOMEM; phy_node = of_parse_phandle(port_node, "phy", 0); - if (!phy_node) { - dev_err(&pdev->dev, "missing phy\n"); - err = -ENODEV; - goto err_free_netdev; - } - phy_mode = of_get_phy_mode(port_node); if (phy_mode < 0) { dev_err(&pdev->dev, "incorrect phy mode\n"); @@ -7285,6 +7509,15 @@ static int mvpp2_port_probe(struct platform_device *pdev, goto err_free_netdev; } + comphy = devm_of_phy_get(&pdev->dev, port_node, NULL); + if (IS_ERR(comphy)) { + if (PTR_ERR(comphy) == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto err_free_netdev; + } + comphy = NULL; + } + if (of_property_read_u32(port_node, "port-id", &id)) { err = -EINVAL; dev_err(&pdev->dev, "missing port-id value\n"); @@ -7307,6 +7540,15 @@ static int mvpp2_port_probe(struct platform_device *pdev, if (err) goto err_free_netdev; + port->link_irq = of_irq_get_byname(port_node, "link"); + if (port->link_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto err_deinit_qvecs; + } + if (port->link_irq <= 0) + /* the link irq is optional */ + port->link_irq = 0; + if (of_property_read_bool(port_node, "marvell,loopback")) port->flags |= MVPP2_F_LOOPBACK; @@ -7318,13 +7560,14 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->phy_node = phy_node; port->phy_interface = phy_mode; + port->comphy = comphy; if (priv->hw_version == MVPP21) { res = platform_get_resource(pdev, IORESOURCE_MEM, 2 + id); port->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(port->base)) { err = PTR_ERR(port->base); - goto err_deinit_qvecs; + goto err_free_irq; } } else { if (of_property_read_u32(port_node, "gop-port-id", @@ -7341,7 +7584,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, port->stats = netdev_alloc_pcpu_stats(struct mvpp2_pcpu_stats); if (!port->stats) { err = -ENOMEM; - goto err_deinit_qvecs; + goto err_free_irq; } dt_mac_addr = of_get_mac_address(port_node); @@ -7425,6 +7668,9 @@ err_free_txq_pcpu: free_percpu(port->txqs[i]->pcpu); err_free_stats: free_percpu(port->stats); +err_free_irq: + if (port->link_irq) + irq_dispose_mapping(port->link_irq); err_deinit_qvecs: mvpp2_queue_vectors_deinit(port); err_free_netdev: @@ -7445,6 +7691,8 @@ static void mvpp2_port_remove(struct mvpp2_port *port) for (i = 0; i < port->ntxqs; i++) free_percpu(port->txqs[i]->pcpu); mvpp2_queue_vectors_deinit(port); + if (port->link_irq) + irq_dispose_mapping(port->link_irq); free_netdev(port->dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0039b4725405..a31912415264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -620,6 +620,12 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + enum { MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, @@ -679,6 +685,7 @@ struct mlx5e_l2_table { struct mlx5e_ttc_table { struct mlx5e_flow_table ft; struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE @@ -711,6 +718,7 @@ enum { MLX5E_VLAN_FT_LEVEL = 0, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, MLX5E_ARFS_FT_LEVEL }; @@ -736,6 +744,7 @@ struct mlx5e_flow_steering { struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; struct mlx5e_arfs_tables arfs; }; @@ -769,6 +778,7 @@ struct mlx5e_priv { u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; int hard_mtu; @@ -903,7 +913,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc); + void *tirc, bool inner); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -932,6 +942,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 0dd7e9caf150..c6ec90e9c95b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1212,9 +1212,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index eecbc6d4f51f..f11fd07ac4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -608,12 +608,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) ttc->rules[i] = NULL; } } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } } -static struct { +struct mlx5e_etype_proto { u16 etype; u8 proto; -} ttc_rules[] = { +}; + +static struct mlx5e_etype_proto ttc_rules[] = { [MLX5E_TT_IPV4_TCP] = { .etype = ETH_P_IP, .proto = IPPROTO_TCP, @@ -660,6 +669,28 @@ static struct { }, }; +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + static struct mlx5_flow_handle * mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, @@ -667,10 +698,12 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; + u8 ipv; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -681,7 +714,13 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } - if (etype) { + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); @@ -723,6 +762,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.inner_ttc.ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + return 0; del_rules: @@ -733,13 +786,23 @@ del_rules: } #define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) #define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ MLX5E_TTC_GROUP2_SIZE +\ MLX5E_TTC_GROUP3_SIZE) -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5e_flow_table *ft = &ttc->ft; @@ -761,7 +824,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_TTC_GROUP1_SIZE; @@ -802,6 +868,190 @@ err: return err; } +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_handle **rules; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_table *ft; + int err; + int tt; + + ttc = &priv->fs.inner_ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->inner_indir_tir[tt].tirn; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -812,6 +1062,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; @@ -828,7 +1079,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_ttc_table_groups(ttc); + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto err; @@ -1154,11 +1405,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } err = mlx5e_create_l2_table(priv); @@ -1183,6 +1441,8 @@ err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1194,6 +1454,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fdc2b92f020b..111c7523d448 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2349,9 +2349,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc) + void *tirc, bool inner) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2500,6 +2501,21 @@ free_in: return err; } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -2865,7 +2881,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -2884,6 +2900,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) struct mlx5e_tir *tir; void *tirc; int inlen; + int i = 0; int err; u32 *in; int tt; @@ -2899,16 +2916,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) - goto err_destroy_tirs; + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: kvfree(in); return 0; -err_destroy_tirs: - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2962,6 +2999,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) @@ -3499,13 +3542,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, - struct sk_buff *skb, - netdev_features_t features) +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) { struct udphdr *udph; - u16 proto; - u16 port = 0; + u8 proto; + u16 port; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -3518,14 +3561,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, goto out; } - if (proto == IPPROTO_UDP) { + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); - } - /* Verify if UDP port is being offloaded by HW */ - if (port && mlx5e_vxlan_lookup_port(priv, port)) - return features; + /* Verify if UDP port is being offloaded by HW */ + if (mlx5e_vxlan_lookup_port(priv, port)) + return features; + } out: /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ @@ -3549,7 +3595,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) - return mlx5e_vxlan_features_check(priv, skb, features); + return mlx5e_tunnel_features_check(priv, skb, features); return features; } @@ -4014,20 +4060,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; - if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); if (fcs_supported) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d731d57a996a..5a7bea688ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -83,8 +83,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, aRFS */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 3ea13148ed05..51e6846da72b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -76,6 +76,7 @@ static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { &mlxsw_sp_dpipe_header_metadata, &devlink_dpipe_header_ethernet, &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, }; static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { @@ -328,9 +329,21 @@ static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) if (err) return err; - match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; - match.header = &devlink_dpipe_header_ipv4; - match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } return devlink_dpipe_match_put(skb, &match); } @@ -342,7 +355,7 @@ mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) } static int -mlxsw_sp_dpipe_table_host4_actions_dump(void *priv, struct sk_buff *skb) +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) { struct devlink_dpipe_action action = {0}; @@ -373,8 +386,19 @@ mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *match match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; - match->header = &devlink_dpipe_header_ipv4; - match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; action->header = &devlink_dpipe_header_ethernet; @@ -411,7 +435,18 @@ mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; match_value->match = match; - match_value->value_size = sizeof(u32); + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); if (!match_value->value) return -ENOMEM; @@ -426,13 +461,12 @@ mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, } static void -__mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, - struct mlxsw_sp_rif *rif, - unsigned char *ha, u32 dip) +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) { struct devlink_dpipe_value *value; u32 *rif_value; - u32 *dip_value; u8 *ha_value; /* Set Match RIF index */ @@ -445,9 +479,7 @@ __mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, /* Set Match DIP */ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; - - dip_value = value->value; - *dip_value = dip; + memcpy(value->value, dip, value->value_size); /* Set Action DMAC */ value = entry->action_values; @@ -465,7 +497,21 @@ mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, ha = mlxsw_sp_neigh_entry_ha(neigh_entry); dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); - __mlxsw_sp_dpipe_table_host4_entry_fill(entry, rif, ha, dip); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); } static void @@ -477,7 +523,18 @@ mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, { int err; - mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, &entry->counter); if (!err) @@ -516,7 +573,13 @@ start_again: rif_neigh_count = 0; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) continue; if (rif_neigh_count < rif_neigh_skip) @@ -616,8 +679,15 @@ mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, if (!rif) continue; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, neigh_entry, enable); @@ -648,8 +718,15 @@ mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) if (!rif) continue; mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != type) + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + size++; } } @@ -667,7 +744,7 @@ static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = { .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, - .actions_dump = mlxsw_sp_dpipe_table_host4_actions_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, .size_get = mlxsw_sp_dpipe_table_host4_size_get, @@ -691,6 +768,64 @@ static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST4); } +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -707,8 +842,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); if (err) goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; return 0; +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); err_host4_table_init: mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); err_erif_table_init: @@ -720,6 +861,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); devlink_dpipe_headers_unregister(devlink); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index c56a3d92a2bb..283fde4e6783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -55,5 +55,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 377d85cff7fa..de15eac50866 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -947,6 +947,15 @@ u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) return ntohl(*((__be32 *) n->primary_key)); } +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, u64 *p_counter) @@ -999,21 +1008,33 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, } static bool -mlxsw_sp_neigh4_counter_should_alloc(struct mlxsw_sp *mlxsw_sp) +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) { struct devlink *devlink; + const char *table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } devlink = priv_to_devlink(mlxsw_sp->core); - return devlink_dpipe_table_counter_enabled(devlink, - MLXSW_SP_DPIPE_TABLE_NAME_HOST4); + return devlink_dpipe_table_counter_enabled(devlink, table_name); } static void mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { - if (mlxsw_sp_neigh_entry_type(neigh_entry) != AF_INET || - !mlxsw_sp_neigh4_counter_should_alloc(mlxsw_sp)) + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) return; if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) @@ -1396,8 +1417,10 @@ mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } -static bool mlxsw_sp_neigh_ipv6_ignore(struct neighbour *n) +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) { + struct neighbour *n = neigh_entry->key.n; + /* Packets with a link-local destination address are trapped * after LPM lookup and never reach the neighbour table, so * there is no need to program such neighbours to the device. @@ -1420,7 +1443,7 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); } else if (neigh_entry->key.n->tbl->family == AF_INET6) { - if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry->key.n)) + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) return; mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); @@ -3636,7 +3659,7 @@ static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp_fib_entry *fib_entry, * local, which will cause them to be trapped with a lower * priority than packets that need to be locally received. */ - if (rt->rt6i_flags & RTF_LOCAL) + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; else if (rt->rt6i_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -3970,9 +3993,6 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, char raltb_pl[MLXSW_REG_RALTB_LEN]; char ralue_pl[MLXSW_REG_RALUE_LEN]; - if (!mlxsw_sp_vr_is_used(vr)) - continue; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index fb0f971c670c..87a04afee138 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -65,6 +65,8 @@ int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); unsigned char * mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); #define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ @@ -76,5 +78,6 @@ void mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 9a1645852015..085338990f49 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2805,7 +2805,7 @@ static int qed_get_coalesce(struct qed_dev *cdev, u16 *coal, void *handle) p_hwfn = p_cid->p_owner; rc = qed_get_queue_coalesce(p_hwfn, coal, handle); if (rc) - DP_NOTICE(p_hwfn, "Unable to read queue calescing\n"); + DP_NOTICE(p_hwfn, "Unable to read queue coalescing\n"); return rc; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c index be41e4c77b65..c48a0e2d4d7e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c @@ -592,13 +592,9 @@ qlcnic_receive_peg_ready(struct qlcnic_adapter *adapter) } while (--retries); - if (!retries) { - dev_err(&adapter->pdev->dev, "Receive Peg initialization not " - "complete, state: 0x%x.\n", val); - return -EIO; - } - - return 0; + dev_err(&adapter->pdev->dev, "Receive Peg initialization not complete, state: 0x%x.\n", + val); + return -EIO; } int diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index 877675a27b9f..f5200712718d 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -59,4 +59,6 @@ config QCOM_EMAC low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 Precision Clock Synchronization Protocol. +source "drivers/net/ethernet/qualcomm/rmnet/Kconfig" + endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 92fa7c4da90a..1847350f48a7 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -9,3 +9,5 @@ obj-$(CONFIG_QCA7000_UART) += qcauart.o qcauart-objs := qca_uart.o obj-y += emac/ + +obj-$(CONFIG_RMNET) += rmnet/ diff --git a/drivers/net/ethernet/qualcomm/rmnet/Kconfig b/drivers/net/ethernet/qualcomm/rmnet/Kconfig new file mode 100644 index 000000000000..6e2587af47a4 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/Kconfig @@ -0,0 +1,12 @@ +# +# RMNET MAP driver +# + +menuconfig RMNET + tristate "RmNet MAP driver" + default n + ---help--- + If you select this, you will enable the RMNET module which is used + for handling data in the multiplexing and aggregation protocol (MAP) + format in the embedded data path. RMNET devices can be attached to + any IP mode physical device. diff --git a/drivers/net/ethernet/qualcomm/rmnet/Makefile b/drivers/net/ethernet/qualcomm/rmnet/Makefile new file mode 100644 index 000000000000..01bddf207cac --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/Makefile @@ -0,0 +1,10 @@ +# +# Makefile for the RMNET module +# + +rmnet-y := rmnet_config.o +rmnet-y += rmnet_vnd.o +rmnet-y += rmnet_handlers.o +rmnet-y += rmnet_map_data.o +rmnet-y += rmnet_map_command.o +obj-$(CONFIG_RMNET) += rmnet.o diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c new file mode 100644 index 000000000000..e836d267d5cc --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -0,0 +1,419 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET configuration engine + * + */ + +#include <net/sock.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netdevice.h> +#include "rmnet_config.h" +#include "rmnet_handlers.h" +#include "rmnet_vnd.h" +#include "rmnet_private.h" + +/* Locking scheme - + * The shared resource which needs to be protected is realdev->rx_handler_data. + * For the writer path, this is using rtnl_lock(). The writer paths are + * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These + * paths are already called with rtnl_lock() acquired in. There is also an + * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For + * dereference here, we will need to use rtnl_dereference(). Dev list writing + * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link(). + * For the reader path, the real_dev->rx_handler_data is called in the TX / RX + * path. We only need rcu_read_lock() for these scenarios. In these cases, + * the rcu_read_lock() is held in __dev_queue_xmit() and + * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl() + * to get the relevant information. For dev list reading, we again acquire + * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu(). + * We also use unregister_netdevice_many() to free all rmnet devices in + * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in + * same context. + */ + +/* Local Definitions and Declarations */ +#define RMNET_LOCAL_LOGICAL_ENDPOINT -1 + +struct rmnet_walk_data { + struct net_device *real_dev; + struct list_head *head; + struct rmnet_real_dev_info *real_dev_info; +}; + +static int rmnet_is_real_dev_registered(const struct net_device *real_dev) +{ + rx_handler_func_t *rx_handler; + + rx_handler = rcu_dereference(real_dev->rx_handler); + return (rx_handler == rmnet_rx_handler); +} + +/* Needs either rcu_read_lock() or rtnl lock */ +static struct rmnet_real_dev_info* +__rmnet_get_real_dev_info(const struct net_device *real_dev) +{ + if (rmnet_is_real_dev_registered(real_dev)) + return rcu_dereference_rtnl(real_dev->rx_handler_data); + else + return NULL; +} + +/* Needs rtnl lock */ +static struct rmnet_real_dev_info* +rmnet_get_real_dev_info_rtnl(const struct net_device *real_dev) +{ + return rtnl_dereference(real_dev->rx_handler_data); +} + +static struct rmnet_endpoint* +rmnet_get_endpoint(struct net_device *dev, int config_id) +{ + struct rmnet_real_dev_info *r; + struct rmnet_endpoint *ep; + + if (!rmnet_is_real_dev_registered(dev)) { + ep = rmnet_vnd_get_endpoint(dev); + } else { + r = __rmnet_get_real_dev_info(dev); + + if (!r) + return NULL; + + if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT) + ep = &r->local_ep; + else + ep = &r->muxed_ep[config_id]; + } + + return ep; +} + +static int rmnet_unregister_real_device(struct net_device *real_dev, + struct rmnet_real_dev_info *r) +{ + if (r->nr_rmnet_devs) + return -EINVAL; + + kfree(r); + + netdev_rx_handler_unregister(real_dev); + + /* release reference on real_dev */ + dev_put(real_dev); + + netdev_dbg(real_dev, "Removed from rmnet\n"); + return 0; +} + +static int rmnet_register_real_device(struct net_device *real_dev) +{ + struct rmnet_real_dev_info *r; + int rc; + + ASSERT_RTNL(); + + if (rmnet_is_real_dev_registered(real_dev)) + return 0; + + r = kzalloc(sizeof(*r), GFP_ATOMIC); + if (!r) + return -ENOMEM; + + r->dev = real_dev; + rc = netdev_rx_handler_register(real_dev, rmnet_rx_handler, r); + if (rc) { + kfree(r); + return -EBUSY; + } + + /* hold on to real dev for MAP data */ + dev_hold(real_dev); + + netdev_dbg(real_dev, "registered with rmnet\n"); + return 0; +} + +static int rmnet_set_ingress_data_format(struct net_device *dev, u32 idf) +{ + struct rmnet_real_dev_info *r; + + netdev_dbg(dev, "Ingress format 0x%08X\n", idf); + + r = __rmnet_get_real_dev_info(dev); + + r->ingress_data_format = idf; + + return 0; +} + +static int rmnet_set_egress_data_format(struct net_device *dev, u32 edf, + u16 agg_size, u16 agg_count) +{ + struct rmnet_real_dev_info *r; + + netdev_dbg(dev, "Egress format 0x%08X agg size %d cnt %d\n", + edf, agg_size, agg_count); + + r = __rmnet_get_real_dev_info(dev); + + r->egress_data_format = edf; + + return 0; +} + +static int __rmnet_set_endpoint_config(struct net_device *dev, int config_id, + struct rmnet_endpoint *ep) +{ + struct rmnet_endpoint *dev_ep; + + dev_ep = rmnet_get_endpoint(dev, config_id); + + if (!dev_ep) + return -EINVAL; + + memcpy(dev_ep, ep, sizeof(struct rmnet_endpoint)); + if (config_id == RMNET_LOCAL_LOGICAL_ENDPOINT) + dev_ep->mux_id = 0; + else + dev_ep->mux_id = config_id; + + return 0; +} + +static int rmnet_set_endpoint_config(struct net_device *dev, + int config_id, u8 rmnet_mode, + struct net_device *egress_dev) +{ + struct rmnet_endpoint ep; + + netdev_dbg(dev, "id %d mode %d dev %s\n", + config_id, rmnet_mode, egress_dev->name); + + if (config_id < RMNET_LOCAL_LOGICAL_ENDPOINT || + config_id >= RMNET_MAX_LOGICAL_EP) + return -EINVAL; + + /* This config is cleared on every set, so its ok to not + * clear it on a device delete. + */ + memset(&ep, 0, sizeof(struct rmnet_endpoint)); + ep.rmnet_mode = rmnet_mode; + ep.egress_dev = egress_dev; + + return __rmnet_set_endpoint_config(dev, config_id, &ep); +} + +static int rmnet_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + int ingress_format = RMNET_INGRESS_FORMAT_DEMUXING | + RMNET_INGRESS_FORMAT_DEAGGREGATION | + RMNET_INGRESS_FORMAT_MAP; + int egress_format = RMNET_EGRESS_FORMAT_MUXING | + RMNET_EGRESS_FORMAT_MAP; + struct rmnet_real_dev_info *r; + struct net_device *real_dev; + int mode = RMNET_EPMODE_VND; + int err = 0; + u16 mux_id; + + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); + if (!real_dev || !dev) + return -ENODEV; + + if (!data[IFLA_VLAN_ID]) + return -EINVAL; + + mux_id = nla_get_u16(data[IFLA_VLAN_ID]); + + err = rmnet_register_real_device(real_dev); + if (err) + goto err0; + + r = rmnet_get_real_dev_info_rtnl(real_dev); + err = rmnet_vnd_newlink(mux_id, dev, r); + if (err) + goto err1; + + err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL); + if (err) + goto err2; + + rmnet_vnd_set_mux(dev, mux_id); + rmnet_set_egress_data_format(real_dev, egress_format, 0, 0); + rmnet_set_ingress_data_format(real_dev, ingress_format); + rmnet_set_endpoint_config(real_dev, mux_id, mode, dev); + rmnet_set_endpoint_config(dev, mux_id, mode, real_dev); + return 0; + +err2: + rmnet_vnd_dellink(mux_id, r); +err1: + rmnet_unregister_real_device(real_dev, r); +err0: + return err; +} + +static void rmnet_dellink(struct net_device *dev, struct list_head *head) +{ + struct rmnet_real_dev_info *r; + struct net_device *real_dev; + u8 mux_id; + + rcu_read_lock(); + real_dev = netdev_master_upper_dev_get_rcu(dev); + rcu_read_unlock(); + + if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + return; + + r = rmnet_get_real_dev_info_rtnl(real_dev); + + mux_id = rmnet_vnd_get_mux(dev); + rmnet_vnd_dellink(mux_id, r); + netdev_upper_dev_unlink(dev, real_dev); + rmnet_unregister_real_device(real_dev, r); + + unregister_netdevice_queue(dev, head); +} + +static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data) +{ + struct rmnet_walk_data *d = data; + u8 mux_id; + + mux_id = rmnet_vnd_get_mux(rmnet_dev); + + rmnet_vnd_dellink(mux_id, d->real_dev_info); + netdev_upper_dev_unlink(rmnet_dev, d->real_dev); + unregister_netdevice_queue(rmnet_dev, d->head); + + return 0; +} + +static void rmnet_force_unassociate_device(struct net_device *dev) +{ + struct net_device *real_dev = dev; + struct rmnet_real_dev_info *r; + struct rmnet_walk_data d; + LIST_HEAD(list); + + if (!rmnet_is_real_dev_registered(real_dev)) + return; + + ASSERT_RTNL(); + + d.real_dev = real_dev; + d.head = &list; + + r = rmnet_get_real_dev_info_rtnl(dev); + d.real_dev_info = r; + + rcu_read_lock(); + netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d); + rcu_read_unlock(); + unregister_netdevice_many(&list); + + rmnet_unregister_real_device(real_dev, r); +} + +static int rmnet_config_notify_cb(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct net_device *dev = netdev_notifier_info_to_dev(data); + + if (!dev) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_UNREGISTER: + netdev_dbg(dev, "Kernel unregister\n"); + rmnet_force_unassociate_device(dev); + break; + + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block rmnet_dev_notifier __read_mostly = { + .notifier_call = rmnet_config_notify_cb, +}; + +static int rmnet_rtnl_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + u16 mux_id; + + if (!data || !data[IFLA_VLAN_ID]) + return -EINVAL; + + mux_id = nla_get_u16(data[IFLA_VLAN_ID]); + if (mux_id > (RMNET_MAX_LOGICAL_EP - 1)) + return -ERANGE; + + return 0; +} + +static size_t rmnet_get_size(const struct net_device *dev) +{ + return nla_total_size(2); /* IFLA_VLAN_ID */ +} + +struct rtnl_link_ops rmnet_link_ops __read_mostly = { + .kind = "rmnet", + .maxtype = __IFLA_VLAN_MAX, + .priv_size = sizeof(struct rmnet_priv), + .setup = rmnet_vnd_setup, + .validate = rmnet_rtnl_validate, + .newlink = rmnet_newlink, + .dellink = rmnet_dellink, + .get_size = rmnet_get_size, +}; + +struct rmnet_real_dev_info* +rmnet_get_real_dev_info(struct net_device *real_dev) +{ + return __rmnet_get_real_dev_info(real_dev); +} + +/* Startup/Shutdown */ + +static int __init rmnet_init(void) +{ + int rc; + + rc = register_netdevice_notifier(&rmnet_dev_notifier); + if (rc != 0) + return rc; + + rc = rtnl_link_register(&rmnet_link_ops); + if (rc != 0) { + unregister_netdevice_notifier(&rmnet_dev_notifier); + return rc; + } + return rc; +} + +static void __exit rmnet_exit(void) +{ + unregister_netdevice_notifier(&rmnet_dev_notifier); + rtnl_link_unregister(&rmnet_link_ops); +} + +module_init(rmnet_init) +module_exit(rmnet_exit) +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h new file mode 100644 index 000000000000..985d372e0d8d --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET Data configuration engine + * + */ + +#include <linux/skbuff.h> + +#ifndef _RMNET_CONFIG_H_ +#define _RMNET_CONFIG_H_ + +#define RMNET_MAX_LOGICAL_EP 255 +#define RMNET_MAX_VND 32 + +/* Information about the next device to deliver the packet to. + * Exact usage of this parameter depends on the rmnet_mode. + */ +struct rmnet_endpoint { + u8 rmnet_mode; + u8 mux_id; + struct net_device *egress_dev; +}; + +/* One instance of this structure is instantiated for each real_dev associated + * with rmnet. + */ +struct rmnet_real_dev_info { + struct net_device *dev; + struct rmnet_endpoint local_ep; + struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP]; + u32 ingress_data_format; + u32 egress_data_format; + struct net_device *rmnet_devices[RMNET_MAX_VND]; + u8 nr_rmnet_devs; +}; + +extern struct rtnl_link_ops rmnet_link_ops; + +struct rmnet_priv { + struct rmnet_endpoint local_ep; + u8 mux_id; +}; + +struct rmnet_real_dev_info* +rmnet_get_real_dev_info(struct net_device *real_dev); + +#endif /* _RMNET_CONFIG_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c new file mode 100644 index 000000000000..7dab3bbfeda5 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -0,0 +1,271 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET Data ingress/egress handler + * + */ + +#include <linux/netdevice.h> +#include <linux/netdev_features.h> +#include "rmnet_private.h" +#include "rmnet_config.h" +#include "rmnet_vnd.h" +#include "rmnet_map.h" +#include "rmnet_handlers.h" + +#define RMNET_IP_VERSION_4 0x40 +#define RMNET_IP_VERSION_6 0x60 + +/* Helper Functions */ + +static void rmnet_set_skb_proto(struct sk_buff *skb) +{ + switch (skb->data[0] & 0xF0) { + case RMNET_IP_VERSION_4: + skb->protocol = htons(ETH_P_IP); + break; + case RMNET_IP_VERSION_6: + skb->protocol = htons(ETH_P_IPV6); + break; + default: + skb->protocol = htons(ETH_P_MAP); + break; + } +} + +/* Generic handler */ + +static rx_handler_result_t +rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep) +{ + if (!ep->egress_dev) + kfree_skb(skb); + else + rmnet_egress_handler(skb, ep); + + return RX_HANDLER_CONSUMED; +} + +static rx_handler_result_t +rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep) +{ + switch (ep->rmnet_mode) { + case RMNET_EPMODE_NONE: + return RX_HANDLER_PASS; + + case RMNET_EPMODE_BRIDGE: + return rmnet_bridge_handler(skb, ep); + + case RMNET_EPMODE_VND: + skb_reset_transport_header(skb); + skb_reset_network_header(skb); + rmnet_vnd_rx_fixup(skb, skb->dev); + + skb->pkt_type = PACKET_HOST; + skb_set_mac_header(skb, 0); + netif_receive_skb(skb); + return RX_HANDLER_CONSUMED; + + default: + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } +} + +static rx_handler_result_t +rmnet_ingress_deliver_packet(struct sk_buff *skb, + struct rmnet_real_dev_info *r) +{ + if (!r) { + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } + + skb->dev = r->local_ep.egress_dev; + + return rmnet_deliver_skb(skb, &r->local_ep); +} + +/* MAP handler */ + +static rx_handler_result_t +__rmnet_map_ingress_handler(struct sk_buff *skb, + struct rmnet_real_dev_info *r) +{ + struct rmnet_endpoint *ep; + u8 mux_id; + u16 len; + + if (RMNET_MAP_GET_CD_BIT(skb)) { + if (r->ingress_data_format + & RMNET_INGRESS_FORMAT_MAP_COMMANDS) + return rmnet_map_command(skb, r); + + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } + + mux_id = RMNET_MAP_GET_MUX_ID(skb); + len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb); + + if (mux_id >= RMNET_MAX_LOGICAL_EP) { + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } + + ep = &r->muxed_ep[mux_id]; + + if (r->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING) + skb->dev = ep->egress_dev; + + /* Subtract MAP header */ + skb_pull(skb, sizeof(struct rmnet_map_header)); + skb_trim(skb, len); + rmnet_set_skb_proto(skb); + return rmnet_deliver_skb(skb, ep); +} + +static rx_handler_result_t +rmnet_map_ingress_handler(struct sk_buff *skb, + struct rmnet_real_dev_info *r) +{ + struct sk_buff *skbn; + int rc; + + if (r->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) { + while ((skbn = rmnet_map_deaggregate(skb, r)) != NULL) + __rmnet_map_ingress_handler(skbn, r); + + consume_skb(skb); + rc = RX_HANDLER_CONSUMED; + } else { + rc = __rmnet_map_ingress_handler(skb, r); + } + + return rc; +} + +static int rmnet_map_egress_handler(struct sk_buff *skb, + struct rmnet_real_dev_info *r, + struct rmnet_endpoint *ep, + struct net_device *orig_dev) +{ + int required_headroom, additional_header_len; + struct rmnet_map_header *map_header; + + additional_header_len = 0; + required_headroom = sizeof(struct rmnet_map_header); + + if (skb_headroom(skb) < required_headroom) { + if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL)) + return RMNET_MAP_CONSUMED; + } + + map_header = rmnet_map_add_map_header(skb, additional_header_len, 0); + if (!map_header) + return RMNET_MAP_CONSUMED; + + if (r->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) { + if (ep->mux_id == 0xff) + map_header->mux_id = 0; + else + map_header->mux_id = ep->mux_id; + } + + skb->protocol = htons(ETH_P_MAP); + + return RMNET_MAP_SUCCESS; +} + +/* Ingress / Egress Entry Points */ + +/* Processes packet as per ingress data format for receiving device. Logical + * endpoint is determined from packet inspection. Packet is then sent to the + * egress device listed in the logical endpoint configuration. + */ +rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) +{ + struct rmnet_real_dev_info *r; + struct sk_buff *skb = *pskb; + struct net_device *dev; + int rc; + + if (!skb) + return RX_HANDLER_CONSUMED; + + dev = skb->dev; + r = rmnet_get_real_dev_info(dev); + + if (r->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) { + rc = rmnet_map_ingress_handler(skb, r); + } else { + switch (ntohs(skb->protocol)) { + case ETH_P_MAP: + if (r->local_ep.rmnet_mode == + RMNET_EPMODE_BRIDGE) { + rc = rmnet_ingress_deliver_packet(skb, r); + } else { + kfree_skb(skb); + rc = RX_HANDLER_CONSUMED; + } + break; + + case ETH_P_IP: + case ETH_P_IPV6: + rc = rmnet_ingress_deliver_packet(skb, r); + break; + + default: + rc = RX_HANDLER_PASS; + } + } + + return rc; +} + +/* Modifies packet as per logical endpoint configuration and egress data format + * for egress device configured in logical endpoint. Packet is then transmitted + * on the egress device. + */ +void rmnet_egress_handler(struct sk_buff *skb, + struct rmnet_endpoint *ep) +{ + struct rmnet_real_dev_info *r; + struct net_device *orig_dev; + + orig_dev = skb->dev; + skb->dev = ep->egress_dev; + + r = rmnet_get_real_dev_info(skb->dev); + if (!r) { + kfree_skb(skb); + return; + } + + if (r->egress_data_format & RMNET_EGRESS_FORMAT_MAP) { + switch (rmnet_map_egress_handler(skb, r, ep, orig_dev)) { + case RMNET_MAP_CONSUMED: + return; + + case RMNET_MAP_SUCCESS: + break; + + default: + kfree_skb(skb); + return; + } + } + + if (ep->rmnet_mode == RMNET_EPMODE_VND) + rmnet_vnd_tx_fixup(skb, orig_dev); + + dev_queue_xmit(skb); +} diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h new file mode 100644 index 000000000000..f2638cf5693c --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2013, 2016-2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET Data ingress/egress handler + * + */ + +#ifndef _RMNET_HANDLERS_H_ +#define _RMNET_HANDLERS_H_ + +#include "rmnet_config.h" + +void rmnet_egress_handler(struct sk_buff *skb, + struct rmnet_endpoint *ep); + +rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb); + +#endif /* _RMNET_HANDLERS_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h new file mode 100644 index 000000000000..2aabad289b10 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -0,0 +1,88 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _RMNET_MAP_H_ +#define _RMNET_MAP_H_ + +struct rmnet_map_control_command { + u8 command_name; + u8 cmd_type:2; + u8 reserved:6; + u16 reserved2; + u32 transaction_id; + union { + struct { + u16 ip_family:2; + u16 reserved:14; + u16 flow_control_seq_num; + u32 qos_id; + } flow_control; + u8 data[0]; + }; +} __aligned(1); + +enum rmnet_map_results { + RMNET_MAP_SUCCESS, + RMNET_MAP_CONSUMED, + RMNET_MAP_GENERAL_FAILURE, + RMNET_MAP_NOT_ENABLED, + RMNET_MAP_FAILED_AGGREGATION, + RMNET_MAP_FAILED_MUX +}; + +enum rmnet_map_commands { + RMNET_MAP_COMMAND_NONE, + RMNET_MAP_COMMAND_FLOW_DISABLE, + RMNET_MAP_COMMAND_FLOW_ENABLE, + /* These should always be the last 2 elements */ + RMNET_MAP_COMMAND_UNKNOWN, + RMNET_MAP_COMMAND_ENUM_LENGTH +}; + +struct rmnet_map_header { + u8 pad_len:6; + u8 reserved_bit:1; + u8 cd_bit:1; + u8 mux_id; + u16 pkt_len; +} __aligned(1); + +#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \ + (Y)->data)->mux_id) +#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \ + (Y)->data)->cd_bit) +#define RMNET_MAP_GET_PAD(Y) (((struct rmnet_map_header *) \ + (Y)->data)->pad_len) +#define RMNET_MAP_GET_CMD_START(Y) ((struct rmnet_map_control_command *) \ + ((Y)->data + \ + sizeof(struct rmnet_map_header))) +#define RMNET_MAP_GET_LENGTH(Y) (ntohs(((struct rmnet_map_header *) \ + (Y)->data)->pkt_len)) + +#define RMNET_MAP_COMMAND_REQUEST 0 +#define RMNET_MAP_COMMAND_ACK 1 +#define RMNET_MAP_COMMAND_UNSUPPORTED 2 +#define RMNET_MAP_COMMAND_INVALID 3 + +#define RMNET_MAP_NO_PAD_BYTES 0 +#define RMNET_MAP_ADD_PAD_BYTES 1 + +u8 rmnet_map_demultiplex(struct sk_buff *skb); +struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb, + struct rmnet_real_dev_info *rdinfo); + +struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, + int hdrlen, int pad); +rx_handler_result_t rmnet_map_command(struct sk_buff *skb, + struct rmnet_real_dev_info *rdinfo); + +#endif /* _RMNET_MAP_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c new file mode 100644 index 000000000000..ccded40ee551 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c @@ -0,0 +1,107 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/netdevice.h> +#include "rmnet_config.h" +#include "rmnet_map.h" +#include "rmnet_private.h" +#include "rmnet_vnd.h" + +static u8 rmnet_map_do_flow_control(struct sk_buff *skb, + struct rmnet_real_dev_info *rdinfo, + int enable) +{ + struct rmnet_map_control_command *cmd; + struct rmnet_endpoint *ep; + struct net_device *vnd; + u16 ip_family; + u16 fc_seq; + u32 qos_id; + u8 mux_id; + int r; + + mux_id = RMNET_MAP_GET_MUX_ID(skb); + cmd = RMNET_MAP_GET_CMD_START(skb); + + if (mux_id >= RMNET_MAX_LOGICAL_EP) { + kfree_skb(skb); + return RX_HANDLER_CONSUMED; + } + + ep = &rdinfo->muxed_ep[mux_id]; + vnd = ep->egress_dev; + + ip_family = cmd->flow_control.ip_family; + fc_seq = ntohs(cmd->flow_control.flow_control_seq_num); + qos_id = ntohl(cmd->flow_control.qos_id); + + /* Ignore the ip family and pass the sequence number for both v4 and v6 + * sequence. User space does not support creating dedicated flows for + * the 2 protocols + */ + r = rmnet_vnd_do_flow_control(vnd, enable); + if (r) { + kfree_skb(skb); + return RMNET_MAP_COMMAND_UNSUPPORTED; + } else { + return RMNET_MAP_COMMAND_ACK; + } +} + +static void rmnet_map_send_ack(struct sk_buff *skb, + unsigned char type, + struct rmnet_real_dev_info *rdinfo) +{ + struct rmnet_map_control_command *cmd; + int xmit_status; + + skb->protocol = htons(ETH_P_MAP); + + cmd = RMNET_MAP_GET_CMD_START(skb); + cmd->cmd_type = type & 0x03; + + netif_tx_lock(skb->dev); + xmit_status = skb->dev->netdev_ops->ndo_start_xmit(skb, skb->dev); + netif_tx_unlock(skb->dev); +} + +/* Process MAP command frame and send N/ACK message as appropriate. Message cmd + * name is decoded here and appropriate handler is called. + */ +rx_handler_result_t rmnet_map_command(struct sk_buff *skb, + struct rmnet_real_dev_info *rdinfo) +{ + struct rmnet_map_control_command *cmd; + unsigned char command_name; + unsigned char rc = 0; + + cmd = RMNET_MAP_GET_CMD_START(skb); + command_name = cmd->command_name; + + switch (command_name) { + case RMNET_MAP_COMMAND_FLOW_ENABLE: + rc = rmnet_map_do_flow_control(skb, rdinfo, 1); + break; + + case RMNET_MAP_COMMAND_FLOW_DISABLE: + rc = rmnet_map_do_flow_control(skb, rdinfo, 0); + break; + + default: + rc = RMNET_MAP_COMMAND_UNSUPPORTED; + kfree_skb(skb); + break; + } + if (rc == RMNET_MAP_COMMAND_ACK) + rmnet_map_send_ack(skb, rc, rdinfo); + return RX_HANDLER_CONSUMED; +} diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c new file mode 100644 index 000000000000..a29c476a4755 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c @@ -0,0 +1,105 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET Data MAP protocol + * + */ + +#include <linux/netdevice.h> +#include "rmnet_config.h" +#include "rmnet_map.h" +#include "rmnet_private.h" + +#define RMNET_MAP_DEAGGR_SPACING 64 +#define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2) + +/* Adds MAP header to front of skb->data + * Padding is calculated and set appropriately in MAP header. Mux ID is + * initialized to 0. + */ +struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb, + int hdrlen, int pad) +{ + struct rmnet_map_header *map_header; + u32 padding, map_datalen; + u8 *padbytes; + + if (skb_headroom(skb) < sizeof(struct rmnet_map_header)) + return NULL; + + map_datalen = skb->len - hdrlen; + map_header = (struct rmnet_map_header *) + skb_push(skb, sizeof(struct rmnet_map_header)); + memset(map_header, 0, sizeof(struct rmnet_map_header)); + + if (pad == RMNET_MAP_NO_PAD_BYTES) { + map_header->pkt_len = htons(map_datalen); + return map_header; + } + + padding = ALIGN(map_datalen, 4) - map_datalen; + + if (padding == 0) + goto done; + + if (skb_tailroom(skb) < padding) + return NULL; + + padbytes = (u8 *)skb_put(skb, padding); + memset(padbytes, 0, padding); + +done: + map_header->pkt_len = htons(map_datalen + padding); + map_header->pad_len = padding & 0x3F; + + return map_header; +} + +/* Deaggregates a single packet + * A whole new buffer is allocated for each portion of an aggregated frame. + * Caller should keep calling deaggregate() on the source skb until 0 is + * returned, indicating that there are no more packets to deaggregate. Caller + * is responsible for freeing the original skb. + */ +struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb, + struct rmnet_real_dev_info *rdinfo) +{ + struct rmnet_map_header *maph; + struct sk_buff *skbn; + u32 packet_len; + + if (skb->len == 0) + return NULL; + + maph = (struct rmnet_map_header *)skb->data; + packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header); + + if (((int)skb->len - (int)packet_len) < 0) + return NULL; + + skbn = alloc_skb(packet_len + RMNET_MAP_DEAGGR_SPACING, GFP_ATOMIC); + if (!skbn) + return NULL; + + skbn->dev = skb->dev; + skb_reserve(skbn, RMNET_MAP_DEAGGR_HEADROOM); + skb_put(skbn, packet_len); + memcpy(skbn->data, skb->data, packet_len); + skb_pull(skb, packet_len); + + /* Some hardware can send us empty frames. Catch them */ + if (ntohs(maph->pkt_len) == 0) { + kfree_skb(skb); + return NULL; + } + + return skbn; +} diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h new file mode 100644 index 000000000000..ed820b5522f5 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2013-2014, 2016-2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _RMNET_PRIVATE_H_ +#define _RMNET_PRIVATE_H_ + +#define RMNET_MAX_VND 32 +#define RMNET_MAX_PACKET_SIZE 16384 +#define RMNET_DFLT_PACKET_SIZE 1500 +#define RMNET_NEEDED_HEADROOM 16 +#define RMNET_TX_QUEUE_LEN 1000 + +/* Constants */ +#define RMNET_EGRESS_FORMAT__RESERVED__ BIT(0) +#define RMNET_EGRESS_FORMAT_MAP BIT(1) +#define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2) +#define RMNET_EGRESS_FORMAT_MUXING BIT(3) +#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3 BIT(4) +#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(5) + +#define RMNET_INGRESS_FIX_ETHERNET BIT(0) +#define RMNET_INGRESS_FORMAT_MAP BIT(1) +#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2) +#define RMNET_INGRESS_FORMAT_DEMUXING BIT(3) +#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4) +#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3 BIT(5) +#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(6) + +/* Pass the frame up the stack with no modifications to skb->dev */ +#define RMNET_EPMODE_NONE (0) +/* Replace skb->dev to a virtual rmnet device and pass up the stack */ +#define RMNET_EPMODE_VND (1) +/* Pass the frame directly to another device with dev_queue_xmit() */ +#define RMNET_EPMODE_BRIDGE (2) + +#endif /* _RMNET_PRIVATE_H_ */ diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c new file mode 100644 index 000000000000..c8b573d28dcf --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -0,0 +1,170 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * RMNET Data virtual network driver + * + */ + +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <net/pkt_sched.h> +#include "rmnet_config.h" +#include "rmnet_handlers.h" +#include "rmnet_private.h" +#include "rmnet_map.h" +#include "rmnet_vnd.h" + +/* RX/TX Fixup */ + +void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev) +{ + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; +} + +void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev) +{ + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; +} + +/* Network Device Operations */ + +static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct rmnet_priv *priv; + + priv = netdev_priv(dev); + if (priv->local_ep.egress_dev) { + rmnet_egress_handler(skb, &priv->local_ep); + } else { + dev->stats.tx_dropped++; + kfree_skb(skb); + } + return NETDEV_TX_OK; +} + +static int rmnet_vnd_change_mtu(struct net_device *rmnet_dev, int new_mtu) +{ + if (new_mtu < 0 || new_mtu > RMNET_MAX_PACKET_SIZE) + return -EINVAL; + + rmnet_dev->mtu = new_mtu; + return 0; +} + +static const struct net_device_ops rmnet_vnd_ops = { + .ndo_start_xmit = rmnet_vnd_start_xmit, + .ndo_change_mtu = rmnet_vnd_change_mtu, +}; + +/* Called by kernel whenever a new rmnet<n> device is created. Sets MTU, + * flags, ARP type, needed headroom, etc... + */ +void rmnet_vnd_setup(struct net_device *rmnet_dev) +{ + struct rmnet_priv *priv; + + priv = netdev_priv(rmnet_dev); + netdev_dbg(rmnet_dev, "Setting up device %s\n", rmnet_dev->name); + + rmnet_dev->netdev_ops = &rmnet_vnd_ops; + rmnet_dev->mtu = RMNET_DFLT_PACKET_SIZE; + rmnet_dev->needed_headroom = RMNET_NEEDED_HEADROOM; + random_ether_addr(rmnet_dev->dev_addr); + rmnet_dev->tx_queue_len = RMNET_TX_QUEUE_LEN; + + /* Raw IP mode */ + rmnet_dev->header_ops = NULL; /* No header */ + rmnet_dev->type = ARPHRD_RAWIP; + rmnet_dev->hard_header_len = 0; + rmnet_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST); + + rmnet_dev->needs_free_netdev = true; +} + +/* Exposed API */ + +int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, + struct rmnet_real_dev_info *r) +{ + int rc; + + if (r->rmnet_devices[id]) + return -EINVAL; + + rc = register_netdevice(rmnet_dev); + if (!rc) { + r->rmnet_devices[id] = rmnet_dev; + r->nr_rmnet_devs++; + rmnet_dev->rtnl_link_ops = &rmnet_link_ops; + } + + return rc; +} + +int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r) +{ + if (id >= RMNET_MAX_VND || !r->rmnet_devices[id]) + return -EINVAL; + + r->rmnet_devices[id] = NULL; + r->nr_rmnet_devs--; + return 0; +} + +u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) +{ + struct rmnet_priv *priv; + + priv = netdev_priv(rmnet_dev); + return priv->mux_id; +} + +void rmnet_vnd_set_mux(struct net_device *rmnet_dev, u8 mux_id) +{ + struct rmnet_priv *priv; + + priv = netdev_priv(rmnet_dev); + priv->mux_id = mux_id; +} + +/* Gets the logical endpoint configuration for a RmNet virtual network device + * node. Caller should confirm that devices is a RmNet VND before calling. + */ +struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev) +{ + struct rmnet_priv *priv; + + if (!rmnet_dev) + return NULL; + + priv = netdev_priv(rmnet_dev); + + return &priv->local_ep; +} + +int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) +{ + netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); + /* Although we expect similar number of enable/disable + * commands, optimize for the disable. That is more + * latency sensitive than enable + */ + if (unlikely(enable)) + netif_wake_queue(rmnet_dev); + else + netif_stop_queue(rmnet_dev); + + return 0; +} diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h new file mode 100644 index 000000000000..b102b4269be1 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * RMNET Data Virtual Network Device APIs + * + */ + +#ifndef _RMNET_VND_H_ +#define _RMNET_VND_H_ + +int rmnet_vnd_do_flow_control(struct net_device *dev, int enable); +struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev); +int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev, + struct rmnet_real_dev_info *r); +int rmnet_vnd_dellink(u8 id, struct rmnet_real_dev_info *r); +void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); +void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); +u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); +void rmnet_vnd_set_mux(struct net_device *rmnet_dev, u8 mux_id); +void rmnet_vnd_setup(struct net_device *dev); +#endif /* _RMNET_VND_H_ */ diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 5afe6fdcc968..a9d16a3af514 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -85,7 +85,7 @@ config MDIO_BUS_MUX_MMIOREG parent bus. Child bus selection is under the control of one of the FPGA's registers. - Currently, only 8-bit registers are supported. + Currently, only 8/16/32 bits registers are supported. config MDIO_CAVIUM tristate diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index c3065236ffcc..cbd629822f04 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -874,7 +874,6 @@ static void decode_rxts(struct dp83640_private *dp83640, shhwtstamps = skb_hwtstamps(skb); memset(shhwtstamps, 0, sizeof(*shhwtstamps)); shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns); - netif_rx_ni(skb); list_add(&rxts->list, &dp83640->rxpool); break; } @@ -885,6 +884,9 @@ static void decode_rxts(struct dp83640_private *dp83640, list_add_tail(&rxts->list, &dp83640->rxts); out: spin_unlock_irqrestore(&dp83640->rx_lock, flags); + + if (shhwtstamps) + netif_rx_ni(skb); } static void decode_txts(struct dp83640_private *dp83640, @@ -1425,7 +1427,6 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, shhwtstamps = skb_hwtstamps(skb); memset(shhwtstamps, 0, sizeof(*shhwtstamps)); shhwtstamps->hwtstamp = ns_to_ktime(rxts->ns); - netif_rx_ni(skb); list_del_init(&rxts->list); list_add(&rxts->list, &dp83640->rxpool); break; @@ -1438,6 +1439,8 @@ static bool dp83640_rxtstamp(struct phy_device *phydev, skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->rx_queue, skb); schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT); + } else { + netif_rx_ni(skb); } return true; diff --git a/drivers/net/phy/mdio-mux.c b/drivers/net/phy/mdio-mux.c index 942ceaf3fd3f..6f75e9f27fed 100644 --- a/drivers/net/phy/mdio-mux.c +++ b/drivers/net/phy/mdio-mux.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/phy.h> -#define DRV_VERSION "1.0" #define DRV_DESCRIPTION "MDIO bus multiplexer driver" struct mdio_mux_child_bus; @@ -117,10 +116,11 @@ int mdio_mux_init(struct device *dev, } else { parent_bus_node = NULL; parent_bus = mux_bus; + get_device(&parent_bus->dev); } pb = devm_kzalloc(dev, sizeof(*pb), GFP_KERNEL); - if (pb == NULL) { + if (!pb) { ret_val = -ENOMEM; goto err_pb_kz; } @@ -144,10 +144,7 @@ int mdio_mux_init(struct device *dev, } cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL); - if (cb == NULL) { - dev_err(dev, - "Error: Failed to allocate memory for child %pOF\n", - child_bus_node); + if (!cb) { ret_val = -ENOMEM; continue; } @@ -156,9 +153,6 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { - dev_err(dev, - "Error: Failed to allocate MDIO bus for child %pOF\n", - child_bus_node); ret_val = -ENOMEM; devm_kfree(dev, cb); continue; @@ -185,16 +179,13 @@ int mdio_mux_init(struct device *dev, } if (pb->children) { *mux_handle = pb; - dev_info(dev, "Version " DRV_VERSION "\n"); return 0; } dev_err(dev, "Error: No acceptable child buses found\n"); devm_kfree(dev, pb); err_pb_kz: - /* balance the reference of_mdio_find_bus() took */ - if (!mux_bus) - put_device(&parent_bus->dev); + put_device(&parent_bus->dev); err_parent_bus: of_node_put(parent_bus_node); return ret_val; @@ -212,12 +203,10 @@ void mdio_mux_uninit(void *mux_handle) cb = cb->next; } - /* balance the reference of_mdio_find_bus() in mdio_mux_init() took */ put_device(&pb->mii_bus->dev); } EXPORT_SYMBOL_GPL(mdio_mux_uninit); MODULE_DESCRIPTION(DRV_DESCRIPTION); -MODULE_VERSION(DRV_VERSION); MODULE_AUTHOR("David Daney"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 7b61adb6270c..523387e71a80 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -611,7 +611,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) nskb = skb_copy(skb, GFP_ATOMIC); if (!nskb) goto drop; - dev_kfree_skb_any(skb); + dev_consume_skb_any(skb); skb = nskb; page = virt_to_page(skb->data); offset = offset_in_page(skb->data); diff --git a/drivers/phy/marvell/Kconfig b/drivers/phy/marvell/Kconfig index 048d8893bc2e..68e321225400 100644 --- a/drivers/phy/marvell/Kconfig +++ b/drivers/phy/marvell/Kconfig @@ -21,6 +21,17 @@ config PHY_BERLIN_USB help Enable this to support the USB PHY on Marvell Berlin SoCs. +config PHY_MVEBU_CP110_COMPHY + tristate "Marvell CP110 comphy driver" + depends on ARCH_MVEBU || COMPILE_TEST + depends on OF + select GENERIC_PHY + help + This driver allows to control the comphy, an hardware block providing + shared serdes PHYs on Marvell Armada 7k/8k (in the CP110). Its serdes + lanes can be used by various controllers (Ethernet, sata, usb, + PCIe...). + config PHY_MVEBU_SATA def_bool y depends on ARCH_DOVE || MACH_DOVE || MACH_KIRKWOOD diff --git a/drivers/phy/marvell/Makefile b/drivers/phy/marvell/Makefile index 3fc188f59118..0cf6a7cbaf9f 100644 --- a/drivers/phy/marvell/Makefile +++ b/drivers/phy/marvell/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_ARMADA375_USBCLUSTER_PHY) += phy-armada375-usb2.o obj-$(CONFIG_PHY_BERLIN_SATA) += phy-berlin-sata.o obj-$(CONFIG_PHY_BERLIN_USB) += phy-berlin-usb.o +obj-$(CONFIG_PHY_MVEBU_CP110_COMPHY) += phy-mvebu-cp110-comphy.o obj-$(CONFIG_PHY_MVEBU_SATA) += phy-mvebu-sata.o obj-$(CONFIG_PHY_PXA_28NM_HSIC) += phy-pxa-28nm-hsic.o obj-$(CONFIG_PHY_PXA_28NM_USB2) += phy-pxa-28nm-usb2.o diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c new file mode 100644 index 000000000000..73ebad6634a7 --- /dev/null +++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c @@ -0,0 +1,644 @@ +/* + * Copyright (C) 2017 Marvell + * + * Antoine Tenart <antoine.tenart@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/phy/phy.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> + +/* Relative to priv->base */ +#define MVEBU_COMPHY_SERDES_CFG0(n) (0x0 + (n) * 0x1000) +#define MVEBU_COMPHY_SERDES_CFG0_PU_PLL BIT(1) +#define MVEBU_COMPHY_SERDES_CFG0_GEN_RX(n) ((n) << 3) +#define MVEBU_COMPHY_SERDES_CFG0_GEN_TX(n) ((n) << 7) +#define MVEBU_COMPHY_SERDES_CFG0_PU_RX BIT(11) +#define MVEBU_COMPHY_SERDES_CFG0_PU_TX BIT(12) +#define MVEBU_COMPHY_SERDES_CFG0_HALF_BUS BIT(14) +#define MVEBU_COMPHY_SERDES_CFG1(n) (0x4 + (n) * 0x1000) +#define MVEBU_COMPHY_SERDES_CFG1_RESET BIT(3) +#define MVEBU_COMPHY_SERDES_CFG1_RX_INIT BIT(4) +#define MVEBU_COMPHY_SERDES_CFG1_CORE_RESET BIT(5) +#define MVEBU_COMPHY_SERDES_CFG1_RF_RESET BIT(6) +#define MVEBU_COMPHY_SERDES_CFG2(n) (0x8 + (n) * 0x1000) +#define MVEBU_COMPHY_SERDES_CFG2_DFE_EN BIT(4) +#define MVEBU_COMPHY_SERDES_STATUS0(n) (0x18 + (n) * 0x1000) +#define MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY BIT(2) +#define MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY BIT(3) +#define MVEBU_COMPHY_SERDES_STATUS0_RX_INIT BIT(4) +#define MVEBU_COMPHY_PWRPLL_CTRL(n) (0x804 + (n) * 0x1000) +#define MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(n) ((n) << 0) +#define MVEBU_COMPHY_PWRPLL_PHY_MODE(n) ((n) << 5) +#define MVEBU_COMPHY_IMP_CAL(n) (0x80c + (n) * 0x1000) +#define MVEBU_COMPHY_IMP_CAL_TX_EXT(n) ((n) << 10) +#define MVEBU_COMPHY_IMP_CAL_TX_EXT_EN BIT(15) +#define MVEBU_COMPHY_DFE_RES(n) (0x81c + (n) * 0x1000) +#define MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL BIT(15) +#define MVEBU_COMPHY_COEF(n) (0x828 + (n) * 0x1000) +#define MVEBU_COMPHY_COEF_DFE_EN BIT(14) +#define MVEBU_COMPHY_COEF_DFE_CTRL BIT(15) +#define MVEBU_COMPHY_GEN1_S0(n) (0x834 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S0_TX_AMP(n) ((n) << 1) +#define MVEBU_COMPHY_GEN1_S0_TX_EMPH(n) ((n) << 7) +#define MVEBU_COMPHY_GEN1_S1(n) (0x838 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(n) ((n) << 0) +#define MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(n) ((n) << 3) +#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(n) ((n) << 6) +#define MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(n) ((n) << 8) +#define MVEBU_COMPHY_GEN1_S1_RX_DFE_EN BIT(10) +#define MVEBU_COMPHY_GEN1_S1_RX_DIV(n) ((n) << 11) +#define MVEBU_COMPHY_GEN1_S2(n) (0x8f4 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S2_TX_EMPH(n) ((n) << 0) +#define MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN BIT(4) +#define MVEBU_COMPHY_LOOPBACK(n) (0x88c + (n) * 0x1000) +#define MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(n) ((n) << 1) +#define MVEBU_COMPHY_VDD_CAL0(n) (0x908 + (n) * 0x1000) +#define MVEBU_COMPHY_VDD_CAL0_CONT_MODE BIT(15) +#define MVEBU_COMPHY_EXT_SELV(n) (0x914 + (n) * 0x1000) +#define MVEBU_COMPHY_EXT_SELV_RX_SAMPL(n) ((n) << 5) +#define MVEBU_COMPHY_MISC_CTRL0(n) (0x93c + (n) * 0x1000) +#define MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE BIT(5) +#define MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL BIT(10) +#define MVEBU_COMPHY_RX_CTRL1(n) (0x940 + (n) * 0x1000) +#define MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL BIT(11) +#define MVEBU_COMPHY_RX_CTRL1_CLK8T_EN BIT(12) +#define MVEBU_COMPHY_SPEED_DIV(n) (0x954 + (n) * 0x1000) +#define MVEBU_COMPHY_SPEED_DIV_TX_FORCE BIT(7) +#define MVEBU_SP_CALIB(n) (0x96c + (n) * 0x1000) +#define MVEBU_SP_CALIB_SAMPLER(n) ((n) << 8) +#define MVEBU_SP_CALIB_SAMPLER_EN BIT(12) +#define MVEBU_COMPHY_TX_SLEW_RATE(n) (0x974 + (n) * 0x1000) +#define MVEBU_COMPHY_TX_SLEW_RATE_EMPH(n) ((n) << 5) +#define MVEBU_COMPHY_TX_SLEW_RATE_SLC(n) ((n) << 10) +#define MVEBU_COMPHY_DLT_CTRL(n) (0x984 + (n) * 0x1000) +#define MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN BIT(2) +#define MVEBU_COMPHY_FRAME_DETECT0(n) (0xa14 + (n) * 0x1000) +#define MVEBU_COMPHY_FRAME_DETECT0_PATN(n) ((n) << 7) +#define MVEBU_COMPHY_FRAME_DETECT3(n) (0xa20 + (n) * 0x1000) +#define MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN BIT(12) +#define MVEBU_COMPHY_DME(n) (0xa28 + (n) * 0x1000) +#define MVEBU_COMPHY_DME_ETH_MODE BIT(7) +#define MVEBU_COMPHY_TRAINING0(n) (0xa68 + (n) * 0x1000) +#define MVEBU_COMPHY_TRAINING0_P2P_HOLD BIT(15) +#define MVEBU_COMPHY_TRAINING5(n) (0xaa4 + (n) * 0x1000) +#define MVEBU_COMPHY_TRAINING5_RX_TIMER(n) ((n) << 0) +#define MVEBU_COMPHY_TX_TRAIN_PRESET(n) (0xb1c + (n) * 0x1000) +#define MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN BIT(8) +#define MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11 BIT(9) +#define MVEBU_COMPHY_GEN1_S3(n) (0xc40 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S3_FBCK_SEL BIT(9) +#define MVEBU_COMPHY_GEN1_S4(n) (0xc44 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S4_DFE_RES(n) ((n) << 8) +#define MVEBU_COMPHY_TX_PRESET(n) (0xc68 + (n) * 0x1000) +#define MVEBU_COMPHY_TX_PRESET_INDEX(n) ((n) << 0) +#define MVEBU_COMPHY_GEN1_S5(n) (0xd38 + (n) * 0x1000) +#define MVEBU_COMPHY_GEN1_S5_ICP(n) ((n) << 0) + +/* Relative to priv->regmap */ +#define MVEBU_COMPHY_CONF1(n) (0x1000 + (n) * 0x28) +#define MVEBU_COMPHY_CONF1_PWRUP BIT(1) +#define MVEBU_COMPHY_CONF1_USB_PCIE BIT(2) /* 0: Ethernet/SATA */ +#define MVEBU_COMPHY_CONF6(n) (0x1014 + (n) * 0x28) +#define MVEBU_COMPHY_CONF6_40B BIT(18) +#define MVEBU_COMPHY_SELECTOR 0x1140 +#define MVEBU_COMPHY_SELECTOR_PHY(n) ((n) * 0x4) + +#define MVEBU_COMPHY_LANES 6 +#define MVEBU_COMPHY_PORTS 3 + +struct mvebu_comhy_conf { + enum phy_mode mode; + unsigned lane; + unsigned port; + u32 mux; +}; + +#define MVEBU_COMPHY_CONF(_lane, _port, _mode, _mux) \ + { \ + .lane = _lane, \ + .port = _port, \ + .mode = _mode, \ + .mux = _mux, \ + } + +static const struct mvebu_comhy_conf mvebu_comphy_cp110_modes[] = { + /* lane 0 */ + MVEBU_COMPHY_CONF(0, 1, PHY_MODE_SGMII, 0x1), + /* lane 1 */ + MVEBU_COMPHY_CONF(1, 2, PHY_MODE_SGMII, 0x1), + /* lane 2 */ + MVEBU_COMPHY_CONF(2, 0, PHY_MODE_SGMII, 0x1), + MVEBU_COMPHY_CONF(2, 0, PHY_MODE_10GKR, 0x1), + /* lane 3 */ + MVEBU_COMPHY_CONF(3, 1, PHY_MODE_SGMII, 0x2), + /* lane 4 */ + MVEBU_COMPHY_CONF(4, 0, PHY_MODE_SGMII, 0x2), + MVEBU_COMPHY_CONF(4, 0, PHY_MODE_10GKR, 0x2), + MVEBU_COMPHY_CONF(4, 1, PHY_MODE_SGMII, 0x1), + /* lane 5 */ + MVEBU_COMPHY_CONF(5, 2, PHY_MODE_SGMII, 0x1), +}; + +struct mvebu_comphy_priv { + void __iomem *base; + struct regmap *regmap; + struct device *dev; + int modes[MVEBU_COMPHY_LANES]; +}; + +struct mvebu_comphy_lane { + struct mvebu_comphy_priv *priv; + unsigned id; + enum phy_mode mode; + int port; +}; + +static int mvebu_comphy_get_mux(int lane, int port, enum phy_mode mode) +{ + int i, n = ARRAY_SIZE(mvebu_comphy_cp110_modes); + + /* Unused PHY mux value is 0x0 */ + if (mode == PHY_MODE_INVALID) + return 0; + + for (i = 0; i < n; i++) { + if (mvebu_comphy_cp110_modes[i].lane == lane && + mvebu_comphy_cp110_modes[i].port == port && + mvebu_comphy_cp110_modes[i].mode == mode) + break; + } + + if (i == n) + return -EINVAL; + + return mvebu_comphy_cp110_modes[i].mux; +} + +static void mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane, + enum phy_mode mode) +{ + struct mvebu_comphy_priv *priv = lane->priv; + u32 val; + + regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val); + val &= ~MVEBU_COMPHY_CONF1_USB_PCIE; + val |= MVEBU_COMPHY_CONF1_PWRUP; + regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val); + + /* Select baud rates and PLLs */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id)); + val &= ~(MVEBU_COMPHY_SERDES_CFG0_PU_PLL | + MVEBU_COMPHY_SERDES_CFG0_PU_RX | + MVEBU_COMPHY_SERDES_CFG0_PU_TX | + MVEBU_COMPHY_SERDES_CFG0_HALF_BUS | + MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xf) | + MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xf)); + if (mode == PHY_MODE_10GKR) + val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) | + MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe); + else if (mode == PHY_MODE_SGMII) + val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0x6) | + MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0x6) | + MVEBU_COMPHY_SERDES_CFG0_HALF_BUS; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id)); + + /* reset */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val &= ~(MVEBU_COMPHY_SERDES_CFG1_RESET | + MVEBU_COMPHY_SERDES_CFG1_CORE_RESET | + MVEBU_COMPHY_SERDES_CFG1_RF_RESET); + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + /* de-assert reset */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val |= MVEBU_COMPHY_SERDES_CFG1_RESET | + MVEBU_COMPHY_SERDES_CFG1_CORE_RESET; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + /* wait until clocks are ready */ + mdelay(1); + + /* exlicitly disable 40B, the bits isn't clear on reset */ + regmap_read(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), &val); + val &= ~MVEBU_COMPHY_CONF6_40B; + regmap_write(priv->regmap, MVEBU_COMPHY_CONF6(lane->id), val); + + /* refclk selection */ + val = readl(priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id)); + val &= ~MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL; + if (mode == PHY_MODE_10GKR) + val |= MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE; + writel(val, priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id)); + + /* power and pll selection */ + val = readl(priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id)); + val &= ~(MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1f) | + MVEBU_COMPHY_PWRPLL_PHY_MODE(0x7)); + val |= MVEBU_COMPHY_PWRPLL_CTRL_RFREQ(0x1) | + MVEBU_COMPHY_PWRPLL_PHY_MODE(0x4); + writel(val, priv->base + MVEBU_COMPHY_PWRPLL_CTRL(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_LOOPBACK(lane->id)); + val &= ~MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x7); + val |= MVEBU_COMPHY_LOOPBACK_DBUS_WIDTH(0x1); + writel(val, priv->base + MVEBU_COMPHY_LOOPBACK(lane->id)); +} + +static int mvebu_comphy_init_plls(struct mvebu_comphy_lane *lane, + enum phy_mode mode) +{ + struct mvebu_comphy_priv *priv = lane->priv; + u32 val; + + /* SERDES external config */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id)); + val |= MVEBU_COMPHY_SERDES_CFG0_PU_PLL | + MVEBU_COMPHY_SERDES_CFG0_PU_RX | + MVEBU_COMPHY_SERDES_CFG0_PU_TX; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG0(lane->id)); + + /* check rx/tx pll */ + readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id), + val, + val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY | + MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY), + 1000, 150000); + if (!(val & (MVEBU_COMPHY_SERDES_STATUS0_RX_PLL_RDY | + MVEBU_COMPHY_SERDES_STATUS0_TX_PLL_RDY))) + return -ETIMEDOUT; + + /* rx init */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val |= MVEBU_COMPHY_SERDES_CFG1_RX_INIT; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + /* check rx */ + readl_poll_timeout(priv->base + MVEBU_COMPHY_SERDES_STATUS0(lane->id), + val, val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT, + 1000, 10000); + if (!(val & MVEBU_COMPHY_SERDES_STATUS0_RX_INIT)) + return -ETIMEDOUT; + + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val &= ~MVEBU_COMPHY_SERDES_CFG1_RX_INIT; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + return 0; +} + +static int mvebu_comphy_set_mode_sgmii(struct phy *phy) +{ + struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); + struct mvebu_comphy_priv *priv = lane->priv; + u32 val; + + mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_SGMII); + + val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id)); + val &= ~MVEBU_COMPHY_RX_CTRL1_CLK8T_EN; + val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL; + writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id)); + val &= ~MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN; + writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id)); + + regmap_read(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), &val); + val &= ~MVEBU_COMPHY_CONF1_USB_PCIE; + val |= MVEBU_COMPHY_CONF1_PWRUP; + regmap_write(priv->regmap, MVEBU_COMPHY_CONF1(lane->id), val); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id)); + val &= ~MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf); + val |= MVEBU_COMPHY_GEN1_S0_TX_EMPH(0x1); + writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id)); + + return mvebu_comphy_init_plls(lane, PHY_MODE_SGMII); +} + +static int mvebu_comphy_set_mode_10gkr(struct phy *phy) +{ + struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); + struct mvebu_comphy_priv *priv = lane->priv; + u32 val; + + mvebu_comphy_ethernet_init_reset(lane, PHY_MODE_10GKR); + + val = readl(priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id)); + val |= MVEBU_COMPHY_RX_CTRL1_RXCLK2X_SEL | + MVEBU_COMPHY_RX_CTRL1_CLK8T_EN; + writel(val, priv->base + MVEBU_COMPHY_RX_CTRL1(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id)); + val |= MVEBU_COMPHY_DLT_CTRL_DTL_FLOOP_EN; + writel(val, priv->base + MVEBU_COMPHY_DLT_CTRL(lane->id)); + + /* Speed divider */ + val = readl(priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id)); + val |= MVEBU_COMPHY_SPEED_DIV_TX_FORCE; + writel(val, priv->base + MVEBU_COMPHY_SPEED_DIV(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id)); + val |= MVEBU_COMPHY_SERDES_CFG2_DFE_EN; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG2(lane->id)); + + /* DFE resolution */ + val = readl(priv->base + MVEBU_COMPHY_DFE_RES(lane->id)); + val |= MVEBU_COMPHY_DFE_RES_FORCE_GEN_TBL; + writel(val, priv->base + MVEBU_COMPHY_DFE_RES(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S0(lane->id)); + val &= ~(MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1f) | + MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xf)); + val |= MVEBU_COMPHY_GEN1_S0_TX_AMP(0x1c) | + MVEBU_COMPHY_GEN1_S0_TX_EMPH(0xe); + writel(val, priv->base + MVEBU_COMPHY_GEN1_S0(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S2(lane->id)); + val &= ~MVEBU_COMPHY_GEN1_S2_TX_EMPH(0xf); + val |= MVEBU_COMPHY_GEN1_S2_TX_EMPH_EN; + writel(val, priv->base + MVEBU_COMPHY_GEN1_S2(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id)); + val |= MVEBU_COMPHY_TX_SLEW_RATE_EMPH(0x3) | + MVEBU_COMPHY_TX_SLEW_RATE_SLC(0x3f); + writel(val, priv->base + MVEBU_COMPHY_TX_SLEW_RATE(lane->id)); + + /* Impedance calibration */ + val = readl(priv->base + MVEBU_COMPHY_IMP_CAL(lane->id)); + val &= ~MVEBU_COMPHY_IMP_CAL_TX_EXT(0x1f); + val |= MVEBU_COMPHY_IMP_CAL_TX_EXT(0xe) | + MVEBU_COMPHY_IMP_CAL_TX_EXT_EN; + writel(val, priv->base + MVEBU_COMPHY_IMP_CAL(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S5(lane->id)); + val &= ~MVEBU_COMPHY_GEN1_S5_ICP(0xf); + writel(val, priv->base + MVEBU_COMPHY_GEN1_S5(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S1(lane->id)); + val &= ~(MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x7) | + MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x7) | + MVEBU_COMPHY_GEN1_S1_RX_MUL_FI(0x3) | + MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x3)); + val |= MVEBU_COMPHY_GEN1_S1_RX_DFE_EN | + MVEBU_COMPHY_GEN1_S1_RX_MUL_PI(0x2) | + MVEBU_COMPHY_GEN1_S1_RX_MUL_PF(0x2) | + MVEBU_COMPHY_GEN1_S1_RX_MUL_FF(0x1) | + MVEBU_COMPHY_GEN1_S1_RX_DIV(0x3); + writel(val, priv->base + MVEBU_COMPHY_GEN1_S1(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_COEF(lane->id)); + val &= ~(MVEBU_COMPHY_COEF_DFE_EN | MVEBU_COMPHY_COEF_DFE_CTRL); + writel(val, priv->base + MVEBU_COMPHY_COEF(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S4(lane->id)); + val &= ~MVEBU_COMPHY_GEN1_S4_DFE_RES(0x3); + val |= MVEBU_COMPHY_GEN1_S4_DFE_RES(0x1); + writel(val, priv->base + MVEBU_COMPHY_GEN1_S4(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_GEN1_S3(lane->id)); + val |= MVEBU_COMPHY_GEN1_S3_FBCK_SEL; + writel(val, priv->base + MVEBU_COMPHY_GEN1_S3(lane->id)); + + /* rx training timer */ + val = readl(priv->base + MVEBU_COMPHY_TRAINING5(lane->id)); + val &= ~MVEBU_COMPHY_TRAINING5_RX_TIMER(0x3ff); + val |= MVEBU_COMPHY_TRAINING5_RX_TIMER(0x13); + writel(val, priv->base + MVEBU_COMPHY_TRAINING5(lane->id)); + + /* tx train peak to peak hold */ + val = readl(priv->base + MVEBU_COMPHY_TRAINING0(lane->id)); + val |= MVEBU_COMPHY_TRAINING0_P2P_HOLD; + writel(val, priv->base + MVEBU_COMPHY_TRAINING0(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_TX_PRESET(lane->id)); + val &= ~MVEBU_COMPHY_TX_PRESET_INDEX(0xf); + val |= MVEBU_COMPHY_TX_PRESET_INDEX(0x2); /* preset coeff */ + writel(val, priv->base + MVEBU_COMPHY_TX_PRESET(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id)); + val &= ~MVEBU_COMPHY_FRAME_DETECT3_LOST_TIMEOUT_EN; + writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT3(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id)); + val |= MVEBU_COMPHY_TX_TRAIN_PRESET_16B_AUTO_EN | + MVEBU_COMPHY_TX_TRAIN_PRESET_PRBS11; + writel(val, priv->base + MVEBU_COMPHY_TX_TRAIN_PRESET(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id)); + val &= ~MVEBU_COMPHY_FRAME_DETECT0_PATN(0x1ff); + val |= MVEBU_COMPHY_FRAME_DETECT0_PATN(0x88); + writel(val, priv->base + MVEBU_COMPHY_FRAME_DETECT0(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_DME(lane->id)); + val |= MVEBU_COMPHY_DME_ETH_MODE; + writel(val, priv->base + MVEBU_COMPHY_DME(lane->id)); + + val = readl(priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id)); + val |= MVEBU_COMPHY_VDD_CAL0_CONT_MODE; + writel(val, priv->base + MVEBU_COMPHY_VDD_CAL0(lane->id)); + + val = readl(priv->base + MVEBU_SP_CALIB(lane->id)); + val &= ~MVEBU_SP_CALIB_SAMPLER(0x3); + val |= MVEBU_SP_CALIB_SAMPLER(0x3) | + MVEBU_SP_CALIB_SAMPLER_EN; + writel(val, priv->base + MVEBU_SP_CALIB(lane->id)); + val &= ~MVEBU_SP_CALIB_SAMPLER_EN; + writel(val, priv->base + MVEBU_SP_CALIB(lane->id)); + + /* External rx regulator */ + val = readl(priv->base + MVEBU_COMPHY_EXT_SELV(lane->id)); + val &= ~MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1f); + val |= MVEBU_COMPHY_EXT_SELV_RX_SAMPL(0x1a); + writel(val, priv->base + MVEBU_COMPHY_EXT_SELV(lane->id)); + + return mvebu_comphy_init_plls(lane, PHY_MODE_10GKR); +} + +static int mvebu_comphy_power_on(struct phy *phy) +{ + struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); + struct mvebu_comphy_priv *priv = lane->priv; + int ret; + u32 mux, val; + + mux = mvebu_comphy_get_mux(lane->id, lane->port, lane->mode); + if (mux < 0) + return -ENOTSUPP; + + regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); + val |= mux << MVEBU_COMPHY_SELECTOR_PHY(lane->id); + regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val); + + switch (lane->mode) { + case PHY_MODE_SGMII: + ret = mvebu_comphy_set_mode_sgmii(phy); + break; + case PHY_MODE_10GKR: + ret = mvebu_comphy_set_mode_10gkr(phy); + break; + default: + return -ENOTSUPP; + } + + /* digital reset */ + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val |= MVEBU_COMPHY_SERDES_CFG1_RF_RESET; + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + return ret; +} + +static int mvebu_comphy_set_mode(struct phy *phy, enum phy_mode mode) +{ + struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); + + if (mvebu_comphy_get_mux(lane->id, lane->port, mode) < 0) + return -EINVAL; + + lane->mode = mode; + return 0; +} + +static int mvebu_comphy_power_off(struct phy *phy) +{ + struct mvebu_comphy_lane *lane = phy_get_drvdata(phy); + struct mvebu_comphy_priv *priv = lane->priv; + u32 val; + + val = readl(priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + val &= ~(MVEBU_COMPHY_SERDES_CFG1_RESET | + MVEBU_COMPHY_SERDES_CFG1_CORE_RESET | + MVEBU_COMPHY_SERDES_CFG1_RF_RESET); + writel(val, priv->base + MVEBU_COMPHY_SERDES_CFG1(lane->id)); + + regmap_read(priv->regmap, MVEBU_COMPHY_SELECTOR, &val); + val &= ~(0xf << MVEBU_COMPHY_SELECTOR_PHY(lane->id)); + regmap_write(priv->regmap, MVEBU_COMPHY_SELECTOR, val); + + return 0; +} + +static const struct phy_ops mvebu_comphy_ops = { + .power_on = mvebu_comphy_power_on, + .power_off = mvebu_comphy_power_off, + .set_mode = mvebu_comphy_set_mode, + .owner = THIS_MODULE, +}; + +static struct phy *mvebu_comphy_xlate(struct device *dev, + struct of_phandle_args *args) +{ + struct mvebu_comphy_lane *lane; + struct phy *phy; + + if (WARN_ON(args->args[0] >= MVEBU_COMPHY_PORTS)) + return ERR_PTR(-EINVAL); + + phy = of_phy_simple_xlate(dev, args); + if (IS_ERR(phy)) + return phy; + + lane = phy_get_drvdata(phy); + if (lane->port >= 0) + return ERR_PTR(-EBUSY); + lane->port = args->args[0]; + + return phy; +} + +static int mvebu_comphy_probe(struct platform_device *pdev) +{ + struct mvebu_comphy_priv *priv; + struct phy_provider *provider; + struct device_node *child; + struct resource *res; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->dev = &pdev->dev; + priv->regmap = + syscon_regmap_lookup_by_phandle(pdev->dev.of_node, + "marvell,system-controller"); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (!priv->base) + return -ENOMEM; + + for_each_available_child_of_node(pdev->dev.of_node, child) { + struct mvebu_comphy_lane *lane; + struct phy *phy; + int ret; + u32 val; + + ret = of_property_read_u32(child, "reg", &val); + if (ret < 0) { + dev_err(&pdev->dev, "missing 'reg' property (%d)\n", + ret); + continue; + } + + if (val >= MVEBU_COMPHY_LANES) { + dev_err(&pdev->dev, "invalid 'reg' property\n"); + continue; + } + + lane = devm_kzalloc(&pdev->dev, sizeof(*lane), GFP_KERNEL); + if (!lane) + return -ENOMEM; + + phy = devm_phy_create(&pdev->dev, child, &mvebu_comphy_ops); + if (IS_ERR(phy)) + return PTR_ERR(phy); + + lane->priv = priv; + lane->mode = PHY_MODE_INVALID; + lane->id = val; + lane->port = -1; + phy_set_drvdata(phy, lane); + + /* + * Once all modes are supported in this driver we should call + * mvebu_comphy_power_off(phy) here to avoid relying on the + * bootloader/firmware configuration. + */ + } + + dev_set_drvdata(&pdev->dev, priv); + provider = devm_of_phy_provider_register(&pdev->dev, + mvebu_comphy_xlate); + return PTR_ERR_OR_ZERO(provider); +} + +static const struct of_device_id mvebu_comphy_of_match_table[] = { + { .compatible = "marvell,comphy-cp110" }, + { }, +}; +MODULE_DEVICE_TABLE(of, mvebu_comphy_of_match_table); + +static struct platform_driver mvebu_comphy_driver = { + .probe = mvebu_comphy_probe, + .driver = { + .name = "mvebu-comphy", + .of_match_table = mvebu_comphy_of_match_table, + }, +}; +module_platform_driver(mvebu_comphy_driver); + +MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>"); +MODULE_DESCRIPTION("Common PHY driver for mvebu SoCs"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/staging/irda/net/irmod.c b/drivers/staging/irda/net/irmod.c index c5e35b85c477..4319f4ff66b0 100644 --- a/drivers/staging/irda/net/irmod.c +++ b/drivers/staging/irda/net/irmod.c @@ -190,7 +190,7 @@ static void __exit irda_cleanup(void) * * Jean II */ -subsys_initcall(irda_init); +device_initcall(irda_init); module_exit(irda_cleanup); MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>"); diff --git a/include/linux/idr.h b/include/linux/idr.h index bf70b3ef0a07..7c3a365f7e12 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -80,19 +80,75 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val) */ void idr_preload(gfp_t gfp_mask); -int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t); + +int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp, + bool ext); + +/** + * idr_alloc - allocate an id + * @idr: idr handle + * @ptr: pointer to be associated with the new id + * @start: the minimum id (inclusive) + * @end: the maximum id (exclusive) + * @gfp: memory allocation flags + * + * Allocates an unused ID in the range [start, end). Returns -ENOSPC + * if there are no unused IDs in that range. + * + * Note that @end is treated as max when <= 0. This is to always allow + * using @start + N as @end as long as N is inside integer range. + * + * Simultaneous modifications to the @idr are not allowed and should be + * prevented by the user, usually with a lock. idr_alloc() may be called + * concurrently with read-only accesses to the @idr, such as idr_find() and + * idr_for_each_entry(). + */ +static inline int idr_alloc(struct idr *idr, void *ptr, + int start, int end, gfp_t gfp) +{ + unsigned long id; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + + ret = idr_alloc_cmn(idr, ptr, &id, start, end, gfp, false); + + if (ret) + return ret; + + return id; +} + +static inline int idr_alloc_ext(struct idr *idr, void *ptr, + unsigned long *index, + unsigned long start, + unsigned long end, + gfp_t gfp) +{ + return idr_alloc_cmn(idr, ptr, index, start, end, gfp, true); +} + int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t); int idr_for_each(const struct idr *, int (*fn)(int id, void *p, void *data), void *data); void *idr_get_next(struct idr *, int *nextid); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid); void *idr_replace(struct idr *, void *, int id); +void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id); void idr_destroy(struct idr *); -static inline void *idr_remove(struct idr *idr, int id) +static inline void *idr_remove_ext(struct idr *idr, unsigned long id) { return radix_tree_delete_item(&idr->idr_rt, id, NULL); } +static inline void *idr_remove(struct idr *idr, int id) +{ + return idr_remove_ext(idr, id); +} + static inline void idr_init(struct idr *idr) { INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); @@ -128,11 +184,16 @@ static inline void idr_preload_end(void) * This function can be called under rcu_read_lock(), given that the leaf * pointers lifetimes are correctly managed. */ -static inline void *idr_find(const struct idr *idr, int id) +static inline void *idr_find_ext(const struct idr *idr, unsigned long id) { return radix_tree_lookup(&idr->idr_rt, id); } +static inline void *idr_find(const struct idr *idr, int id) +{ + return idr_find_ext(idr, id); +} + /** * idr_for_each_entry - iterate over an idr's elements of a given type * @idr: idr handle @@ -145,6 +206,8 @@ static inline void *idr_find(const struct idr *idr, int id) */ #define idr_for_each_entry(idr, entry, id) \ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id) +#define idr_for_each_entry_ext(idr, entry, id) \ + for (id = 0; ((entry) = idr_get_next_ext(idr, &(id))) != NULL; ++id) /** * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7d09b9c52f..3d5d32e5446c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -602,7 +602,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 reserved_at_1a[0x1]; u8 tunnel_lso_const_out_ip_id[0x1]; u8 reserved_at_1c[0x2]; - u8 tunnel_statless_gre[0x1]; + u8 tunnel_stateless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; u8 swp[0x1]; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29bf06ff157c..35de8312e0b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1124,8 +1124,8 @@ struct xfrmdev_ops { * This function is used to submit a XDP packet for transmit on a * netdevice. * void (*ndo_xdp_flush)(struct net_device *dev); - * This function is used to inform the driver to flush a paticular - * xpd tx queue. Must be called on same CPU as xdp_xmit. + * This function is used to inform the driver to flush a particular + * xdp tx queue. Must be called on same CPU as xdp_xmit. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 78bb0d7f6b11..e694d4008c4a 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -27,6 +27,8 @@ enum phy_mode { PHY_MODE_USB_HOST, PHY_MODE_USB_DEVICE, PHY_MODE_USB_OTG, + PHY_MODE_SGMII, + PHY_MODE_10GKR, }; /** diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 3e5735064b71..567ebb5eaab0 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -357,8 +357,25 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index, unsigned new_order); int radix_tree_join(struct radix_tree_root *, unsigned long index, unsigned new_order, void *); -void __rcu **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *, - gfp_t, int end); + +void __rcu **idr_get_free_cmn(struct radix_tree_root *root, + struct radix_tree_iter *iter, gfp_t gfp, + unsigned long max); +static inline void __rcu **idr_get_free(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + int end) +{ + return idr_get_free_cmn(root, iter, gfp, end > 0 ? end - 1 : INT_MAX); +} + +static inline void __rcu **idr_get_free_ext(struct radix_tree_root *root, + struct radix_tree_iter *iter, + gfp_t gfp, + unsigned long end) +{ + return idr_get_free_cmn(root, iter, gfp, end - 1); +} enum { RADIX_TREE_ITER_TAG_MASK = 0x0f, /* tag index in lower nybble */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 267164a1d559..4aa40ef02d32 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -148,6 +148,12 @@ struct tcp_sock { u16 gso_segs; /* Max number of segs per GSO packet */ /* + * Header prediction flags + * 0x5?10 << 16 + snd_wnd in net byte order + */ + __be32 pred_flags; + +/* * RFC793 variables by their proper names. This means you can * read the code and the spec side by side (and laugh ...) * See RFC793 and RFC1122. The RFC writes these in capitals. diff --git a/include/net/act_api.h b/include/net/act_api.h index 26ffd8333f50..8f3d5d8b5ae0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -10,12 +10,9 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> - -struct tcf_hashinfo { - struct hlist_head *htab; - unsigned int hmask; - spinlock_t lock; - u32 index; +struct tcf_idrinfo { + spinlock_t lock; + struct idr action_idr; }; struct tc_action_ops; @@ -25,9 +22,8 @@ struct tc_action { __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ __u32 order; struct list_head list; - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; - struct hlist_node tcfa_head; u32 tcfa_index; int tcfa_refcnt; int tcfa_bindcnt; @@ -44,7 +40,6 @@ struct tc_action { struct tc_cookie *act_cookie; struct tcf_chain *goto_chain; }; -#define tcf_head common.tcfa_head #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt #define tcf_bindcnt common.tcfa_bindcnt @@ -57,27 +52,6 @@ struct tc_action { #define tcf_lock common.tcfa_lock #define tcf_rcu common.tcfa_rcu -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) -{ - return index & hmask; -} - -static inline int tcf_hashinfo_init(struct tcf_hashinfo *hf, unsigned int mask) -{ - int i; - - spin_lock_init(&hf->lock); - hf->index = 0; - hf->hmask = mask; - hf->htab = kzalloc((mask + 1) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!hf->htab) - return -ENOMEM; - for (i = 0; i < mask + 1; i++) - INIT_HLIST_HEAD(&hf->htab[i]); - return 0; -} - /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -126,53 +100,51 @@ struct tc_action_ops { }; struct tc_action_net { - struct tcf_hashinfo *hinfo; + struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; }; static inline int tc_action_net_init(struct tc_action_net *tn, - const struct tc_action_ops *ops, unsigned int mask) + const struct tc_action_ops *ops) { int err = 0; - tn->hinfo = kmalloc(sizeof(*tn->hinfo), GFP_KERNEL); - if (!tn->hinfo) + tn->idrinfo = kmalloc(sizeof(*tn->idrinfo), GFP_KERNEL); + if (!tn->idrinfo) return -ENOMEM; tn->ops = ops; - err = tcf_hashinfo_init(tn->hinfo, mask); - if (err) - kfree(tn->hinfo); + spin_lock_init(&tn->idrinfo->lock); + idr_init(&tn->idrinfo->action_idr); return err; } -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo); +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo); static inline void tc_action_net_exit(struct tc_action_net *tn) { - tcf_hashinfo_destroy(tn->ops, tn->hinfo); - kfree(tn->hinfo); + tcf_idrinfo_destroy(tn->ops, tn->idrinfo); + kfree(tn->idrinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops); -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index); -u32 tcf_hash_new_index(struct tc_action_net *tn); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index); +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, int bind); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, int bind, - bool cpustats); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a); +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats); +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est); +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); -int __tcf_hash_release(struct tc_action *a, bool bind, bool strict); +int __tcf_idr_release(struct tc_action *a, bool bind, bool strict); -static inline int tcf_hash_release(struct tc_action *a, bool bind) +static inline int tcf_idr_release(struct tc_action *a, bool bind) { - return __tcf_hash_release(a, bind, false); + return __tcf_idr_release(a, bind, false); } int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); diff --git a/include/net/devlink.h b/include/net/devlink.h index aaf7178127a2..b9654e133599 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -330,6 +330,7 @@ int devlink_dpipe_match_put(struct sk_buff *skb, struct devlink_dpipe_match *match); extern struct devlink_dpipe_header devlink_dpipe_header_ethernet; extern struct devlink_dpipe_header devlink_dpipe_header_ipv4; +extern struct devlink_dpipe_header devlink_dpipe_header_ipv6; #else diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c30b634c5f82..d6247a3c40df 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -217,6 +217,7 @@ struct tcf_proto_ops { void **, bool); int (*delete)(struct tcf_proto*, void *, bool*); void (*walk)(struct tcf_proto*, struct tcf_walker *arg); + void (*bind_class)(void *, u32, unsigned long); /* rtnetlink specific */ int (*dump)(struct net*, struct tcf_proto*, void *, diff --git a/include/net/tcp.h b/include/net/tcp.h index c614ff135b66..9c3db054e47f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); } +static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) +{ + tp->pred_flags = htonl((tp->tcp_header_len << 26) | + ntohl(TCP_FLAG_ACK) | + snd_wnd); +} + +static inline void tcp_fast_path_on(struct tcp_sock *tp) +{ + __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); +} + +static inline void tcp_fast_path_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && + tp->rcv_wnd && + atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && + !tp->urg_data) + tcp_fast_path_on(tp); +} + /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { @@ -910,8 +933,9 @@ enum tcp_ca_event { /* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ - CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ + CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ + CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ + CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ }; /* diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c7b70cce6d6..f002a2c5e33c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1019,6 +1019,7 @@ struct xfrm_offload { #define CRYPTO_FALLBACK 8 #define XFRM_GSO_SEGMENT 16 #define XFRM_GRO 32 +#define XFRM_ESP_NO_TRAILER 64 __u32 status; #define CRYPTO_SUCCESS 1 diff --git a/include/trace/events/bridge.h b/include/trace/events/bridge.h index 0f1cde00af11..1bee3e7fdf32 100644 --- a/include/trace/events/bridge.h +++ b/include/trace/events/bridge.h @@ -92,6 +92,37 @@ TRACE_EVENT(fdb_delete, __entry->addr[4], __entry->addr[5], __entry->vid) ); +TRACE_EVENT(br_fdb_update, + + TP_PROTO(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, u16 vid, bool added_by_user), + + TP_ARGS(br, source, addr, vid, added_by_user), + + TP_STRUCT__entry( + __string(br_dev, br->dev->name) + __string(dev, source->dev->name) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(bool, added_by_user) + ), + + TP_fast_assign( + __assign_str(br_dev, br->dev->name); + __assign_str(dev, source->dev->name); + memcpy(__entry->addr, addr, ETH_ALEN); + __entry->vid = vid; + __entry->added_by_user = added_by_user; + ), + + TP_printk("br_dev %s source %s addr %02x:%02x:%02x:%02x:%02x:%02x vid %u added_by_user %d", + __get_str(br_dev), __get_str(dev), __entry->addr[0], + __entry->addr[1], __entry->addr[2], __entry->addr[3], + __entry->addr[4], __entry->addr[5], __entry->vid, + __entry->added_by_user) +); + + #endif /* _TRACE_BRIDGE_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 08c206a863e1..ba848b761cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -758,6 +758,8 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 6c172548589d..0cbca96c66b9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -234,9 +234,14 @@ enum devlink_dpipe_field_ipv4_id { DEVLINK_DPIPE_FIELD_IPV4_DST_IP, }; +enum devlink_dpipe_field_ipv6_id { + DEVLINK_DPIPE_FIELD_IPV6_DST_IP, +}; + enum devlink_dpipe_header_id { DEVLINK_DPIPE_HEADER_ETHERNET, DEVLINK_DPIPE_HEADER_IPV4, + DEVLINK_DPIPE_HEADER_IPV6, }; #endif /* _UAPI_LINUX_DEVLINK_H_ */ diff --git a/include/uapi/linux/if_arp.h b/include/uapi/linux/if_arp.h index cf73510b9238..a2a635620600 100644 --- a/include/uapi/linux/if_arp.h +++ b/include/uapi/linux/if_arp.h @@ -59,6 +59,7 @@ #define ARPHRD_LAPB 516 /* LAPB */ #define ARPHRD_DDCMP 517 /* Digital's DDCMP protocol */ #define ARPHRD_RAWHDLC 518 /* Raw HDLC */ +#define ARPHRD_RAWIP 519 /* Raw IP */ #define ARPHRD_TUNNEL 768 /* IPIP tunnel */ #define ARPHRD_TUNNEL6 769 /* IP6IP6 tunnel */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 61f7ccce5b69..9037065e23d0 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -140,6 +140,9 @@ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ #define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ +#define ETH_P_MAP 0x00F9 /* Qualcomm multiplexing and + * aggregation protocol + */ /* * This is an Ethernet frame header. diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b3f346fb9fe3..758f12b58541 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -184,7 +184,9 @@ enum LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */ LINUX_MIB_LISTENOVERFLOWS, /* ListenOverflows */ LINUX_MIB_LISTENDROPS, /* ListenDrops */ + LINUX_MIB_TCPHPHITS, /* TCPHPHits */ LINUX_MIB_TCPPUREACKS, /* TCPPureAcks */ + LINUX_MIB_TCPHPACKS, /* TCPHPAcks */ LINUX_MIB_TCPRENORECOVERY, /* TCPRenoRecovery */ LINUX_MIB_TCPSACKRECOVERY, /* TCPSackRecovery */ LINUX_MIB_TCPSACKRENEGING, /* TCPSACKReneging */ diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index 5c35a98d02bf..7d4f89b7cb84 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -69,7 +69,8 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node) /* ref is an approximation on access frequency. It does not * have to be very accurate. Hence, no protection is used. */ - node->ref = 1; + if (!node->ref) + node->ref = 1; } int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset, diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d246905f2bb1..431126f31ea3 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -514,6 +514,29 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static u32 htab_lru_map_gen_lookup(struct bpf_map *map, + struct bpf_insn *insn_buf) +{ + struct bpf_insn *insn = insn_buf; + const int ret = BPF_REG_0; + const int ref_reg = BPF_REG_1; + + *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem); + *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); + *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, + offsetof(struct htab_elem, lru_node) + + offsetof(struct bpf_lru_node, ref)); + *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); + *insn++ = BPF_ST_MEM(BPF_B, ret, + offsetof(struct htab_elem, lru_node) + + offsetof(struct bpf_lru_node, ref), + 1); + *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, + offsetof(struct htab_elem, key) + + round_up(map->key_size, 8)); + return insn - insn_buf; +} + /* It is called from the bpf_lru_list when the LRU needs to delete * older elements from the htab. */ @@ -1137,6 +1160,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_lookup_elem = htab_lru_map_lookup_elem, .map_update_elem = htab_lru_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, + .map_gen_lookup = htab_lru_map_gen_lookup, }; /* Called from eBPF program */ diff --git a/lib/idr.c b/lib/idr.c index b13682bb0a1c..082778cf883e 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -7,45 +7,32 @@ DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); static DEFINE_SPINLOCK(simple_ida_lock); -/** - * idr_alloc - allocate an id - * @idr: idr handle - * @ptr: pointer to be associated with the new id - * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive) - * @gfp: memory allocation flags - * - * Allocates an unused ID in the range [start, end). Returns -ENOSPC - * if there are no unused IDs in that range. - * - * Note that @end is treated as max when <= 0. This is to always allow - * using @start + N as @end as long as N is inside integer range. - * - * Simultaneous modifications to the @idr are not allowed and should be - * prevented by the user, usually with a lock. idr_alloc() may be called - * concurrently with read-only accesses to the @idr, such as idr_find() and - * idr_for_each_entry(). - */ -int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, + unsigned long start, unsigned long end, gfp_t gfp, + bool ext) { - void __rcu **slot; struct radix_tree_iter iter; + void __rcu **slot; - if (WARN_ON_ONCE(start < 0)) - return -EINVAL; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; radix_tree_iter_init(&iter, start); - slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); + if (ext) + slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end); + else + slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); if (IS_ERR(slot)) return PTR_ERR(slot); radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); - return iter.index; + + if (index) + *index = iter.index; + return 0; } -EXPORT_SYMBOL_GPL(idr_alloc); +EXPORT_SYMBOL_GPL(idr_alloc_cmn); /** * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion @@ -134,6 +121,20 @@ void *idr_get_next(struct idr *idr, int *nextid) } EXPORT_SYMBOL(idr_get_next); +void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) +{ + struct radix_tree_iter iter; + void __rcu **slot; + + slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + if (!slot) + return NULL; + + *nextid = iter.index; + return rcu_dereference_raw(*slot); +} +EXPORT_SYMBOL(idr_get_next_ext); + /** * idr_replace - replace pointer for given id * @idr: idr handle @@ -150,12 +151,19 @@ EXPORT_SYMBOL(idr_get_next); */ void *idr_replace(struct idr *idr, void *ptr, int id) { + if (WARN_ON_ONCE(id < 0)) + return ERR_PTR(-EINVAL); + + return idr_replace_ext(idr, ptr, id); +} +EXPORT_SYMBOL(idr_replace); + +void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) +{ struct radix_tree_node *node; void __rcu **slot = NULL; void *entry; - if (WARN_ON_ONCE(id < 0)) - return ERR_PTR(-EINVAL); if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return ERR_PTR(-EINVAL); @@ -167,7 +175,7 @@ void *idr_replace(struct idr *idr, void *ptr, int id) return entry; } -EXPORT_SYMBOL(idr_replace); +EXPORT_SYMBOL(idr_replace_ext); /** * DOC: IDA description diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 898e87998417..c191b42d1213 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2137,13 +2137,13 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) } EXPORT_SYMBOL(ida_pre_get); -void __rcu **idr_get_free(struct radix_tree_root *root, - struct radix_tree_iter *iter, gfp_t gfp, int end) +void __rcu **idr_get_free_cmn(struct radix_tree_root *root, + struct radix_tree_iter *iter, gfp_t gfp, + unsigned long max) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->rnode; unsigned long maxindex, start = iter->next_index; - unsigned long max = end > 0 ? end - 1 : INT_MAX; unsigned int shift, offset = 0; grow: diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index be5e1da6d824..4ea5c8bbe286 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -583,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; - if (unlikely(fdb_modified)) + if (unlikely(fdb_modified)) { + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); + } } } else { spin_lock(&br->hash_lock); @@ -593,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); } } diff --git a/net/core/devlink.c b/net/core/devlink.c index 194708aa5f11..7d430c1d9c3e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -31,7 +31,7 @@ static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { { - .name = "destination_mac", + .name = "destination mac", .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, .bitwidth = 48, }, @@ -63,6 +63,23 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv4 = { }; EXPORT_SYMBOL(devlink_dpipe_header_ipv4); +static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = { + { + .name = "destination ip", + .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP, + .bitwidth = 128, + }, +}; + +struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { + .name = "ipv6", + .id = DEVLINK_DPIPE_HEADER_IPV6, + .fields = devlink_dpipe_fields_ipv6, + .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6), + .global = true, +}; +EXPORT_SYMBOL(devlink_dpipe_header_ipv6); + EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); static LIST_HEAD(devlink_list); diff --git a/net/core/filter.c b/net/core/filter.c index c6a37fe0285b..9dad3e7e2e10 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3150,6 +3150,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * +sock_filter_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + /* inet and inet6 sockets are created in a process + * context so there is always a valid uid/gid + */ + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -3455,6 +3469,10 @@ static bool sock_filter_is_valid_access(int off, int size, switch (off) { case offsetof(struct bpf_sock, bound_dev_if): break; + case offsetof(struct bpf_sock, mark): + break; + case offsetof(struct bpf_sock, priority): + break; default: return false; } @@ -3958,6 +3976,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, offsetof(struct sock, sk_bound_dev_if)); break; + case offsetof(struct bpf_sock, mark): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + break; + + case offsetof(struct bpf_sock, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + break; + case offsetof(struct bpf_sock, family): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); @@ -4207,7 +4247,7 @@ const struct bpf_verifier_ops lwt_xmit_prog_ops = { }; const struct bpf_verifier_ops cg_sock_prog_ops = { - .get_func_proto = bpf_base_func_proto, + .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 4a0292c97070..1132820c8e62 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -42,6 +42,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); +EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 47a7b59b355e..5df7857fc0f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -16,6 +16,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/xfrm.h> +#include <linux/string.h> #include <net/addrconf.h> #include <net/inet_common.h> @@ -30,6 +31,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> +#include <net/sock.h> #include "dccp.h" #include "ipv6.h" @@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - /* - * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below - * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example. - */ opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; } @@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; reset: @@ -653,6 +647,35 @@ discard: __kfree_skb(opt_skb); kfree_skb(skb); return 0; + +/* Handling IPV6_PKTOPTIONS skb the similar + * way it's done for net/ipv6/tcp_ipv6.c + */ +ipv6_pktoptions: + if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) + np->mcast_oif = inet6_iif(opt_skb); + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) + np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); + if (ipv6_opt_accepted(sk, opt_skb, + &DCCP_SKB_CB(opt_skb)->header.h6)) { + skb_set_owner_r(opt_skb, sk); + memmove(IP6CB(opt_skb), + &DCCP_SKB_CB(opt_skb)->header.h6, + sizeof(struct inet6_skb_parm)); + opt_skb = xchg(&np->pktoptions, opt_skb); + } else { + __kfree_skb(opt_skb); + opt_skb = xchg(&np->pktoptions, NULL); + } + } + + kfree_skb(opt_skb); + return 0; } static int dccp_v6_rcv(struct sk_buff *skb) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 741acd7b9646..319000573bc7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -499,19 +499,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) return esp_output_tail(x, skb, &esp); } +static inline int esp_remove_trailer(struct sk_buff *skb) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + struct crypto_aead *aead = x->data; + int alen, hlen, elen; + int padlen, trimlen; + __wsum csumdiff; + u8 nexthdr[2]; + int ret; + + alen = crypto_aead_authsize(aead); + hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + elen = skb->len - hlen; + + if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { + ret = xo->proto; + goto out; + } + + if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) + BUG(); + + ret = -EINVAL; + padlen = nexthdr[0]; + if (padlen + 2 + alen >= elen) { + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); + goto out; + } + + trimlen = alen + padlen + 2; + if (skb->ip_summed == CHECKSUM_COMPLETE) { + csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); + skb->csum = csum_block_sub(skb->csum, csumdiff, + skb->len - trimlen); + } + pskb_trim(skb, skb->len - trimlen); + + ret = nexthdr[1]; + +out: + return ret; +} + int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; - int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); - int elen = skb->len - hlen; int ihl; - u8 nexthdr[2]; - int padlen, trimlen; - __wsum csumdiff; if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) kfree(ESP_SKB_CB(skb)->tmp); @@ -519,16 +559,10 @@ int esp_input_done2(struct sk_buff *skb, int err) if (unlikely(err)) goto out; - if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) - BUG(); - - err = -EINVAL; - padlen = nexthdr[0]; - if (padlen + 2 + alen >= elen) + err = esp_remove_trailer(skb); + if (unlikely(err < 0)) goto out; - /* ... check padding bits here. Silly. :-) */ - iph = ip_hdr(skb); ihl = iph->ihl * 4; @@ -569,22 +603,12 @@ int esp_input_done2(struct sk_buff *skb, int err) skb->ip_summed = CHECKSUM_UNNECESSARY; } - trimlen = alen + padlen + 2; - if (skb->ip_summed == CHECKSUM_COMPLETE) { - csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); - skb->csum = csum_block_sub(skb->csum, csumdiff, - skb->len - trimlen); - } - pskb_trim(skb, skb->len - trimlen); - skb_pull_rcsum(skb, hlen); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); - err = nexthdr[1]; - /* RFC4303: Drop dummy packets without any error */ if (err == IPPROTO_NONE) err = -EINVAL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b6d3fe03feb3..127153f1ed8a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -206,7 +206,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS), SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS), + SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS), SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS), + SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS), SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 566083ee2654..21ca2df274c5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1963,8 +1963,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_rcv_space_adjust(sk); skip_copy: - if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) + if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; + tcp_fast_path_check(sk); + } if (used + offset < skb->len) continue; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7616cd76f6f6..c5d7656beeee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -103,6 +103,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ +#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ @@ -3371,6 +3372,12 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin; + /* Note, it is the only place, where + * fast path is recovered for sending TCP. + */ + tp->pred_flags = 0; + tcp_fast_path_check(sk); + if (tcp_send_head(sk)) tcp_slow_start_after_idle_check(sk); @@ -3552,7 +3559,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ - u32 ack_ev_flags = 0; sack_state.first_sackt = 0; sack_state.rate = &rs; @@ -3593,26 +3599,42 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (flag & FLAG_UPDATE_TS_RECENT) tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - if (ack_seq != TCP_SKB_CB(skb)->end_seq) - flag |= FLAG_DATA; - else - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { + /* Window is constant, pure forward advance. + * No more checks are required. + * Note, we use the fact that SND.UNA>=SND.WL2. + */ + tcp_update_wl(tp, ack_seq); + tcp_snd_una_update(tp, ack); + flag |= FLAG_WIN_UPDATE; - flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); + tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - if (TCP_SKB_CB(skb)->sacked) - flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_state); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); + } else { + u32 ack_ev_flags = CA_ACK_SLOWPATH; - if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { - flag |= FLAG_ECE; - ack_ev_flags = CA_ACK_ECE; - } + if (ack_seq != TCP_SKB_CB(skb)->end_seq) + flag |= FLAG_DATA; + else + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + + flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); - if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (TCP_SKB_CB(skb)->sacked) + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, + &sack_state); - tcp_in_ack_event(sk, ack_ev_flags); + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + flag |= FLAG_ECE; + ack_ev_flags |= CA_ACK_ECE; + } + + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE; + + tcp_in_ack_event(sk, ack_ev_flags); + } /* We passed data and got it acked, remove any soft error * log. Something worked... @@ -4404,6 +4426,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->has_rxtstamp) TCP_SKB_CB(skb)->swtstamp = skb->tstamp; + /* Disable header prediction. */ + tp->pred_flags = 0; inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); @@ -4644,6 +4668,8 @@ queue_and_out: if (tp->rx_opt.num_sacks) tcp_sack_remove(tp); + tcp_fast_path_check(sk); + if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) @@ -4969,6 +4995,7 @@ static int tcp_prune_queue(struct sock *sk) NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED); /* Massive buffer overcommit. */ + tp->pred_flags = 0; return -1; } @@ -5140,6 +5167,9 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) tp->urg_data = TCP_URG_NOTYET; tp->urg_seq = ptr; + + /* Disable header prediction. */ + tp->pred_flags = 0; } /* This is the 'fast' part of urgent handling. */ @@ -5298,6 +5328,26 @@ discard: /* * TCP receive function for the ESTABLISHED state. + * + * It is split into a fast path and a slow path. The fast path is + * disabled when: + * - A zero window was announced from us - zero window probing + * is only handled properly in the slow path. + * - Out of order segments arrived. + * - Urgent data is expected. + * - There is no buffer space left + * - Unexpected TCP flags/window values/header lengths are received + * (detected by checking the TCP header against pred_flags) + * - Data is sent in both directions. Fast path only supports pure senders + * or pure receivers (this means either the sequence number or the ack + * value must stay constant) + * - Unexpected TCP option. + * + * When these conditions are not satisfied it drops into a standard + * receive procedure patterned after RFC793 to handle all cases. + * The first three cases are guaranteed by proper pred_flags setting, + * the rest is checked inline. Fast processing is turned on in + * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) @@ -5308,19 +5358,144 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); + /* + * Header prediction. + * The code loosely follows the one in the famous + * "30 instruction TCP receive" Van Jacobson mail. + * + * Van's trick is to deposit buffers into socket queue + * on a device interrupt, to call tcp_recv function + * on the receive process context and checksum and copy + * the buffer to user space. smart... + * + * Our current scheme is not silly either but we take the + * extra cost of the net_bh soft interrupt processing... + * We do checksum and copy also but from device to kernel. + */ tp->rx_opt.saw_tstamp = 0; + /* pred_flags is 0xS?10 << 16 + snd_wnd + * if header_prediction is to be made + * 'S' will always be tp->tcp_header_len >> 2 + * '?' will be 0 for the fast path, otherwise pred_flags is 0 to + * turn it off (when there are holes in the receive + * space for instance) + * PSH flag is ignored. + */ + + if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && + TCP_SKB_CB(skb)->seq == tp->rcv_nxt && + !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { + int tcp_header_len = tp->tcp_header_len; + + /* Timestamp header prediction: tcp_header_len + * is automatically equal to th->doff*4 due to pred_flags + * match. + */ + + /* Check timestamp */ + if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { + /* No? Slow path! */ + if (!tcp_parse_aligned_timestamp(tp, th)) + goto slow_path; + + /* If PAWS failed, check it more carefully in slow path */ + if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) + goto slow_path; + + /* DO NOT update ts_recent here, if checksum fails + * and timestamp was corrupted part, it will result + * in a hung connection since we will drop all + * future packets due to the PAWS test. + */ + } + + if (len <= tcp_header_len) { + /* Bulk data transfer: sender */ + if (len == tcp_header_len) { + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + + /* We know that such packets are checksummed + * on entry. + */ + tcp_ack(sk, skb, 0); + __kfree_skb(skb); + tcp_data_snd_check(sk); + return; + } else { /* Header too small */ + TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); + goto discard; + } + } else { + int eaten = 0; + bool fragstolen = false; + + if (tcp_checksum_complete(skb)) + goto csum_error; + + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; + + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + + tcp_rcv_rtt_measure_ts(sk, skb); + + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + + /* Bulk data transfer: receiver */ + eaten = tcp_queue_rcv(sk, skb, tcp_header_len, + &fragstolen); + + tcp_event_data_recv(sk, skb); + + if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { + /* Well, only one small jumplet in fast path... */ + tcp_ack(sk, skb, FLAG_DATA); + tcp_data_snd_check(sk); + if (!inet_csk_ack_scheduled(sk)) + goto no_ack; + } + + __tcp_ack_snd_check(sk, 0); +no_ack: + if (eaten) + kfree_skb_partial(skb, fragstolen); + sk->sk_data_ready(sk); + return; + } + } + +slow_path: if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error; if (!th->ack && !th->rst && !th->syn) goto discard; + /* + * Standard slow path. + */ + if (!tcp_validate_incoming(sk, skb, th, 1)) return; - if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0) +step5: + if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) goto discard; tcp_rcv_rtt_measure_ts(sk, skb); @@ -5373,6 +5548,11 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); + + if (!tp->rx_opt.snd_wscale) + __tcp_fast_path_on(tp, tp->snd_wnd); + else + tp->pred_flags = 0; } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@ -5501,7 +5681,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_ecn_rcv_synack(tp, th); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_ack(sk, skb, 0); + tcp_ack(sk, skb, FLAG_SLOWPATH); /* Ok.. it's good. Set up sequence numbers and * move to established. @@ -5737,8 +5917,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) return 0; /* step 5: check the ACK field */ - - acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT | + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT | FLAG_NO_CHALLENGE_ACK) > 0; if (!acceptable) { @@ -5806,6 +5986,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tp->lsndtime = tcp_jiffies32; tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1537b87c657f..188a6f31356d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -436,6 +436,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_sock *newtp = tcp_sk(newsk); /* Now setup tcp_sock */ + newtp->pred_flags = 0; + newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->segs_in = 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3e0d19631534..5b6690d05abb 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -295,7 +295,9 @@ static u16 tcp_select_window(struct sock *sk) /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; + /* If we advertise zero window, disable fast path. */ if (new_win == 0) { + tp->pred_flags = 0; if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index e5de84310949..bec9cafbe3f9 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -154,6 +154,24 @@ static inline void update_rtt_min(struct westwood *w) } /* + * @westwood_fast_bw + * It is called when we are in fast path. In particular it is called when + * header prediction is successful. In such case in fact update is + * straight forward and doesn't need any particular care. + */ +static inline void westwood_fast_bw(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); + + westwood_update_window(sk); + + w->bk += tp->snd_una - w->snd_una; + w->snd_una = tp->snd_una; + update_rtt_min(w); +} + +/* * @westwood_acked_count * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. @@ -205,12 +223,17 @@ static u32 tcp_westwood_bw_rttmin(const struct sock *sk) static void tcp_westwood_ack(struct sock *sk, u32 ack_flags) { - struct westwood *w = inet_csk_ca(sk); + if (ack_flags & CA_ACK_SLOWPATH) { + struct westwood *w = inet_csk_ca(sk); - westwood_update_window(sk); - w->bk += westwood_acked_count(sk); + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); - update_rtt_min(w); + update_rtt_min(w); + return; + } + + westwood_fast_bw(sk); } static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 74bde202eb9a..7fb41b0ad437 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -461,29 +461,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) return esp6_output_tail(x, skb, &esp); } -int esp6_input_done2(struct sk_buff *skb, int err) +static inline int esp_remove_trailer(struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; - int alen = crypto_aead_authsize(aead); - int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); - int elen = skb->len - hlen; - int hdr_len = skb_network_header_len(skb); + int alen, hlen, elen; int padlen, trimlen; __wsum csumdiff; u8 nexthdr[2]; + int ret; - if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) - kfree(ESP_SKB_CB(skb)->tmp); + alen = crypto_aead_authsize(aead); + hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + elen = skb->len - hlen; - if (unlikely(err)) + if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { + ret = xo->proto; goto out; + } if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) BUG(); - err = -EINVAL; + ret = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", @@ -491,26 +492,46 @@ int esp6_input_done2(struct sk_buff *skb, int err) goto out; } - /* ... check padding bits here. Silly. :-) */ - trimlen = alen + padlen + 2; if (skb->ip_summed == CHECKSUM_COMPLETE) { - skb_postpull_rcsum(skb, skb_network_header(skb), - skb_network_header_len(skb)); csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); skb->csum = csum_block_sub(skb->csum, csumdiff, skb->len - trimlen); } pskb_trim(skb, skb->len - trimlen); + ret = nexthdr[1]; + +out: + return ret; +} + +int esp6_input_done2(struct sk_buff *skb, int err) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + struct crypto_aead *aead = x->data; + int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + int hdr_len = skb_network_header_len(skb); + + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); + + if (unlikely(err)) + goto out; + + err = esp_remove_trailer(skb); + if (unlikely(err < 0)) + goto out; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); - err = nexthdr[1]; - /* RFC4303: Drop dummy packets without any error */ if (err == IPPROTO_NONE) err = -EINVAL; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 9c1a885ee482..7ff54db73a48 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -62,17 +62,23 @@ static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb) { struct ipv6_sr_hdr *srh; - struct ipv6hdr *hdr; - int len; + int len, srhoff = 0; + + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return NULL; + + if (!pskb_may_pull(skb, srhoff + sizeof(*srh))) + return NULL; + + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); - hdr = ipv6_hdr(skb); - if (hdr->nexthdr != IPPROTO_ROUTING) + /* make sure it's a Segment Routing header (Routing Type 4) */ + if (srh->type != IPV6_SRCRT_TYPE_4) return NULL; - srh = (struct ipv6_sr_hdr *)(hdr + 1); len = (srh->hdrlen + 1) << 3; - if (!pskb_may_pull(skb, sizeof(*hdr) + len)) + if (!pskb_may_pull(skb, srhoff + len)) return NULL; if (!seg6_validate_srh(srh, len)) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 02fcb0c78a28..0eb545bcb247 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -70,11 +70,11 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) +static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) { - spin_lock_bh(&hinfo->lock); - hlist_del(&p->tcfa_head); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); + spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); /* * gen_estimator est_timer() might access p->tcfa_lock @@ -83,7 +83,7 @@ static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) call_rcu(&p->tcfa_rcu, free_tcf); } -int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) +int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; @@ -97,64 +97,60 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - tcf_hash_destroy(p->hinfo, p); + tcf_idr_remove(p->idrinfo, p); ret = ACT_P_DELETED; } } return ret; } -EXPORT_SYMBOL(__tcf_hash_release); +EXPORT_SYMBOL(__tcf_idr_release); -static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + int err = 0, index = -1, s_i = 0, n_i = 0; u32 act_flags = cb->args[2]; unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; - spin_lock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); s_i = cb->args[0]; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - - if (jiffy_since && - time_after(jiffy_since, - (unsigned long)p->tcfa_tm.lastuse)) - continue; - - nest = nla_nest_start(skb, n_i); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nlmsg_trim(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && - n_i >= TCA_ACT_MAX_PRIO) - goto done; + idr_for_each_entry_ext(idr, p, id) { + index++; + if (index < s_i) + continue; + + if (jiffy_since && + time_after(jiffy_since, + (unsigned long)p->tcfa_tm.lastuse)) + continue; + + nest = nla_nest_start(skb, n_i); + if (!nest) + goto nla_put_failure; + err = tcf_action_dump_1(skb, p, 0, 0); + if (err < 0) { + index--; + nlmsg_trim(skb, nest); + goto done; } + nla_nest_end(skb, nest); + n_i++; + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) + goto done; } done: if (index >= 0) cb->args[0] = index + 1; - spin_unlock_bh(&hinfo->lock); + spin_unlock_bh(&idrinfo->lock); if (n_i) { if (act_flags & TCA_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; @@ -166,31 +162,29 @@ nla_put_failure: goto done; } -static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops) { struct nlattr *nest; - int i = 0, n_i = 0; + int n_i = 0; int ret = -EINVAL; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; nest = nla_nest_start(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct hlist_node *n; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - hlist_for_each_entry_safe(p, n, head, tcfa_head) { - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); - n_i++; - } else if (ret < 0) - goto nla_put_failure; + + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) { + module_put(p->ops->owner); + n_i++; + } else if (ret < 0) { + goto nla_put_failure; } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -207,12 +201,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) { - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(hinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops); } else if (type == RTM_GETACTION) { - return tcf_dump_walker(hinfo, skb, cb); + return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; @@ -220,40 +214,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo) { struct tc_action *p = NULL; - struct hlist_head *head; - spin_lock_bh(&hinfo->lock); - head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; - hlist_for_each_entry_rcu(p, head, tcfa_head) - if (p->tcfa_index == index) - break; - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + p = idr_find_ext(&idrinfo->action_idr, index); + spin_unlock_bh(&idrinfo->lock); return p; } -u32 tcf_hash_new_index(struct tc_action_net *tn) -{ - struct tcf_hashinfo *hinfo = tn->hinfo; - u32 val = hinfo->index; - - do { - if (++val == 0) - val = 1; - } while (tcf_hash_lookup(val, hinfo)); - - hinfo->index = val; - return val; -} -EXPORT_SYMBOL(tcf_hash_new_index); - -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = tcf_hash_lookup(index, hinfo); + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); if (p) { *a = p; @@ -261,15 +236,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); +EXPORT_SYMBOL(tcf_idr_search); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, + int bind) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = NULL; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); - if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (index && p) { if (bind) p->tcfa_bindcnt++; p->tcfa_refcnt++; @@ -278,23 +253,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } return false; } -EXPORT_SYMBOL(tcf_hash_check); +EXPORT_SYMBOL(tcf_idr_check); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); call_rcu(&a->tcfa_rcu, free_tcf); } -EXPORT_SYMBOL(tcf_hash_cleanup); +EXPORT_SYMBOL(tcf_idr_cleanup); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, - int bind, bool cpustats) +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct idr *idr = &idrinfo->action_idr; int err = -ENOMEM; + unsigned long idr_index; if (unlikely(!p)) return -ENOMEM; @@ -317,8 +294,28 @@ err2: } } spin_lock_init(&p->tcfa_lock); - INIT_HLIST_NODE(&p->tcfa_head); - p->tcfa_index = index ? index : tcf_hash_new_index(tn); + /* user doesn't specify an index */ + if (!index) { + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, + GFP_KERNEL); + spin_unlock_bh(&idrinfo->lock); + if (err) { +err3: + free_percpu(p->cpu_qstats); + goto err2; + } + p->tcfa_index = idr_index; + } else { + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, NULL, index, index + 1, + GFP_KERNEL); + spin_unlock_bh(&idrinfo->lock); + if (err) + goto err3; + p->tcfa_index = index; + } + p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; @@ -327,52 +324,46 @@ err2: &p->tcfa_rate_est, &p->tcfa_lock, NULL, est); if (err) { - free_percpu(p->cpu_qstats); - goto err2; + goto err3; } } - p->hinfo = hinfo; + p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); *a = p; return 0; } -EXPORT_SYMBOL(tcf_hash_create); +EXPORT_SYMBOL(tcf_idr_create); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a) +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { - struct tcf_hashinfo *hinfo = tn->hinfo; - unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask); + struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock_bh(&hinfo->lock); - hlist_add_head(&a->tcfa_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index); + spin_unlock_bh(&idrinfo->lock); } -EXPORT_SYMBOL(tcf_hash_insert); +EXPORT_SYMBOL(tcf_idr_insert); -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo) +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo) { - int i; - - for (i = 0; i < hinfo->hmask + 1; i++) { - struct tc_action *p; - struct hlist_node *n; - - hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) { - int ret; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + int ret; + unsigned long id = 1; - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) - module_put(ops->owner); - else if (ret < 0) - return; - } + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) + module_put(ops->owner); + else if (ret < 0) + return; } - kfree(hinfo->htab); + idr_destroy(&idrinfo->action_idr); } -EXPORT_SYMBOL(tcf_hashinfo_destroy); +EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); @@ -524,7 +515,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = __tcf_hash_release(a, bind, true); + ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9afe1337cfd1..c0c707eb2c96 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -21,7 +21,6 @@ #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> -#define BPF_TAB_MASK 15 #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { @@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - if (!tcf_hash_check(tn, parm->index, act, bind)) { - ret = tcf_hash_create(tn, parm->index, est, act, - &act_bpf_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, act, bind)) { + ret = tcf_idr_create(tn, parm->index, est, act, + &act_bpf_ops, bind, true); if (ret < 0) return ret; @@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - tcf_hash_release(*act, bind); + tcf_idr_release(*act, bind); if (!replace) return -EEXIST; } @@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, rcu_assign_pointer(prog->filter, cfg.filter); if (res == ACT_P_CREATED) { - tcf_hash_insert(tn, *act); + tcf_idr_insert(tn, *act); } else { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); @@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_hash_cleanup(*act, est); + tcf_idr_cleanup(*act, est); return ret; } @@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_bpf_ops __read_mostly = { @@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2155bc6c6a1e..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -28,8 +28,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> -#define CONNMARK_TAB_MASK 3 - static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; @@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_connmark_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_connmark_ops, bind, false); if (ret) return ret; @@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else { ci = to_connmark(*a); if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; /* replacing action and zone */ @@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_connmark_ops = { @@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 67afc12df88b..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,8 +37,6 @@ #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> -#define CSUM_TAB_MASK 15 - static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; @@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_csum_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -609,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_csum_ops = { @@ -628,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 99afe8b1f1fb..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -23,8 +23,6 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -#define GACT_TAB_MASK 15 - static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; @@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_gact_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_gact_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_gact_ops = { @@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 7ed1be80ee86..8ccd35825b6b 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -34,8 +34,6 @@ #include <linux/etherdevice.h> #include <net/ife.h> -#define IFE_TAB_MASK 15 - static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -452,18 +450,18 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops, - bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, + bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -507,7 +505,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (err) { metadata_parse_err: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); @@ -541,7 +539,7 @@ metadata_parse_err: spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -800,7 +798,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ife_ops = { @@ -820,7 +818,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 541707802a23..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -28,8 +28,6 @@ #include <linux/netfilter_ipv4/ip_tables.h> -#define IPT_TAB_MASK 15 - static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; @@ -118,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - exists = tcf_hash_check(tn, index, a, bind); + exists = tcf_idr_check(tn, index, a, bind); if (exists && bind) return 0; if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, index, est, a, ops, bind, - false); + ret = tcf_idr_create(tn, index, est, a, ops, bind, + false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -180,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err3: @@ -189,7 +187,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -316,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ipt_ops = { @@ -336,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -366,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_xt_ops = { @@ -386,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1b5549ababd4..416627c66f08 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,7 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENODEV; } mac_header_xmit = dev_is_mac_header_xmit(dev); @@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!exists) { if (dev == NULL) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_mirred_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_mirred_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -152,7 +151,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock_bh(&mirred_list_lock); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); } return ret; @@ -283,7 +282,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static int mirred_device_event(struct notifier_block *unused, @@ -344,7 +343,7 @@ static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9016ab8a0649..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -29,8 +29,6 @@ #include <net/udp.h> -#define NAT_TAB_MASK 15 - static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; @@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_nat_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_nat_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_nat_ops = { @@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7dc5892671c8..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,8 +24,6 @@ #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> -#define PEDIT_TAB_MASK 15 - static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!tcf_idr_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_pedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_pedit_ops, bind, false); if (ret) return ret; p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); kfree(keys_ex); return -ENOMEM; } @@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; p = to_pedit(*a); @@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_pedit_ops = { @@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b062bc80c7cb..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,8 +40,6 @@ struct tcf_police { #define to_police(pc) ((struct tcf_police *)pc) -#define POL_TAB_MASK 15 - /* old policer structure from before tc actions */ struct tc_police_compat { u32 index; @@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_POLICE_TBF]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, NULL, a, - &act_police_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, NULL, a, + &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return ret; police->tcfp_t_c = ktime_get_ns(); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; @@ -196,7 +194,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } MODULE_AUTHOR("Alexey Kuznetsov"); @@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 59d6645a4007..ec986ae52808 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -25,7 +25,6 @@ #include <linux/if_arp.h> -#define SAMPLE_TAB_MASK 7 static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; @@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SAMPLE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } RCU_INIT_POINTER(s->psample_group, psample_group); @@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_sample_ops = { @@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 43605e7ce051..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -24,8 +24,6 @@ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -#define SIMP_TAB_MASK 7 - static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; @@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (tb[TCA_DEF_DATA] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } defdata = nla_data(tb[TCA_DEF_DATA]); if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_simp_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_simp_ops, bind, false); if (ret) return ret; d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return ret; } d->tcf_action = parm->action; @@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_simp_ops = { @@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 6b3e65d7de0c..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -27,8 +27,6 @@ #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> -#define SKBEDIT_TAB_MASK 15 - static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; @@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!flags) { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbedit_ops, bind, false); if (ret) return ret; @@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbedit_ops = { @@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index a73c4bbcada2..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> -#define SKBMOD_TAB_MASK 15 - static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; @@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return -EINVAL; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbmod_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { if (ovr) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbmod_ops = { @@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fd7e75679c69..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> -#define TUNNEL_KEY_TAB_MASK 15 - static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; @@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_tunnel_key_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err_out: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return ret; } @@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_tunnel_key_ops = { @@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 13ba3a89f675..16eb067a8d8f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -19,8 +19,6 @@ #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> -#define VLAN_TAB_MASK 15 - static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; @@ -128,7 +126,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -139,13 +137,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); if (push_vid >= VLAN_VID_MASK) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ERANGE; } @@ -167,20 +165,20 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } action = parm->v_action; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_vlan_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -199,7 +197,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&v->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -252,7 +250,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_vlan_ops = { @@ -271,7 +269,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 73cc7f167a38..d89ebafd2239 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -235,6 +235,14 @@ skip: } } +static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct basic_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { @@ -280,6 +288,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, + .bind_class = basic_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 6f2dffe30f25..520c5027646a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -607,6 +607,14 @@ nla_put_failure: return -1; } +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_bpf_prog *prog = fh; + + if (prog && prog->res.classid == classid) + prog->res.class = cl; +} + static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -635,6 +643,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .delete = cls_bpf_delete, .walk = cls_bpf_walk, .dump = cls_bpf_dump, + .bind_class = cls_bpf_bind_class, }; static int __init cls_bpf_init_mod(void) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index bd9dab41f8af..1a267e77c6de 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -68,7 +68,6 @@ struct cls_fl_head { struct rhashtable ht; struct fl_flow_mask mask; struct flow_dissector dissector; - u32 hgen; bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; @@ -76,6 +75,7 @@ struct cls_fl_head { struct work_struct work; struct rcu_head rcu; }; + struct idr handle_idr; }; struct cls_fl_filter { @@ -210,6 +210,7 @@ static int fl_init(struct tcf_proto *tp) INIT_LIST_HEAD_RCU(&head->filters); rcu_assign_pointer(tp->root, head); + idr_init(&head->handle_idr); return 0; } @@ -295,6 +296,9 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) { + struct cls_fl_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); @@ -327,6 +331,7 @@ static void fl_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) __fl_delete(tp, f); + idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); call_rcu(&head->rcu, fl_destroy_rcu); @@ -335,12 +340,8 @@ static void fl_destroy(struct tcf_proto *tp) static void *fl_get(struct tcf_proto *tp, u32 handle) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f; - list_for_each_entry(f, &head->filters, list) - if (f->handle == handle) - return f; - return NULL; + return idr_find_ext(&head->handle_idr, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -859,27 +860,6 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, return 0; } -static u32 fl_grab_new_handle(struct tcf_proto *tp, - struct cls_fl_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && fl_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; -} - static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -890,6 +870,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_filter *fnew; struct nlattr **tb; struct fl_flow_mask mask = {}; + unsigned long idr_index; int err; if (!tca[TCA_OPTIONS]) @@ -920,13 +901,21 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (!handle) { - handle = fl_grab_new_handle(tp, head); - if (!handle) { - err = -EINVAL; + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x80000000, GFP_KERNEL); + if (err) goto errout; - } + fnew->handle = idr_index; + } + + /* user specifies a handle and it doesn't exist */ + if (handle && !fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + fnew->handle = idr_index; } - fnew->handle = handle; if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); @@ -980,6 +969,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, *arg = fnew; if (fold) { + fnew->handle = handle; + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, fl_destroy_filter); @@ -1360,6 +1351,14 @@ nla_put_failure: return -1; } +static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_fl_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -1370,6 +1369,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .dump = fl_dump, + .bind_class = fl_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 192255ec50bd..941245ad07fd 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -412,6 +412,14 @@ nla_put_failure: return -1; } +static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct fw_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fw_ops __read_mostly = { .kind = "fw", .classify = fw_classify, @@ -422,6 +430,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .delete = fw_delete, .walk = fw_walk, .dump = fw_dump, + .bind_class = fw_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d4dc387f7a56..21cc45caf842 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -251,6 +251,14 @@ nla_put_failure: return -1; } +static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_mall_head *head = fh; + + if (head && head->res.classid == classid) + head->res.class = cl; +} + static struct tcf_proto_ops cls_mall_ops __read_mostly = { .kind = "matchall", .classify = mall_classify, @@ -261,6 +269,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .delete = mall_delete, .walk = mall_walk, .dump = mall_dump, + .bind_class = mall_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 3b70982394ce..9ddde65915d2 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -624,6 +624,14 @@ nla_put_failure: return -1; } +static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct route4_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_route4_ops __read_mostly = { .kind = "route", .classify = route4_classify, @@ -634,6 +642,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .delete = route4_delete, .walk = route4_walk, .dump = route4_dump, + .bind_class = route4_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 26203ff817f3..98c05db85bcb 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -723,6 +723,14 @@ nla_put_failure: return -1; } +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct rsvp_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, @@ -733,6 +741,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = { .delete = rsvp_delete, .walk = rsvp_walk, .dump = rsvp_dump, + .bind_class = rsvp_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index fb281b9b2c52..14a7e08b2fa9 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -606,6 +606,14 @@ nla_put_failure: return -1; } +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tcindex_filter_result *r = fh; + + if (r && r->res.classid == classid) + r->res.class = cl; +} + static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .kind = "tcindex", .classify = tcindex_classify, @@ -616,6 +624,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .delete = tcindex_delete, .walk = tcindex_walk, .dump = tcindex_dump, + .bind_class = tcindex_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 99ea4c74dd5b..10b8d851fc6b 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1112,6 +1112,14 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } +static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tc_u_knode *n = fh; + + if (n && n->res.classid == classid) + n->res.class = cl; +} + static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { @@ -1242,6 +1250,7 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .delete = u32_delete, .walk = u32_walk, .dump = u32_dump, + .bind_class = u32_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e7f8e4bfd4ec..929b024f41ba 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -35,6 +35,7 @@ #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> /* @@ -1648,6 +1649,64 @@ static int tclass_del_notify(struct net *net, n->nlmsg_flags & NLM_F_ECHO); } +#ifdef CONFIG_NET_CLS + +struct tcf_bind_args { + struct tcf_walker w; + u32 classid; + unsigned long cl; +}; + +static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) +{ + struct tcf_bind_args *a = (void *)arg; + + if (tp->ops->bind_class) { + tcf_tree_lock(tp); + tp->ops->bind_class(n, a->classid, a->cl); + tcf_tree_unlock(tp); + } + return 0; +} + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + struct tcf_block *block; + struct tcf_chain *chain; + unsigned long cl; + + cl = cops->find(q, portid); + if (!cl) + return; + block = cops->tcf_block(q, cl); + if (!block) + return; + list_for_each_entry(chain, &block->chain_list, list) { + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) { + struct tcf_bind_args arg = {}; + + arg.w.fn = tcf_node_bind; + arg.classid = clid; + arg.cl = new_cl; + tp->ops->walk(tp, &arg.w); + } + } +} + +#else + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ +} + +#endif + static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { @@ -1753,6 +1812,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, break; case RTM_DELTCLASS: err = tclass_del_notify(net, cops, skb, n, q, cl); + /* Unbind the class with flilters with 0 */ + tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1767,9 +1828,12 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl); - if (err == 0) + if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); - + /* We just create a new class, need to do reverse binding. */ + if (cl != new_cl) + tc_bind_tclass(q, portid, clid, new_cl); + } out: return err; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index f07eec59dcae..2515cd2bc5db 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -247,6 +247,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + if (xo->status & CRYPTO_INVALID_PROTOCOL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cc3268d814b4..490132d6dc36 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -900,13 +900,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, ret = copy_user_offload(&x->xso, skb); if (ret) goto out; - if (x->security) - ret = copy_sec_ctx(x->security, skb); if (x->props.output_mark) { ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); if (ret) goto out; } + if (x->security) + ret = copy_sec_ctx(x->security, skb); out: return ret; } diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index ca3b22ed577a..098c857f1eda 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -88,6 +88,13 @@ struct bpf_map_def SEC("maps") array_map = { .max_entries = MAX_ENTRIES, }; +struct bpf_map_def SEC("maps") lru_hash_lookup_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + SEC("kprobe/sys_getuid") int stress_hmap(struct pt_regs *ctx) { @@ -148,12 +155,23 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) SEC("kprobe/sys_connect") int stress_lru_hmap_alloc(struct pt_regs *ctx) { + char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%dn"; + union { + u16 dst6[8]; + struct { + u16 magic0; + u16 magic1; + u16 tcase; + u16 unused16; + u32 unused32; + u32 key; + }; + } test_params; struct sockaddr_in6 *in6; - u16 test_case, dst6[8]; + u16 test_case; int addrlen, ret; - char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n"; long val = 1; - u32 key = bpf_get_prandom_u32(); + u32 key = 0; in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); addrlen = (int)PT_REGS_PARM3(ctx); @@ -161,14 +179,18 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) if (addrlen != sizeof(*in6)) return 0; - ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + ret = bpf_probe_read(test_params.dst6, sizeof(test_params.dst6), + &in6->sin6_addr); if (ret) goto done; - if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + if (test_params.magic0 != 0xdead || + test_params.magic1 != 0xbeef) return 0; - test_case = dst6[7]; + test_case = test_params.tcase; + if (test_case != 3) + key = bpf_get_prandom_u32(); if (test_case == 0) { ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); @@ -188,6 +210,16 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx) ret = bpf_map_update_elem(nolocal_lru_map, &key, &val, BPF_ANY); + } else if (test_case == 3) { + u32 i; + + key = test_params.key; + +#pragma clang loop unroll(full) + for (i = 0; i < 32; i++) { + bpf_map_lookup_elem(&lru_hash_lookup_map, &key); + key++; + } } else { ret = -EINVAL; } diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index bccbf8478e43..f388254896f6 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -46,6 +46,7 @@ enum test_type { HASH_LOOKUP, ARRAY_LOOKUP, INNER_LRU_HASH_PREALLOC, + LRU_HASH_LOOKUP, NR_TESTS, }; @@ -60,6 +61,7 @@ const char *test_map_names[NR_TESTS] = { [HASH_LOOKUP] = "hash_map", [ARRAY_LOOKUP] = "array_map", [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", + [LRU_HASH_LOOKUP] = "lru_hash_lookup_map", }; static int test_flags = ~0; @@ -67,6 +69,8 @@ static uint32_t num_map_entries; static uint32_t inner_lru_hash_size; static int inner_lru_hash_idx = -1; static int array_of_lru_hashs_idx = -1; +static int lru_hash_lookup_idx = -1; +static int lru_hash_lookup_test_entries = 32; static uint32_t max_cnt = 1000000; static int check_test_flags(enum test_type t) @@ -86,6 +90,32 @@ static void test_hash_prealloc(int cpu) cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } +static int pre_test_lru_hash_lookup(int tasks) +{ + int fd = map_fd[lru_hash_lookup_idx]; + uint32_t key; + long val = 1; + int ret; + + if (num_map_entries > lru_hash_lookup_test_entries) + lru_hash_lookup_test_entries = num_map_entries; + + /* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test. + * + * It is fine that the user requests for a map with + * num_map_entries < 32 and some of the later lru hash lookup + * may return not found. For LRU map, we are not interested + * in such small map performance. + */ + for (key = 0; key < lru_hash_lookup_test_entries; key++) { + ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST); + if (ret) + return ret; + } + + return 0; +} + static void do_test_lru(enum test_type test, int cpu) { static int inner_lru_map_fds[MAX_NR_CPUS]; @@ -135,13 +165,17 @@ static void do_test_lru(enum test_type test, int cpu) if (test == LRU_HASH_PREALLOC) { test_name = "lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 0; + in6.sin6_addr.s6_addr16[2] = 0; } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { test_name = "nocommon_lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 1; + in6.sin6_addr.s6_addr16[2] = 1; } else if (test == INNER_LRU_HASH_PREALLOC) { test_name = "inner_lru_hash_map_perf"; - in6.sin6_addr.s6_addr16[7] = 2; + in6.sin6_addr.s6_addr16[2] = 2; + } else if (test == LRU_HASH_LOOKUP) { + test_name = "lru_hash_lookup_perf"; + in6.sin6_addr.s6_addr16[2] = 3; + in6.sin6_addr.s6_addr32[3] = 0; } else { assert(0); } @@ -150,6 +184,11 @@ static void do_test_lru(enum test_type test, int cpu) for (i = 0; i < max_cnt; i++) { ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); assert(ret == -1 && errno == EBADF); + if (in6.sin6_addr.s6_addr32[3] < + lru_hash_lookup_test_entries - 32) + in6.sin6_addr.s6_addr32[3] += 32; + else + in6.sin6_addr.s6_addr32[3] = 0; } printf("%d:%s pre-alloc %lld events per sec\n", cpu, test_name, @@ -171,6 +210,11 @@ static void test_inner_lru_hash_prealloc(int cpu) do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); } +static void test_lru_hash_lookup(int cpu) +{ + do_test_lru(LRU_HASH_LOOKUP, cpu); +} + static void test_percpu_hash_prealloc(int cpu) { __u64 start_time; @@ -243,6 +287,11 @@ static void test_array_lookup(int cpu) cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); } +typedef int (*pre_test_func)(int tasks); +const pre_test_func pre_test_funcs[] = { + [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup, +}; + typedef void (*test_func)(int cpu); const test_func test_funcs[] = { [HASH_PREALLOC] = test_hash_prealloc, @@ -255,8 +304,25 @@ const test_func test_funcs[] = { [HASH_LOOKUP] = test_hash_lookup, [ARRAY_LOOKUP] = test_array_lookup, [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, + [LRU_HASH_LOOKUP] = test_lru_hash_lookup, }; +static int pre_test(int tasks) +{ + int i; + + for (i = 0; i < NR_TESTS; i++) { + if (pre_test_funcs[i] && check_test_flags(i)) { + int ret = pre_test_funcs[i](tasks); + + if (ret) + return ret; + } + } + + return 0; +} + static void loop(int cpu) { cpu_set_t cpuset; @@ -277,6 +343,8 @@ static void run_perf_test(int tasks) pid_t pid[tasks]; int i; + assert(!pre_test(tasks)); + for (i = 0; i < tasks; i++) { pid[i] = fork(); if (pid[i] == 0) { @@ -344,6 +412,9 @@ static void fixup_map(struct bpf_map_data *map, int idx) array_of_lru_hashs_idx = idx; } + if (!strcmp("lru_hash_lookup_map", map->name)) + lru_hash_lookup_idx = idx; + if (num_map_entries <= 0) return; diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c index 533dd11a6baa..05dcdf8a4baa 100644 --- a/samples/bpf/sock_flags_kern.c +++ b/samples/bpf/sock_flags_kern.c @@ -9,8 +9,13 @@ SEC("cgroup/sock1") int bpf_prog1(struct bpf_sock *sk) { char fmt[] = "socket: family %d type %d protocol %d\n"; + char fmt2[] = "socket: uid %u gid %u\n"; + __u64 gid_uid = bpf_get_current_uid_gid(); + __u32 uid = gid_uid & 0xffffffff; + __u32 gid = gid_uid >> 32; bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid); /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets * ie., make ping6 fail diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index c3cfb23e23b5..e79594dd629b 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -19,68 +19,271 @@ #include <errno.h> #include <fcntl.h> #include <net/if.h> +#include <inttypes.h> #include <linux/bpf.h> #include "libbpf.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static int prog_load(int idx) +static int prog_load(__u32 idx, __u32 mark, __u32 prio) { - struct bpf_insn prog[] = { + /* save pointer to context */ + struct bpf_insn prog_start[] = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + }; + struct bpf_insn prog_end[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + /* set sk_bound_dev_if on socket */ + struct bpf_insn prog_dev[] = { BPF_MOV64_IMM(BPF_REG_3, idx), BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), - BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ - BPF_EXIT_INSN(), }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, + /* set mark on socket */ + struct bpf_insn prog_mark[] = { + /* get uid of process */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark, else use the uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, mark), + + /* set the mark on the new socket */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, mark)), + }; + + /* set priority on socket */ + struct bpf_insn prog_prio[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_3, prio), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, priority)), + }; + + struct bpf_insn *prog; + size_t insns_cnt; + void *p; + int ret; + + insns_cnt = sizeof(prog_start) + sizeof(prog_end); + if (idx) + insns_cnt += sizeof(prog_dev); + + if (mark) + insns_cnt += sizeof(prog_mark); + + if (prio) + insns_cnt += sizeof(prog_prio); + + p = prog = malloc(insns_cnt); + if (!prog) { + fprintf(stderr, "Failed to allocate memory for instructions\n"); + return EXIT_FAILURE; + } + + memcpy(p, prog_start, sizeof(prog_start)); + p += sizeof(prog_start); + + if (idx) { + memcpy(p, prog_dev, sizeof(prog_dev)); + p += sizeof(prog_dev); + } + + if (mark) { + memcpy(p, prog_mark, sizeof(prog_mark)); + p += sizeof(prog_mark); + } + + if (prio) { + memcpy(p, prog_prio, sizeof(prog_prio)); + p += sizeof(prog_prio); + } + + memcpy(p, prog_end, sizeof(prog_end)); + p += sizeof(prog_end); + + insns_cnt /= sizeof(struct bpf_insn); + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SOCK, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); + + free(prog); + + return ret; +} + +static int get_bind_to_device(int sd, char *name, size_t len) +{ + socklen_t optlen = len; + int rc; + + name[0] = '\0'; + rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); + if (rc < 0) + perror("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static unsigned int get_somark(int sd) +{ + unsigned int mark = 0; + socklen_t optlen = sizeof(mark); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_MARK, &mark, &optlen); + if (rc < 0) + perror("getsockopt(SO_MARK)"); + + return mark; +} + +static unsigned int get_priority(int sd) +{ + unsigned int prio = 0; + socklen_t optlen = sizeof(prio); + int rc; + + rc = getsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, &optlen); + if (rc < 0) + perror("getsockopt(SO_PRIORITY)"); + + return prio; +} + +static int show_sockopts(int family) +{ + unsigned int mark, prio; + char name[16]; + int sd; + + sd = socket(family, SOCK_DGRAM, 17); + if (sd < 0) { + perror("socket"); + return 1; + } + + if (get_bind_to_device(sd, name, sizeof(name)) < 0) + return 1; + + mark = get_somark(sd); + prio = get_priority(sd); + + close(sd); + + printf("sd %d: dev %s, mark %u, priority %u\n", sd, name, mark, prio); + + return 0; } static int usage(const char *argv0) { - printf("Usage: %s cg-path device-index\n", argv0); + printf("Usage:\n"); + printf(" Attach a program\n"); + printf(" %s -b bind-to-dev -m mark -p prio cg-path\n", argv0); + printf("\n"); + printf(" Detach a program\n"); + printf(" %s -d cg-path\n", argv0); + printf("\n"); + printf(" Show inherited socket settings (mark, priority, and device)\n"); + printf(" %s [-6]\n", argv0); return EXIT_FAILURE; } int main(int argc, char **argv) { + __u32 idx = 0, mark = 0, prio = 0; + const char *cgrp_path = NULL; int cg_fd, prog_fd, ret; - unsigned int idx; + int family = PF_INET; + int do_attach = 1; + int rc; + + while ((rc = getopt(argc, argv, "db:m:p:6")) != -1) { + switch (rc) { + case 'd': + do_attach = 0; + break; + case 'b': + idx = if_nametoindex(optarg); + if (!idx) { + idx = strtoumax(optarg, NULL, 0); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + } + break; + case 'm': + mark = strtoumax(optarg, NULL, 0); + break; + case 'p': + prio = strtoumax(optarg, NULL, 0); + break; + case '6': + family = PF_INET6; + break; + default: + return usage(argv[0]); + } + } - if (argc < 2) - return usage(argv[0]); + if (optind == argc) + return show_sockopts(family); - idx = if_nametoindex(argv[2]); - if (!idx) { - printf("Invalid device name\n"); + cgrp_path = argv[optind]; + if (!cgrp_path) { + fprintf(stderr, "cgroup path not given\n"); return EXIT_FAILURE; } - cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); - if (cg_fd < 0) { - printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + if (do_attach && !idx && !mark && !prio) { + fprintf(stderr, + "One of device, mark or priority must be given\n"); return EXIT_FAILURE; } - prog_fd = prog_load(idx); - printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); - - if (prog_fd < 0) { - printf("Failed to load prog: '%s'\n", strerror(errno)); + cg_fd = open(cgrp_path, O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); return EXIT_FAILURE; } - ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE, 0); - if (ret < 0) { - printf("Failed to attach prog to cgroup: '%s'\n", - strerror(errno)); - return EXIT_FAILURE; + if (do_attach) { + prog_fd = prog_load(idx, mark, prio); + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + printf("Output from kernel verifier:\n%s\n-------\n", + bpf_log_buf); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, + BPF_CGROUP_INET_SOCK_CREATE, 0); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + } else { + ret = bpf_prog_detach(cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to detach prog from cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } } + close(cg_fd); return EXIT_SUCCESS; } diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh index 925fd467c7cc..a81f38eef417 100755 --- a/samples/bpf/test_cgrp2_sock.sh +++ b/samples/bpf/test_cgrp2_sock.sh @@ -1,47 +1,133 @@ -#!/bin/bash - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth0b - ip link set veth0b up - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad - ip netns exec at_ns0 ip link set dev veth0 up - ip link add foo type vrf table 1234 - ip link set foo up - ip addr add 172.16.1.101/24 dev veth0b - ip addr add 2401:db00::2/64 dev veth0b nodad - ip link set veth0b master foo +#!/bin/sh + +# Test various socket options that can be set by attaching programs to cgroups. + +CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock" + +################################################################################ +# +print_result() +{ + local rc=$1 + local status=" OK " + + [ $rc -ne 0 ] && status="FAIL" + + printf "%-50s [%4s]\n" "$2" "$status" } -function attach_bpf { - rm -rf /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2 - mount -t cgroup2 none /tmp/cgroupv2 - mkdir -p /tmp/cgroupv2/foo - test_cgrp2_sock /tmp/cgroupv2/foo foo - echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +check_sock() +{ + out=$(test_cgrp2_sock) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv4: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv4: $2" + fi } -function cleanup { - set +ex - ip netns delete at_ns0 - ip link del veth0 - ip link del foo - umount /tmp/cgroupv2 - rm -rf /tmp/cgroupv2 - set -ex +check_sock6() +{ + out=$(test_cgrp2_sock -6) + echo $out | grep -q "$1" + if [ $? -ne 0 ]; then + print_result 1 "IPv6: $2" + echo " expected: $1" + echo " have: $out" + rc=1 + else + print_result 0 "IPv6: $2" + fi } -function do_test { - ping -c1 -w1 172.16.1.100 - ping6 -c1 -w1 2401:db00::1 +################################################################################ +# + +cleanup() +{ + echo $$ >> ${CGRP_MNT}/cgroup.procs + rmdir ${CGRP_MNT}/sockopts } +cleanup_and_exit() +{ + local rc=$1 + local msg="$2" + + [ -n "$msg" ] && echo "ERROR: $msg" + + ip li del cgrp2_sock + umount ${CGRP_MNT} + + exit $rc +} + + +################################################################################ +# main + +rc=0 + +ip li add cgrp2_sock type dummy 2>/dev/null + +set -e +mkdir -p ${CGRP_MNT} +mount -t cgroup2 none ${CGRP_MNT} +set +e + + +# make sure we have a known start point cleanup 2>/dev/null -config_device -attach_bpf -do_test -cleanup -echo "*** PASS ***" + +mkdir -p ${CGRP_MNT}/sockopts +[ $? -ne 0 ] && cleanup_and_exit 1 "Failed to create cgroup hierarchy" + + +# set pid into cgroup +echo $$ > ${CGRP_MNT}/sockopts/cgroup.procs + +# no bpf program attached, so socket should show no settings +check_sock "dev , mark 0, priority 0" "No programs attached" +check_sock6 "dev , mark 0, priority 0" "No programs attached" + +# verify device is set +# +test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device" +fi +check_sock "dev cgrp2_sock, mark 0, priority 0" "Device set" +check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set" + +# verify mark is set +# +test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set mark" +fi +check_sock "dev , mark 666, priority 0" "Mark set" +check_sock6 "dev , mark 666, priority 0" "Mark set" + +# verify priority is set +# +test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set priority" +fi +check_sock "dev , mark 0, priority 123" "Priority set" +check_sock6 "dev , mark 0, priority 123" "Priority set" + +# all 3 at once +# +test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts +if [ $? -ne 0 ]; then + cleanup_and_exit 1 "Failed to install program to set device, mark and priority" +fi +check_sock "dev cgrp2_sock, mark 666, priority 123" "Priority set" +check_sock6 "dev cgrp2_sock, mark 666, priority 123" "Priority set" + +cleanup_and_exit $rc diff --git a/samples/bpf/xdp_redirect_kern.c b/samples/bpf/xdp_redirect_kern.c index 1c90288d0203..8abb151e385f 100644 --- a/samples/bpf/xdp_redirect_kern.c +++ b/samples/bpf/xdp_redirect_kern.c @@ -82,7 +82,7 @@ int xdp_redirect_prog(struct xdp_md *ctx) /* Redirect require an XDP bpf_prog loaded on the TX device */ SEC("xdp_redirect_dummy") -int xdp_redirect_dummy(struct xdp_md *ctx) +int xdp_redirect_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; } diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c index 79795d41ad0d..740a529ba84f 100644 --- a/samples/bpf/xdp_redirect_map_kern.c +++ b/samples/bpf/xdp_redirect_map_kern.c @@ -84,7 +84,7 @@ int xdp_redirect_map_prog(struct xdp_md *ctx) /* Redirect require an XDP bpf_prog loaded on the TX device */ SEC("xdp_redirect_dummy") -int xdp_redirect_dummy(struct xdp_md *ctx) +int xdp_redirect_dummy_prog(struct xdp_md *ctx) { return XDP_PASS; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 7059bb315a10..4acc772a28c0 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -504,7 +504,7 @@ static void test_sockmap(int tasks, void *data) } err = listen(sfd[i], 32); if (err < 0) { - printf("failed to listeen\n"); + printf("failed to listen\n"); goto out; } } @@ -525,7 +525,7 @@ static void test_sockmap(int tasks, void *data) addr.sin_port = htons(ports[i - 2]); err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr)); if (err) { - printf("failed to conenct\n"); + printf("failed to connect\n"); goto out; } } |