From 3934788a7b4df71f8bd7a2a1f1c0480f06a2076e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 10 Sep 2017 19:02:25 -0700 Subject: net: tcp_input: Neaten DBGUNDO Move the #ifdef into the static void function so that the use of DBGUNDO is validated when FASTRETRANS_DEBUG <= 1. Remove the now unnecessary #else and #define DBGUNDO. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c5d7656beeee..bddf724f5c02 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2333,9 +2333,9 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -#if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { +#if FASTRETRANS_DEBUG > 1 struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -2357,10 +2357,8 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->packets_out); } #endif -} -#else -#define DBGUNDO(x...) do { } while (0) #endif +} static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { -- cgit v1.2.3 From ab076b94c6a3cfc4e5651d559750220543394871 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Tue, 12 Sep 2017 09:32:45 +0800 Subject: dummy: declare dummy devices as enumerated devices Dummy device name is enumerated by the kernel, let user space be aware of the naming scheme used by dummy devices: (visible in /sys/class/net//name_assign_type). Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- drivers/net/dummy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index d0a1f9ce3168..e31ab3b94c6f 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -388,7 +388,7 @@ static int __init dummy_init_one(void) int err; dev_dummy = alloc_netdev(sizeof(struct dummy_priv), - "dummy%d", NET_NAME_UNKNOWN, dummy_setup); + "dummy%d", NET_NAME_ENUM, dummy_setup); if (!dev_dummy) return -ENOMEM; -- cgit v1.2.3 From 013955a6556766a76f9f2cc31e740fc6db6ecff4 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 12 Sep 2017 18:54:35 +0900 Subject: net: phy: realtek: rename RTL8211F_PAGE_SELECT to RTL821x_PAGE_SELECT This renames the definition of page select register from RTL8211F_PAGE_SELECT to RTL821x_PAGE_SELECT to use it across models. Signed-off-by: Kunihiko Hayashi Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 9cbe645e3d89..99c3297c957e 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -22,11 +22,11 @@ #define RTL821x_INER 0x12 #define RTL821x_INER_INIT 0x6400 #define RTL821x_INSR 0x13 +#define RTL821x_PAGE_SELECT 0x1f #define RTL8211E_INER_LINK_STATUS 0x400 #define RTL8211F_INER_LINK_STATUS 0x0010 #define RTL8211F_INSR 0x1d -#define RTL8211F_PAGE_SELECT 0x1f #define RTL8211F_TX_DELAY 0x100 MODULE_DESCRIPTION("Realtek PHY driver"); @@ -46,10 +46,10 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) { int err; - phy_write(phydev, RTL8211F_PAGE_SELECT, 0xa43); + phy_write(phydev, RTL821x_PAGE_SELECT, 0xa43); err = phy_read(phydev, RTL8211F_INSR); /* restore to default page 0 */ - phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0); + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); return (err < 0) ? err : 0; } @@ -102,7 +102,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) if (ret < 0) return ret; - phy_write(phydev, RTL8211F_PAGE_SELECT, 0xd08); + phy_write(phydev, RTL821x_PAGE_SELECT, 0xd08); reg = phy_read(phydev, 0x11); /* enable TX-delay for rgmii-id and rgmii-txid, otherwise disable it */ @@ -114,7 +114,7 @@ static int rtl8211f_config_init(struct phy_device *phydev) phy_write(phydev, 0x11, reg); /* restore to default page 0 */ - phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0); + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); return 0; } -- cgit v1.2.3 From 513588dd44b09bb5fdd5066a4fbc1e7443b86d1c Mon Sep 17 00:00:00 2001 From: Jassi Brar Date: Tue, 12 Sep 2017 18:54:36 +0900 Subject: net: phy: realtek: add RTL8201F phy-id and functions Add RTL8201F phy-id and the related functions to the driver. The original patch is as follows: https://patchwork.kernel.org/patch/2538341/ Signed-off-by: Jongsung Kim Signed-off-by: Jassi Brar Signed-off-by: Kunihiko Hayashi Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 99c3297c957e..d4670ecdb1ba 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -29,10 +29,22 @@ #define RTL8211F_INSR 0x1d #define RTL8211F_TX_DELAY 0x100 +#define RTL8201F_ISR 0x1e +#define RTL8201F_IER 0x13 + MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); MODULE_LICENSE("GPL"); +static int rtl8201_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, RTL8201F_ISR); + + return (err < 0) ? err : 0; +} + static int rtl821x_ack_interrupt(struct phy_device *phydev) { int err; @@ -54,6 +66,25 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev) return (err < 0) ? err : 0; } +static int rtl8201_config_intr(struct phy_device *phydev) +{ + int err; + + /* switch to page 7 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x7); + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, RTL8201F_IER, + BIT(13) | BIT(12) | BIT(11)); + else + err = phy_write(phydev, RTL8201F_IER, 0); + + /* restore to default page 0 */ + phy_write(phydev, RTL821x_PAGE_SELECT, 0x0); + + return err; +} + static int rtl8211b_config_intr(struct phy_device *phydev) { int err; @@ -128,6 +159,18 @@ static struct phy_driver realtek_drvs[] = { .flags = PHY_HAS_INTERRUPT, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, + }, { + .phy_id = 0x001cc816, + .name = "RTL8201F 10/100Mbps Ethernet", + .phy_id_mask = 0x001fffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = &genphy_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &rtl8201_ack_interrupt, + .config_intr = &rtl8201_config_intr, + .suspend = genphy_suspend, + .resume = genphy_resume, }, { .phy_id = 0x001cc912, .name = "RTL8211B Gigabit Ethernet", @@ -181,6 +224,7 @@ static struct phy_driver realtek_drvs[] = { module_phy_driver(realtek_drvs); static struct mdio_device_id __maybe_unused realtek_tbl[] = { + { 0x001cc816, 0x001fffff }, { 0x001cc912, 0x001fffff }, { 0x001cc914, 0x001fffff }, { 0x001cc915, 0x001fffff }, -- cgit v1.2.3 From 785ec87483d1e24a012ecf642ee7d07c4118f142 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 12 Sep 2017 23:02:08 +0300 Subject: ravb: document R8A77970 bindings R-Car V3M (R8A77970) SoC also has the R-Car gen3 compatible EtherAVB device, so document the SoC specific bindings. Signed-off-by: Sergei Shtylyov Acked-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 16723535e1aa..2689211d324c 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -17,6 +17,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. + - "renesas,etheravb-r8a77970" for the R8A77970 SoC. - "renesas,etheravb-rcar-gen3" as a fallback for the above R-Car Gen3 devices. @@ -40,7 +41,7 @@ Optional properties: - interrupt-parent: the phandle for the interrupt controller that services interrupts for this device. - interrupt-names: A list of interrupt names. - For the R8A779[56] SoCs this property is mandatory; + For the R-Car Gen 3 SoCs this property is mandatory; it should include one entry per channel, named "ch%u", where %u is the channel number ranging from 0 to 24. For other SoCs this property is optional; if present -- cgit v1.2.3 From 7016e0627171878810798a842a416dddee4e3329 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 13 Sep 2017 13:58:15 -0700 Subject: net: Convert int functions to bool Global function ipv6_rcv_saddr_equal and static functions ipv6_rcv_saddr_equal and ipv4_rcv_saddr_equal currently return int. bool is slightly more descriptive for these functions so change their return type from int to bool. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/addrconf.h | 4 ++-- net/ipv4/inet_connection_sock.c | 36 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f44ff2476758..87981cd63180 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,8 +94,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, u32 banned_flags); int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, u32 banned_flags); -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard); +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard); void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr); void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b9c64b40a83a..9707372b78ed 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, * and 0.0.0.0 equals to 0.0.0.0 only */ -static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, - const struct in6_addr *sk2_rcv_saddr6, - __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) +static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, + const struct in6_addr *sk2_rcv_saddr6, + __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk1_ipv6only, bool sk2_ipv6only, + bool match_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; + return true; if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; + return true; if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; + return true; if (sk2_rcv_saddr6 && ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) - return 1; + return true; - return 0; + return false; } #endif @@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, * match_wildcard == false: addresses must be exactly the same, i.e. * 0.0.0.0 only equals to 0.0.0.0 */ -static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) +static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk2_ipv6only, bool match_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) -- cgit v1.2.3 From f231c4178a655b09c1fe4dce4b09de7b867c20af Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 14 Sep 2017 09:06:38 +0900 Subject: dt-bindings: net: renesas-ravb: Add support for R8A77995 RAVB Add a new compatible string for the R8A77995 (R-Car D3) RAVB. Signed-off-by: Yoshihiro Shimoda Acked-by: Geert Uytterhoeven Acked-by: Sergei Shtylyov Acked-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 2689211d324c..c902261893b9 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -18,6 +18,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. - "renesas,etheravb-r8a77970" for the R8A77970 SoC. + - "renesas,etheravb-r8a77995" for the R8A77995 SoC. - "renesas,etheravb-rcar-gen3" as a fallback for the above R-Car Gen3 devices. -- cgit v1.2.3 From a45b3faf16f0cbc4ec48f9ec81e550d430199212 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:14 +0200 Subject: s390/qeth: add basic VNICC support VNIC Characteristics (VNICC) are features of HiperSockets that define how packets are handled by the underlying network hardware. For example, if the VNICC flooding is configured on a qeth device, ethernet frames to unknown destination MAC addresses are received. Currently, there is support for seven VNICCs: flooding, multicast flooding, receive broadcast, learning, takeover learning, takeover setvmac, bridge invisible. Also, six IPA commands exist for configuring VNICCs on a qeth device: query characteristics, query commands, enable characteristic, disable characteristic, set timeout, get timeout. This patch adds the basic code infrastructure for VNICC support to qeth. It allows querying VNICC support from the underlying hardware. To this end, it adds: * basic message formats for IPA commands * basic data structures * basic error handling * query characteristics IPA command support The query characteristics IPA command allows requesting the currently supported and currently enabled VNIC characteristics from the underlying hardware. Support for the other IPA commands and for the configuration of VNICCs is added in follow-up patches together with the respective user interface functions. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 7 ++ drivers/s390/net/qeth_core_mpc.c | 4 +- drivers/s390/net/qeth_core_mpc.h | 29 +++++++++ drivers/s390/net/qeth_l2_main.c | 136 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 172 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 59e09854c4f7..b96438df8fd4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -182,6 +182,12 @@ struct qeth_sbp_info { __u32 reflect_promisc_primary:1; }; +struct qeth_vnicc_info { + /* supported/currently configured VNICCs; updated in IPA exchanges */ + u32 sup_chars; + u32 cur_chars; +}; + static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, enum qeth_ipa_funcs func) { @@ -673,6 +679,7 @@ struct qeth_card_options { struct qeth_routing_info route6; struct qeth_ipa_info ipa6; struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */ + struct qeth_vnicc_info vnicc; /* VNICC options */ int fake_broadcast; int layer2; int performance_stats; diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c index 6dd7d05e5693..5f8a2b834d39 100644 --- a/drivers/s390/net/qeth_core_mpc.c +++ b/drivers/s390/net/qeth_core_mpc.c @@ -167,7 +167,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"}, {IPA_RC_UNKNOWN_ERROR, "IPA command failed - reason unknown"}, {IPA_RC_UNSUPPORTED_COMMAND, "Command not supported"}, - {IPA_RC_TRACE_ALREADY_ACTIVE, "trace already active"}, + {IPA_RC_VNICC_OOSEQ, "Command issued out of sequence"}, {IPA_RC_INVALID_FORMAT, "invalid format or length"}, {IPA_RC_DUP_IPV6_REMOTE, "ipv6 address already registered remote"}, {IPA_RC_SBP_IQD_NOT_CONFIGURED, "Not configured for bridgeport"}, @@ -193,6 +193,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = { {IPA_RC_L2_INVALID_VLAN_ID, "L2 invalid vlan id"}, {IPA_RC_L2_DUP_VLAN_ID, "L2 duplicate vlan id"}, {IPA_RC_L2_VLAN_ID_NOT_FOUND, "L2 vlan id not found"}, + {IPA_RC_VNICC_VNICBP, "VNIC is BridgePort"}, {IPA_RC_SBP_OSA_NOT_CONFIGURED, "Not configured for bridgeport"}, {IPA_RC_SBP_OSA_OS_MISMATCH, "OS mismatch"}, {IPA_RC_SBP_OSA_ANO_DEV_PRIMARY, "Primary bridgeport exists already"}, @@ -253,6 +254,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = { {IPA_CMD_DELGMAC, "delgmac"}, {IPA_CMD_SETVLAN, "setvlan"}, {IPA_CMD_DELVLAN, "delvlan"}, + {IPA_CMD_VNICC, "vnic_characteristics"}, {IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"}, {IPA_CMD_SETCCID, "setccid"}, {IPA_CMD_DELCCID, "delccid"}, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 912e0107de8f..c13816b8c92f 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -90,6 +90,7 @@ enum qeth_ipa_cmds { IPA_CMD_DELGMAC = 0x24, IPA_CMD_SETVLAN = 0x25, IPA_CMD_DELVLAN = 0x26, + IPA_CMD_VNICC = 0x2a, IPA_CMD_SETBRIDGEPORT_OSA = 0x2b, IPA_CMD_SETCCID = 0x41, IPA_CMD_DELCCID = 0x42, @@ -165,6 +166,7 @@ enum qeth_ipa_return_codes { IPA_RC_L2_INVALID_VLAN_ID = 0x2015, IPA_RC_L2_DUP_VLAN_ID = 0x2016, IPA_RC_L2_VLAN_ID_NOT_FOUND = 0x2017, + IPA_RC_VNICC_VNICBP = 0x20B0, IPA_RC_SBP_OSA_NOT_CONFIGURED = 0x2B0C, IPA_RC_SBP_OSA_OS_MISMATCH = 0x2B10, IPA_RC_SBP_OSA_ANO_DEV_PRIMARY = 0x2B14, @@ -197,6 +199,9 @@ enum qeth_ipa_return_codes { IPA_RC_ENOMEM = 0xfffe, IPA_RC_FFFF = 0xffff }; +/* for VNIC Characteristics */ +#define IPA_RC_VNICC_OOSEQ 0x0005 + /* for SET_DIAGNOSTIC_ASSIST */ #define IPA_RC_INVALID_SUBCMD IPA_RC_IP_TABLE_FULL #define IPA_RC_HARDWARE_AUTH_ERROR IPA_RC_UNKNOWN_ERROR @@ -551,6 +556,29 @@ struct qeth_ipacmd_diagass { __u8 cdata[64]; } __attribute__ ((packed)); +/* VNIC Characteristics IPA Command: *****************************************/ +/* IPA commands/sub commands for VNICC */ +#define IPA_VNICC_QUERY_CHARS 0x00000000L + +/* VNICC header */ +struct qeth_ipacmd_vnicc_hdr { + u32 sup; + u32 cur; +}; + +/* VNICC sub command header */ +struct qeth_vnicc_sub_hdr { + u16 data_length; + u16 reserved; + u32 sub_command; +}; + +/* complete VNICC IPA command message */ +struct qeth_ipacmd_vnicc { + struct qeth_ipacmd_vnicc_hdr hdr; + struct qeth_vnicc_sub_hdr sub_hdr; +}; + /* SETBRIDGEPORT IPA Command: *********************************************/ enum qeth_ipa_sbp_cmd { IPA_SBP_QUERY_COMMANDS_SUPPORTED = 0x00000000L, @@ -692,6 +720,7 @@ struct qeth_ipa_cmd { struct qeth_ipacmd_diagass diagass; struct qeth_ipacmd_setbridgeport sbp; struct qeth_ipacmd_addr_change addrchange; + struct qeth_ipacmd_vnicc vnicc; } data; } __attribute__ ((packed)); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 760b023eae95..2fa273b40dd1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -33,6 +33,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); +static void qeth_l2_vnicc_init(struct qeth_card *card); static int qeth_l2_verify_dev(struct net_device *dev) { @@ -1045,9 +1046,14 @@ static int qeth_l2_start_ipassists(struct qeth_card *card) static void qeth_l2_trace_features(struct qeth_card *card) { - QETH_CARD_TEXT(card, 2, "l2featur"); + /* Set BridgePort features */ + QETH_CARD_TEXT(card, 2, "featuSBP"); QETH_CARD_HEX(card, 2, &card->options.sbp.supported_funcs, sizeof(card->options.sbp.supported_funcs)); + /* VNIC Characteristics features */ + QETH_CARD_TEXT(card, 2, "feaVNICC"); + QETH_CARD_HEX(card, 2, &card->options.vnicc.sup_chars, + sizeof(card->options.vnicc.sup_chars)); } static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) @@ -1072,8 +1078,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) if (card->options.sbp.supported_funcs) dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n"); - qeth_trace_features(card); - qeth_l2_trace_features(card); if (!card->dev && qeth_l2_setup_netdev(card)) { rc = -ENODEV; @@ -1090,6 +1094,12 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) } else card->info.hwtrap = 0; + /* for the rx_bcast characteristic, init VNICC after setmac */ + qeth_l2_vnicc_init(card); + + qeth_trace_features(card); + qeth_l2_trace_features(card); + qeth_l2_setup_bridgeport_attrs(card); card->state = CARD_STATE_HARDSETUP; @@ -2039,6 +2049,126 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) } EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set); +/* VNIC Characteristics support */ + +/* handle VNICC IPA command return codes; convert to error codes */ +static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) +{ + int rc; + + switch (ipa_rc) { + case IPA_RC_SUCCESS: + return ipa_rc; + case IPA_RC_L2_UNSUPPORTED_CMD: + case IPA_RC_NOTSUPP: + rc = -EOPNOTSUPP; + break; + case IPA_RC_VNICC_OOSEQ: + rc = -EALREADY; + break; + case IPA_RC_VNICC_VNICBP: + rc = -EBUSY; + break; + case IPA_RC_L2_ADDR_TABLE_FULL: + rc = -ENOSPC; + break; + case IPA_RC_L2_MAC_NOT_AUTH_BY_ADP: + rc = -EACCES; + break; + default: + rc = -EIO; + } + + QETH_CARD_TEXT_(card, 2, "err%04x", ipa_rc); + return rc; +} + +/* generic VNICC request call back control */ +struct _qeth_l2_vnicc_request_cbctl { + u32 sub_cmd; +}; + +/* generic VNICC request call back */ +static int qeth_l2_vnicc_request_cb(struct qeth_card *card, + struct qeth_reply *reply, + unsigned long data) +{ + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; + struct qeth_ipacmd_vnicc *rep = &cmd->data.vnicc; + + QETH_CARD_TEXT(card, 2, "vniccrcb"); + if (cmd->hdr.return_code) + return 0; + /* return results to caller */ + card->options.vnicc.sup_chars = rep->hdr.sup; + card->options.vnicc.cur_chars = rep->hdr.cur; + + return 0; +} + +/* generic VNICC request */ +static int qeth_l2_vnicc_request(struct qeth_card *card, + struct _qeth_l2_vnicc_request_cbctl *cbctl) +{ + struct qeth_ipacmd_vnicc *req; + struct qeth_cmd_buffer *iob; + struct qeth_ipa_cmd *cmd; + int rc; + + QETH_CARD_TEXT(card, 2, "vniccreq"); + + /* get new buffer for request */ + iob = qeth_get_ipacmd_buffer(card, IPA_CMD_VNICC, 0); + if (!iob) + return -ENOMEM; + + /* create header for request */ + cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE); + req = &cmd->data.vnicc; + + /* create sub command header for request */ + req->sub_hdr.data_length = sizeof(req->sub_hdr); + req->sub_hdr.sub_command = cbctl->sub_cmd; + + /* create sub command specific request fields */ + switch (cbctl->sub_cmd) { + case IPA_VNICC_QUERY_CHARS: + break; + default: + qeth_release_buffer(iob->channel, iob); + return -EOPNOTSUPP; + } + + /* send request */ + rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb, + (void *) cbctl); + + return qeth_l2_vnicc_makerc(card, rc); +} + +/* VNICC query VNIC characteristics request */ +static int qeth_l2_vnicc_query_chars(struct qeth_card *card) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = IPA_VNICC_QUERY_CHARS; + + QETH_CARD_TEXT(card, 2, "vniccqch"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* (re-)initialize VNICC */ +static void qeth_l2_vnicc_init(struct qeth_card *card) +{ + QETH_CARD_TEXT(card, 2, "vniccini"); + /* initial query and storage of VNIC characteristics */ + if (qeth_l2_vnicc_query_chars(card)) { + card->options.vnicc.sup_chars = 0; + card->options.vnicc.cur_chars = 0; + } +} + module_init(qeth_l2_init); module_exit(qeth_l2_exit); MODULE_AUTHOR("Frank Blaschka "); -- cgit v1.2.3 From caa1f0b10d18f31c5491f84cb2e68a5d2047f437 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:15 +0200 Subject: s390/qeth: add VNICC enable/disable support HiperSocket devices allow enabling and disabling so called VNIC Characteristics (VNICC) that influence how the underlying hardware handles packets. These VNICCs are: * Flooding VNICC: Flooding allows specifying if packets to unknown destination MAC addresses are received by the qeth device. * Multicast flooding VNICC: Multicast flooding allows specifying if packets to multicast MAC addresses are received by the qeth device. * Learning VNICC: If learning is enabled on a qeth device, the device learns the source MAC addresses of outgoing packets and incoming packets to those learned MAC addresses are received. * Takeover setvmac VNICC: If takeover setvmac is configured on a qeth device, the MAC address of this device can be configured on a different qeth device with the setvmac IPA command. * Takeover by learning VNICC: If takeover learning is enabled on a qeth device, the MAC address of this device can be learned (learning VNICC) on a different qeth device. * BridgePort invisible VNICC: If BridgePort invisible is enabled on a qeth device, (1) packets from this device are not sent to a BridgePort enabled qeth device and (2) packets coming from a BridgePort enabled qeth device are not received by this device. * Receive broadcast VNICC: Receive broadcast allows configuring if a qeth device receives packets with the broadcast destination MAC address. This patch adds support for the IPA commands that are required to enable and disable these VNIC characteristics on qeth devices. As a prerequisite, it also adds the query commands IPA command. The query commands IPA command allows requesting the supported commands for each characteristic from the underlying hardware. Additionally, this patch provides users with a sysfs user interface to enable/disable the VNICCs mentioned above. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 6 ++ drivers/s390/net/qeth_core_mpc.h | 31 ++++++ drivers/s390/net/qeth_l2.h | 4 + drivers/s390/net/qeth_l2_main.c | 207 +++++++++++++++++++++++++++++++++++++++ drivers/s390/net/qeth_l2_sys.c | 158 +++++++++++++++++++++++++++--- 5 files changed, 392 insertions(+), 14 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b96438df8fd4..2236c0c9744a 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -186,6 +186,12 @@ struct qeth_vnicc_info { /* supported/currently configured VNICCs; updated in IPA exchanges */ u32 sup_chars; u32 cur_chars; + /* supported commands: bitmasks which VNICCs support respective cmd */ + u32 set_char_sup; + /* characteristics wanted/configured by user */ + u32 wanted_chars; + /* has user explicitly enabled rx_bcast while online? */ + bool rx_bcast_enabled; }; static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa, diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index c13816b8c92f..7f67a81a2ae6 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -559,6 +559,22 @@ struct qeth_ipacmd_diagass { /* VNIC Characteristics IPA Command: *****************************************/ /* IPA commands/sub commands for VNICC */ #define IPA_VNICC_QUERY_CHARS 0x00000000L +#define IPA_VNICC_QUERY_CMDS 0x00000001L +#define IPA_VNICC_ENABLE 0x00000002L +#define IPA_VNICC_DISABLE 0x00000004L + +/* VNICC flags */ +#define QETH_VNICC_FLOODING 0x80000000 +#define QETH_VNICC_MCAST_FLOODING 0x40000000 +#define QETH_VNICC_LEARNING 0x20000000 +#define QETH_VNICC_TAKEOVER_SETVMAC 0x10000000 +#define QETH_VNICC_TAKEOVER_LEARNING 0x08000000 +#define QETH_VNICC_BRIDGE_INVISIBLE 0x04000000 +#define QETH_VNICC_RX_BCAST 0x02000000 + +/* VNICC default values */ +#define QETH_VNICC_ALL 0xff000000 +#define QETH_VNICC_DEFAULT QETH_VNICC_RX_BCAST /* VNICC header */ struct qeth_ipacmd_vnicc_hdr { @@ -573,10 +589,25 @@ struct qeth_vnicc_sub_hdr { u32 sub_command; }; +/* query supported commands for VNIC characteristic */ +struct qeth_vnicc_query_cmds { + u32 vnic_char; + u32 sup_cmds; +}; + +/* enable/disable VNIC characteristic */ +struct qeth_vnicc_set_char { + u32 vnic_char; +}; + /* complete VNICC IPA command message */ struct qeth_ipacmd_vnicc { struct qeth_ipacmd_vnicc_hdr hdr; struct qeth_vnicc_sub_hdr sub_hdr; + union { + struct qeth_vnicc_query_cmds query_cmds; + struct qeth_vnicc_set_char set_char; + }; }; /* SETBRIDGEPORT IPA Command: *********************************************/ diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 0d59f9a45ea9..0619018c76f9 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -14,6 +14,10 @@ int qeth_l2_create_device_attributes(struct device *); void qeth_l2_remove_device_attributes(struct device *); void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); +int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state); +int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state); +bool qeth_l2_vnicc_is_in_use(struct qeth_card *card); + struct qeth_mac { u8 mac_addr[OSA_ADDR_LEN]; u8 is_uc:1; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2fa273b40dd1..36a7fd7255e3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -33,6 +33,7 @@ static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); +static void qeth_l2_vnicc_set_defaults(struct qeth_card *card); static void qeth_l2_vnicc_init(struct qeth_card *card); static int qeth_l2_verify_dev(struct net_device *dev) @@ -920,6 +921,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) hash_init(card->mac_htable); card->options.layer2 = 1; card->info.hwtrap = 0; + qeth_l2_vnicc_set_defaults(card); return 0; } @@ -2049,6 +2051,12 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable) } EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set); +static bool qeth_bridgeport_is_in_use(struct qeth_card *card) +{ + return (card->options.sbp.role || card->options.sbp.reflect_promisc || + card->options.sbp.hostnotification); +} + /* VNIC Characteristics support */ /* handle VNICC IPA command return codes; convert to error codes */ @@ -2086,6 +2094,12 @@ static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc) /* generic VNICC request call back control */ struct _qeth_l2_vnicc_request_cbctl { u32 sub_cmd; + struct { + u32 vnic_char; + } param; + struct { + u32 *sup_cmds; + } result; }; /* generic VNICC request call back */ @@ -2093,6 +2107,8 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, struct qeth_reply *reply, unsigned long data) { + struct _qeth_l2_vnicc_request_cbctl *cbctl = + (struct _qeth_l2_vnicc_request_cbctl *) reply->param; struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; struct qeth_ipacmd_vnicc *rep = &cmd->data.vnicc; @@ -2103,6 +2119,9 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, card->options.vnicc.sup_chars = rep->hdr.sup; card->options.vnicc.cur_chars = rep->hdr.cur; + if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS) + *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds; + return 0; } @@ -2134,6 +2153,15 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, switch (cbctl->sub_cmd) { case IPA_VNICC_QUERY_CHARS: break; + case IPA_VNICC_QUERY_CMDS: + req->sub_hdr.data_length += sizeof(req->query_cmds); + req->query_cmds.vnic_char = cbctl->param.vnic_char; + break; + case IPA_VNICC_ENABLE: + case IPA_VNICC_DISABLE: + req->sub_hdr.data_length += sizeof(req->set_char); + req->set_char.vnic_char = cbctl->param.vnic_char; + break; default: qeth_release_buffer(iob->channel, iob); return -EOPNOTSUPP; @@ -2158,15 +2186,194 @@ static int qeth_l2_vnicc_query_chars(struct qeth_card *card) return qeth_l2_vnicc_request(card, &cbctl); } +/* VNICC query sub commands request */ +static int qeth_l2_vnicc_query_cmds(struct qeth_card *card, u32 vnic_char, + u32 *sup_cmds) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = IPA_VNICC_QUERY_CMDS; + cbctl.param.vnic_char = vnic_char; + cbctl.result.sup_cmds = sup_cmds; + + QETH_CARD_TEXT(card, 2, "vniccqcm"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* VNICC enable/disable characteristic request */ +static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char, + u32 cmd) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = cmd; + cbctl.param.vnic_char = vnic_char; + + QETH_CARD_TEXT(card, 2, "vniccedc"); + return qeth_l2_vnicc_request(card, &cbctl); +} + +/* set current VNICC flag state; called from sysfs store function */ +int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) +{ + int rc = 0; + u32 cmd; + + QETH_CARD_TEXT(card, 2, "vniccsch"); + + /* do not change anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and enable/disable are supported */ + if (!(card->options.vnicc.sup_chars & vnicc) || + !(card->options.vnicc.set_char_sup & vnicc)) + return -EOPNOTSUPP; + + /* set enable/disable command and store wanted characteristic */ + if (state) { + cmd = IPA_VNICC_ENABLE; + card->options.vnicc.wanted_chars |= vnicc; + } else { + cmd = IPA_VNICC_DISABLE; + card->options.vnicc.wanted_chars &= ~vnicc; + } + + /* do we need to do anything? */ + if (card->options.vnicc.cur_chars == card->options.vnicc.wanted_chars) + return rc; + + /* if card is not ready, simply stop here */ + if (!qeth_card_hw_is_reachable(card)) { + if (state) + card->options.vnicc.cur_chars |= vnicc; + else + card->options.vnicc.cur_chars &= ~vnicc; + return rc; + } + + rc = qeth_l2_vnicc_set_char(card, vnicc, cmd); + if (rc) + card->options.vnicc.wanted_chars = + card->options.vnicc.cur_chars; + else if (state && vnicc == QETH_VNICC_RX_BCAST) + card->options.vnicc.rx_bcast_enabled = true; + + return rc; +} + +/* get current VNICC flag state; called from sysfs show function */ +int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccgch"); + + /* do not get anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic is supported */ + if (!(card->options.vnicc.sup_chars & vnicc)) + return -EOPNOTSUPP; + + /* if card is ready, query current VNICC state */ + if (qeth_card_hw_is_reachable(card)) + rc = qeth_l2_vnicc_query_chars(card); + + *state = (card->options.vnicc.cur_chars & vnicc) ? true : false; + return rc; +} + +/* check if VNICC is currently enabled */ +bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) +{ + /* if everything is turned off, VNICC is not active */ + if (!card->options.vnicc.cur_chars) + return false; + /* default values are only OK if rx_bcast was not enabled by user + * or the card is offline. + */ + if (card->options.vnicc.cur_chars == QETH_VNICC_DEFAULT) { + if (!card->options.vnicc.rx_bcast_enabled || + !qeth_card_hw_is_reachable(card)) + return false; + } + return true; +} + +/* recover user characteristic setting */ +static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, + bool enable) +{ + u32 cmd = enable ? IPA_VNICC_ENABLE : IPA_VNICC_DISABLE; + + if (card->options.vnicc.sup_chars & vnicc && + card->options.vnicc.set_char_sup & vnicc && + !qeth_l2_vnicc_set_char(card, vnicc, cmd)) + return false; + card->options.vnicc.wanted_chars &= ~vnicc; + card->options.vnicc.wanted_chars |= QETH_VNICC_DEFAULT & vnicc; + return true; +} + /* (re-)initialize VNICC */ static void qeth_l2_vnicc_init(struct qeth_card *card) { + unsigned int chars_len, i; + unsigned long chars_tmp; + u32 sup_cmds, vnicc; + bool enable, error; + QETH_CARD_TEXT(card, 2, "vniccini"); + /* reset rx_bcast */ + card->options.vnicc.rx_bcast_enabled = 0; /* initial query and storage of VNIC characteristics */ if (qeth_l2_vnicc_query_chars(card)) { + if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT) + dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); + /* fail quietly if user didn't change the default config */ card->options.vnicc.sup_chars = 0; card->options.vnicc.cur_chars = 0; + card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; + return; } + /* get supported commands for each supported characteristic */ + chars_tmp = card->options.vnicc.sup_chars; + chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE; + for_each_set_bit(i, &chars_tmp, chars_len) { + vnicc = BIT(i); + qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); + if (!(sup_cmds & IPA_VNICC_ENABLE) || + !(sup_cmds & IPA_VNICC_DISABLE)) + card->options.vnicc.set_char_sup &= ~vnicc; + } + /* enforce assumed default values and recover settings, if changed */ + error = false; + chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; + chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; + chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; + for_each_set_bit(i, &chars_tmp, chars_len) { + vnicc = BIT(i); + enable = card->options.vnicc.wanted_chars & vnicc; + error |= qeth_l2_vnicc_recover_char(card, vnicc, enable); + } + if (error) + dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); +} + +/* configure default values of VNIC characteristics */ +static void qeth_l2_vnicc_set_defaults(struct qeth_card *card) +{ + /* characteristics values */ + card->options.vnicc.sup_chars = QETH_VNICC_ALL; + card->options.vnicc.cur_chars = QETH_VNICC_DEFAULT; + /* supported commands */ + card->options.vnicc.set_char_sup = QETH_VNICC_ALL; + /* settings wanted by users */ + card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; } module_init(qeth_l2_init); diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 9696baa49e2d..01936f7dbe7e 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -20,6 +20,9 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + if (qeth_card_hw_is_reachable(card) && card->options.sbp.supported_funcs) rc = qeth_bridgeport_query_ports(card, @@ -60,6 +63,11 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev, static ssize_t qeth_bridge_port_role_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct qeth_card *card = dev_get_drvdata(dev); + + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + return qeth_bridge_port_role_state_show(dev, attr, buf, 0); } @@ -83,7 +91,10 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (card->options.sbp.reflect_promisc) /* Forbid direct manipulation */ + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (card->options.sbp.reflect_promisc) + /* Forbid direct manipulation */ rc = -EPERM; else if (qeth_card_hw_is_reachable(card)) { rc = qeth_bridgeport_setrole(card, role); @@ -103,6 +114,11 @@ static DEVICE_ATTR(bridge_role, 0644, qeth_bridge_port_role_show, static ssize_t qeth_bridge_port_state_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct qeth_card *card = dev_get_drvdata(dev); + + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + return qeth_bridge_port_role_state_show(dev, attr, buf, 1); } @@ -118,6 +134,9 @@ static ssize_t qeth_bridgeport_hostnotification_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + enabled = card->options.sbp.hostnotification; return sprintf(buf, "%d\n", enabled); @@ -142,7 +161,9 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (qeth_card_hw_is_reachable(card)) { + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (qeth_card_hw_is_reachable(card)) { rc = qeth_bridgeport_an_set(card, enable); if (!rc) card->options.sbp.hostnotification = enable; @@ -167,6 +188,9 @@ static ssize_t qeth_bridgeport_reflect_show(struct device *dev, if (!card) return -EINVAL; + if (qeth_l2_vnicc_is_in_use(card)) + return sprintf(buf, "n/a (VNIC characteristics)\n"); + if (card->options.sbp.reflect_promisc) { if (card->options.sbp.reflect_promisc_primary) state = "primary"; @@ -202,7 +226,9 @@ static ssize_t qeth_bridgeport_reflect_store(struct device *dev, mutex_lock(&card->conf_mutex); - if (card->options.sbp.role != QETH_SBP_ROLE_NONE) + if (qeth_l2_vnicc_is_in_use(card)) + rc = -EBUSY; + else if (card->options.sbp.role != QETH_SBP_ROLE_NONE) rc = -EPERM; else { card->options.sbp.reflect_promisc = enable; @@ -231,16 +257,6 @@ static struct attribute_group qeth_l2_bridgeport_attr_group = { .attrs = qeth_l2_bridgeport_attrs, }; -int qeth_l2_create_device_attributes(struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &qeth_l2_bridgeport_attr_group); -} - -void qeth_l2_remove_device_attributes(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &qeth_l2_bridgeport_attr_group); -} - /** * qeth_l2_setup_bridgeport_attrs() - set/restore attrs when turning online. * @card: qeth_card structure pointer @@ -270,10 +286,124 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) qeth_bridgeport_an_set(card, 0); } +/* VNIC CHARS support */ + +/* convert sysfs attr name to VNIC characteristic */ +static u32 qeth_l2_vnicc_sysfs_attr_to_char(const char *attr_name) +{ + if (sysfs_streq(attr_name, "flooding")) + return QETH_VNICC_FLOODING; + else if (sysfs_streq(attr_name, "mcast_flooding")) + return QETH_VNICC_MCAST_FLOODING; + else if (sysfs_streq(attr_name, "learning")) + return QETH_VNICC_LEARNING; + else if (sysfs_streq(attr_name, "takeover_setvmac")) + return QETH_VNICC_TAKEOVER_SETVMAC; + else if (sysfs_streq(attr_name, "takeover_learning")) + return QETH_VNICC_TAKEOVER_LEARNING; + else if (sysfs_streq(attr_name, "bridge_invisible")) + return QETH_VNICC_BRIDGE_INVISIBLE; + else if (sysfs_streq(attr_name, "rx_bcast")) + return QETH_VNICC_RX_BCAST; + + return 0; +} + +/* get current setting of characteristic */ +static ssize_t qeth_vnicc_char_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct qeth_card *card = dev_get_drvdata(dev); + bool state; + u32 vnicc; + int rc; + + if (!card) + return -EINVAL; + + vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name); + rc = qeth_l2_vnicc_get_state(card, vnicc, &state); + + if (rc == -EBUSY) + return sprintf(buf, "n/a (BridgePort)\n"); + if (rc == -EOPNOTSUPP) + return sprintf(buf, "n/a\n"); + return rc ? rc : sprintf(buf, "%d\n", state); +} + +/* change setting of characteristic */ +static ssize_t qeth_vnicc_char_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct qeth_card *card = dev_get_drvdata(dev); + bool state; + u32 vnicc; + int rc; + + if (!card) + return -EINVAL; + + if (kstrtobool(buf, &state)) + return -EINVAL; + + vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name); + mutex_lock(&card->conf_mutex); + rc = qeth_l2_vnicc_set_state(card, vnicc, state); + mutex_unlock(&card->conf_mutex); + + return rc ? rc : count; +} + +static DEVICE_ATTR(flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(mcast_flooding, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(learning, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(takeover_setvmac, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(takeover_learning, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(bridge_invisible, 0644, qeth_vnicc_char_show, + qeth_vnicc_char_store); +static DEVICE_ATTR(rx_bcast, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); + +static struct attribute *qeth_l2_vnicc_attrs[] = { + &dev_attr_flooding.attr, + &dev_attr_mcast_flooding.attr, + &dev_attr_learning.attr, + &dev_attr_takeover_setvmac.attr, + &dev_attr_takeover_learning.attr, + &dev_attr_bridge_invisible.attr, + &dev_attr_rx_bcast.attr, + NULL, +}; + +static struct attribute_group qeth_l2_vnicc_attr_group = { + .attrs = qeth_l2_vnicc_attrs, + .name = "vnicc", +}; + +static const struct attribute_group *qeth_l2_only_attr_groups[] = { + &qeth_l2_bridgeport_attr_group, + &qeth_l2_vnicc_attr_group, + NULL, +}; + +int qeth_l2_create_device_attributes(struct device *dev) +{ + return sysfs_create_groups(&dev->kobj, qeth_l2_only_attr_groups); +} + +void qeth_l2_remove_device_attributes(struct device *dev) +{ + sysfs_remove_groups(&dev->kobj, qeth_l2_only_attr_groups); +} + const struct attribute_group *qeth_l2_attr_groups[] = { &qeth_device_attr_group, &qeth_device_blkt_group, - /* l2 specific, see l2_{create,remove}_device_attributes(): */ + /* l2 specific, see qeth_l2_only_attr_groups: */ &qeth_l2_bridgeport_attr_group, + &qeth_l2_vnicc_attr_group, NULL, }; -- cgit v1.2.3 From 349d13d5ab58668ab7c8fadadf292430170c919e Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Mon, 18 Sep 2017 21:18:16 +0200 Subject: s390/qeth: add VNICC get/set timeout support HiperSockets allow configuring so called VNIC Characteristics (VNICC) that influence how the underlying hardware handles packets. For VNICCs, additional commands for getting and setting timeouts are available. Currently, the learning VNICC uses these commands. * Learning VNICC: If learning is enabled on a qeth device, the device learns the source MAC addresses of outgoing packets and incoming packets to those learned MAC addresses are received. For learning, the timeout specifies the idle period in seconds, after which the underlying hardware removes a learned MAC address again. This patch adds support for the IPA commands that are required to get and set the current timeout values for the learning VNIC characteristic. Also, it introduces the sysfs interface that allows users to configure the timeout. Signed-off-by: Hans Wippel Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 + drivers/s390/net/qeth_core_mpc.h | 11 ++++ drivers/s390/net/qeth_l2.h | 2 + drivers/s390/net/qeth_l2_main.c | 135 +++++++++++++++++++++++++++++++++++++-- drivers/s390/net/qeth_l2_sys.c | 44 +++++++++++++ 5 files changed, 190 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 2236c0c9744a..e3d3609cd9e7 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -188,6 +188,9 @@ struct qeth_vnicc_info { u32 cur_chars; /* supported commands: bitmasks which VNICCs support respective cmd */ u32 set_char_sup; + u32 getset_timeout_sup; + /* timeout value for the learning characteristic */ + u32 learning_timeout; /* characteristics wanted/configured by user */ u32 wanted_chars; /* has user explicitly enabled rx_bcast while online? */ diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 7f67a81a2ae6..2f1f0da3d089 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -562,6 +562,8 @@ struct qeth_ipacmd_diagass { #define IPA_VNICC_QUERY_CMDS 0x00000001L #define IPA_VNICC_ENABLE 0x00000002L #define IPA_VNICC_DISABLE 0x00000004L +#define IPA_VNICC_SET_TIMEOUT 0x00000008L +#define IPA_VNICC_GET_TIMEOUT 0x00000010L /* VNICC flags */ #define QETH_VNICC_FLOODING 0x80000000 @@ -575,6 +577,8 @@ struct qeth_ipacmd_diagass { /* VNICC default values */ #define QETH_VNICC_ALL 0xff000000 #define QETH_VNICC_DEFAULT QETH_VNICC_RX_BCAST +/* default VNICC timeout in seconds */ +#define QETH_VNICC_DEFAULT_TIMEOUT 600 /* VNICC header */ struct qeth_ipacmd_vnicc_hdr { @@ -600,6 +604,12 @@ struct qeth_vnicc_set_char { u32 vnic_char; }; +/* get/set timeout for VNIC characteristic */ +struct qeth_vnicc_getset_timeout { + u32 vnic_char; + u32 timeout; +}; + /* complete VNICC IPA command message */ struct qeth_ipacmd_vnicc { struct qeth_ipacmd_vnicc_hdr hdr; @@ -607,6 +617,7 @@ struct qeth_ipacmd_vnicc { union { struct qeth_vnicc_query_cmds query_cmds; struct qeth_vnicc_set_char set_char; + struct qeth_vnicc_getset_timeout getset_timeout; }; }; diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h index 0619018c76f9..241df6b98ab4 100644 --- a/drivers/s390/net/qeth_l2.h +++ b/drivers/s390/net/qeth_l2.h @@ -16,6 +16,8 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card); int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state); int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state); +int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout); +int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout); bool qeth_l2_vnicc_is_in_use(struct qeth_card *card); struct qeth_mac { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 36a7fd7255e3..25a0f381bcd5 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -35,6 +35,8 @@ static void qeth_bridge_host_event(struct qeth_card *card, struct qeth_ipa_cmd *cmd); static void qeth_l2_vnicc_set_defaults(struct qeth_card *card); static void qeth_l2_vnicc_init(struct qeth_card *card); +static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, + u32 *timeout); static int qeth_l2_verify_dev(struct net_device *dev) { @@ -2096,9 +2098,13 @@ struct _qeth_l2_vnicc_request_cbctl { u32 sub_cmd; struct { u32 vnic_char; + u32 timeout; } param; struct { - u32 *sup_cmds; + union{ + u32 *sup_cmds; + u32 *timeout; + }; } result; }; @@ -2122,6 +2128,9 @@ static int qeth_l2_vnicc_request_cb(struct qeth_card *card, if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS) *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds; + if (cbctl->sub_cmd == IPA_VNICC_GET_TIMEOUT) + *cbctl->result.timeout = rep->getset_timeout.timeout; + return 0; } @@ -2162,6 +2171,13 @@ static int qeth_l2_vnicc_request(struct qeth_card *card, req->sub_hdr.data_length += sizeof(req->set_char); req->set_char.vnic_char = cbctl->param.vnic_char; break; + case IPA_VNICC_SET_TIMEOUT: + req->getset_timeout.timeout = cbctl->param.timeout; + /* fallthrough */ + case IPA_VNICC_GET_TIMEOUT: + req->sub_hdr.data_length += sizeof(req->getset_timeout); + req->getset_timeout.vnic_char = cbctl->param.vnic_char; + break; default: qeth_release_buffer(iob->channel, iob); return -EOPNOTSUPP; @@ -2215,6 +2231,24 @@ static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char, return qeth_l2_vnicc_request(card, &cbctl); } +/* VNICC get/set timeout for characteristic request */ +static int qeth_l2_vnicc_getset_timeout(struct qeth_card *card, u32 vnicc, + u32 cmd, u32 *timeout) +{ + struct _qeth_l2_vnicc_request_cbctl cbctl; + + /* prepare callback control */ + cbctl.sub_cmd = cmd; + cbctl.param.vnic_char = vnicc; + if (cmd == IPA_VNICC_SET_TIMEOUT) + cbctl.param.timeout = *timeout; + if (cmd == IPA_VNICC_GET_TIMEOUT) + cbctl.result.timeout = timeout; + + QETH_CARD_TEXT(card, 2, "vniccgst"); + return qeth_l2_vnicc_request(card, &cbctl); +} + /* set current VNICC flag state; called from sysfs store function */ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) { @@ -2258,8 +2292,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) if (rc) card->options.vnicc.wanted_chars = card->options.vnicc.cur_chars; - else if (state && vnicc == QETH_VNICC_RX_BCAST) - card->options.vnicc.rx_bcast_enabled = true; + else { + /* successful online VNICC change; handle special cases */ + if (state && vnicc == QETH_VNICC_RX_BCAST) + card->options.vnicc.rx_bcast_enabled = true; + if (!state && vnicc == QETH_VNICC_LEARNING) + qeth_l2_vnicc_recover_timeout(card, vnicc, + &card->options.vnicc.learning_timeout); + } return rc; } @@ -2287,6 +2327,70 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) return rc; } +/* set VNICC timeout; called from sysfs store function. Currently, only learning + * supports timeout + */ +int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccsto"); + + /* do not change anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and set_timeout are supported */ + if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || + !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) + return -EOPNOTSUPP; + + /* do we need to do anything? */ + if (card->options.vnicc.learning_timeout == timeout) + return rc; + + /* if card is not ready, simply store the value internally and return */ + if (!qeth_card_hw_is_reachable(card)) { + card->options.vnicc.learning_timeout = timeout; + return rc; + } + + /* send timeout value to card; if successful, store value internally */ + rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING, + IPA_VNICC_SET_TIMEOUT, &timeout); + if (!rc) + card->options.vnicc.learning_timeout = timeout; + + return rc; +} + +/* get current VNICC timeout; called from sysfs show function. Currently, only + * learning supports timeout + */ +int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout) +{ + int rc = 0; + + QETH_CARD_TEXT(card, 2, "vniccgto"); + + /* do not get anything if BridgePort is enabled */ + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + + /* check if characteristic and get_timeout are supported */ + if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || + !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) + return -EOPNOTSUPP; + /* if card is ready, get timeout. Otherwise, just return stored value */ + *timeout = card->options.vnicc.learning_timeout; + if (qeth_card_hw_is_reachable(card)) + rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING, + IPA_VNICC_GET_TIMEOUT, + timeout); + + return rc; +} + /* check if VNICC is currently enabled */ bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) { @@ -2304,6 +2408,19 @@ bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) return true; } +/* recover user timeout setting */ +static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc, + u32 *timeout) +{ + if (card->options.vnicc.sup_chars & vnicc && + card->options.vnicc.getset_timeout_sup & vnicc && + !qeth_l2_vnicc_getset_timeout(card, vnicc, IPA_VNICC_SET_TIMEOUT, + timeout)) + return false; + *timeout = QETH_VNICC_DEFAULT_TIMEOUT; + return true; +} + /* recover user characteristic setting */ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, bool enable) @@ -2322,6 +2439,7 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc, /* (re-)initialize VNICC */ static void qeth_l2_vnicc_init(struct qeth_card *card) { + u32 *timeout = &card->options.vnicc.learning_timeout; unsigned int chars_len, i; unsigned long chars_tmp; u32 sup_cmds, vnicc; @@ -2332,7 +2450,8 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) card->options.vnicc.rx_bcast_enabled = 0; /* initial query and storage of VNIC characteristics */ if (qeth_l2_vnicc_query_chars(card)) { - if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT) + if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT || + *timeout != QETH_VNICC_DEFAULT_TIMEOUT) dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n"); /* fail quietly if user didn't change the default config */ card->options.vnicc.sup_chars = 0; @@ -2346,12 +2465,16 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds); + if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) || + !(sup_cmds & IPA_VNICC_GET_TIMEOUT)) + card->options.vnicc.getset_timeout_sup &= ~vnicc; if (!(sup_cmds & IPA_VNICC_ENABLE) || !(sup_cmds & IPA_VNICC_DISABLE)) card->options.vnicc.set_char_sup &= ~vnicc; } /* enforce assumed default values and recover settings, if changed */ - error = false; + error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, + timeout); chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; @@ -2370,8 +2493,10 @@ static void qeth_l2_vnicc_set_defaults(struct qeth_card *card) /* characteristics values */ card->options.vnicc.sup_chars = QETH_VNICC_ALL; card->options.vnicc.cur_chars = QETH_VNICC_DEFAULT; + card->options.vnicc.learning_timeout = QETH_VNICC_DEFAULT_TIMEOUT; /* supported commands */ card->options.vnicc.set_char_sup = QETH_VNICC_ALL; + card->options.vnicc.getset_timeout_sup = QETH_VNICC_LEARNING; /* settings wanted by users */ card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT; } diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c index 01936f7dbe7e..4608daedb204 100644 --- a/drivers/s390/net/qeth_l2_sys.c +++ b/drivers/s390/net/qeth_l2_sys.c @@ -309,6 +309,47 @@ static u32 qeth_l2_vnicc_sysfs_attr_to_char(const char *attr_name) return 0; } +/* get current timeout setting */ +static ssize_t qeth_vnicc_timeout_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct qeth_card *card = dev_get_drvdata(dev); + u32 timeout; + int rc; + + if (!card) + return -EINVAL; + + rc = qeth_l2_vnicc_get_timeout(card, &timeout); + if (rc == -EBUSY) + return sprintf(buf, "n/a (BridgePort)\n"); + if (rc == -EOPNOTSUPP) + return sprintf(buf, "n/a\n"); + return rc ? rc : sprintf(buf, "%d\n", timeout); +} + +/* change timeout setting */ +static ssize_t qeth_vnicc_timeout_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct qeth_card *card = dev_get_drvdata(dev); + u32 timeout; + int rc; + + if (!card) + return -EINVAL; + + rc = kstrtou32(buf, 10, &timeout); + if (rc) + return rc; + + mutex_lock(&card->conf_mutex); + rc = qeth_l2_vnicc_set_timeout(card, timeout); + mutex_unlock(&card->conf_mutex); + return rc ? rc : count; +} + /* get current setting of characteristic */ static ssize_t qeth_vnicc_char_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -359,6 +400,8 @@ static DEVICE_ATTR(flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(mcast_flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(learning, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); +static DEVICE_ATTR(learning_timeout, 0644, qeth_vnicc_timeout_show, + qeth_vnicc_timeout_store); static DEVICE_ATTR(takeover_setvmac, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store); static DEVICE_ATTR(takeover_learning, 0644, qeth_vnicc_char_show, @@ -371,6 +414,7 @@ static struct attribute *qeth_l2_vnicc_attrs[] = { &dev_attr_flooding.attr, &dev_attr_mcast_flooding.attr, &dev_attr_learning.attr, + &dev_attr_learning_timeout.attr, &dev_attr_takeover_setvmac.attr, &dev_attr_takeover_learning.attr, &dev_attr_bridge_invisible.attr, -- cgit v1.2.3 From 9627923062b6a5c40964b188bb222ec8c02fd2d6 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:17 +0200 Subject: s390/qeth: remove unused code in qdio_establish_cq() Storing the number of input buffers into 'i' has no effect, it is immediately re-assigned in the next line. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index bae7440abc01..77032801e30b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4923,7 +4923,6 @@ static void qeth_qdio_establish_cq(struct qeth_card *card, if (card->options.cq == QETH_CQ_ENABLED) { int offset = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); - i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { in_sbal_ptrs[offset + i] = (struct qdio_buffer *) virt_to_phys(card->qdio.c_q->bufs[i].buffer); -- cgit v1.2.3 From 7c2e9ba373264b29a35d6df06cc5fb125d189b90 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:18 +0200 Subject: s390/qeth: don't take queue lock in send_packet_fast() Locking the output queue prior to TX is needed on OSA devices, to synchronize against a packing flush from the TX completion code (via qeth_check_outbound_queue()). But send_packet_fast() is only used for IQDs, which don't do packing. So remove the locking, and apply some easy cleanups. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 +-- drivers/s390/net/qeth_core_main.c | 22 +++++----------------- drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 4 ++-- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index e3d3609cd9e7..b7e8531625aa 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -962,8 +962,7 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int); int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb, int extra_elems, int data_offset); int qeth_get_elements_for_frags(struct sk_buff *); -int qeth_do_send_packet_fast(struct qeth_card *card, - struct qeth_qdio_out_q *queue, struct sk_buff *skb, +int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 77032801e30b..68e118f1202e 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4040,35 +4040,23 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue, return flush_cnt; } -int qeth_do_send_packet_fast(struct qeth_card *card, - struct qeth_qdio_out_q *queue, struct sk_buff *skb, +int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, unsigned int offset, unsigned int hd_len) { - struct qeth_qdio_out_buffer *buffer; - int index; + int index = queue->next_buf_to_fill; + struct qeth_qdio_out_buffer *buffer = queue->bufs[index]; - /* spin until we get the queue ... */ - while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED, - QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED); - /* ... now we've got the queue */ - index = queue->next_buf_to_fill; - buffer = queue->bufs[queue->next_buf_to_fill]; /* * check if buffer is empty to make sure that we do not 'overtake' * ourselves and try to fill a buffer that is already primed */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) - goto out; - queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % - QDIO_MAX_BUFFERS_PER_Q; - atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); + return -EBUSY; + queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q; qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); return 0; -out: - atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED); - return -EBUSY; } EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 25a0f381bcd5..009de7248cc7 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -698,7 +698,7 @@ static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb, rc = -E2BIG; goto out; } - rc = qeth_do_send_packet_fast(card, queue, skb, hdr, data_offset, + rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset, sizeof(*hdr) + data_offset); out: if (rc) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index ab661a431f7c..733f94cbd4d4 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2771,8 +2771,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len, hd_len, elements); } else - rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr, - data_offset, hd_len); + rc = qeth_do_send_packet_fast(queue, new_skb, hdr, data_offset, + hd_len); if (!rc) { card->stats.tx_packets++; -- cgit v1.2.3 From ab25a5014eddb4da94b7acf0d214265964c5fd21 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:19 +0200 Subject: s390/qeth: simplify L3 sysfs group management Use the right helpers to create/remove all attribute groups in one go. Suggested-by: Hans Wippel Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_sys.c | 45 +++++++++++------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index e8bcc314cc5f..0ad0f7f4b23f 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -1028,52 +1028,31 @@ static const struct attribute_group qeth_device_rxip_group = { .attrs = qeth_rxip_device_attrs, }; +static const struct attribute_group *qeth_l3_only_attr_groups[] = { + &qeth_l3_device_attr_group, + &qeth_device_ipato_group, + &qeth_device_vipa_group, + &qeth_device_rxip_group, + NULL, +}; + int qeth_l3_create_device_attributes(struct device *dev) { - int ret; - - ret = sysfs_create_group(&dev->kobj, &qeth_l3_device_attr_group); - if (ret) - return ret; - - ret = sysfs_create_group(&dev->kobj, &qeth_device_ipato_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - return ret; - } - - ret = sysfs_create_group(&dev->kobj, &qeth_device_vipa_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - return ret; - } - - ret = sysfs_create_group(&dev->kobj, &qeth_device_rxip_group); - if (ret) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group); - return ret; - } - return 0; + return sysfs_create_groups(&dev->kobj, qeth_l3_only_attr_groups); } void qeth_l3_remove_device_attributes(struct device *dev) { - sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group); - sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group); - sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group); - sysfs_remove_group(&dev->kobj, &qeth_device_rxip_group); + sysfs_remove_groups(&dev->kobj, qeth_l3_only_attr_groups); } const struct attribute_group *qeth_l3_attr_groups[] = { &qeth_device_attr_group, &qeth_device_blkt_group, - /* l3 specific, see l3_{create,remove}_device_attributes(): */ + /* l3 specific, see qeth_l3_only_attr_groups: */ &qeth_l3_device_attr_group, &qeth_device_ipato_group, &qeth_device_vipa_group, &qeth_device_rxip_group, -NULL, + NULL, }; -- cgit v1.2.3 From 2aa4867198c22ecfae1c12f0c96c4c25fea01cb5 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:20 +0200 Subject: s390/qeth: translate SETVLAN/DELVLAN errors Properly return any error encountered during VLAN processing to the the caller. Resulting change in behaviour: if SETVLAN fails while registering a new VLAN ID, the stack no longer creates the corresponding vlan device. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_mpc.h | 1 + drivers/s390/net/qeth_l2_main.c | 47 ++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h index 2f1f0da3d089..6a7654df6e78 100644 --- a/drivers/s390/net/qeth_core_mpc.h +++ b/drivers/s390/net/qeth_core_mpc.h @@ -166,6 +166,7 @@ enum qeth_ipa_return_codes { IPA_RC_L2_INVALID_VLAN_ID = 0x2015, IPA_RC_L2_DUP_VLAN_ID = 0x2016, IPA_RC_L2_VLAN_ID_NOT_FOUND = 0x2017, + IPA_RC_L2_VLAN_ID_NOT_ALLOWED = 0x2050, IPA_RC_VNICC_VNICBP = 0x20B0, IPA_RC_SBP_OSA_NOT_CONFIGURED = 0x2B0C, IPA_RC_SBP_OSA_OS_MISMATCH = 0x2B10, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 009de7248cc7..7b61c2ef4c74 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -78,7 +78,7 @@ static struct net_device *qeth_l2_netdev_by_devno(unsigned char *read_dev_no) return ndev; } -static int qeth_setdel_makerc(struct qeth_card *card, int retcode) +static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode) { int rc; @@ -128,8 +128,8 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac, cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); cmd->data.setdelmac.mac_length = OSA_ADDR_LEN; memcpy(&cmd->data.setdelmac.mac, mac, OSA_ADDR_LEN); - return qeth_setdel_makerc(card, qeth_send_ipa_cmd(card, iob, - NULL, NULL)); + return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob, + NULL, NULL)); } static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) @@ -289,17 +289,40 @@ static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb, } } +static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode) +{ + if (retcode) + QETH_CARD_TEXT_(card, 2, "err%04x", retcode); + + switch (retcode) { + case IPA_RC_SUCCESS: + return 0; + case IPA_RC_L2_INVALID_VLAN_ID: + return -EINVAL; + case IPA_RC_L2_DUP_VLAN_ID: + return -EEXIST; + case IPA_RC_L2_VLAN_ID_NOT_FOUND: + return -ENOENT; + case IPA_RC_L2_VLAN_ID_NOT_ALLOWED: + return -EPERM; + case -ENOMEM: + return -ENOMEM; + default: + return -EIO; + } +} + static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, - struct qeth_reply *reply, unsigned long data) + struct qeth_reply *reply, + unsigned long data) { - struct qeth_ipa_cmd *cmd; + struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data; QETH_CARD_TEXT(card, 2, "L2sdvcb"); - cmd = (struct qeth_ipa_cmd *) data; if (cmd->hdr.return_code) { - QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x. " - "Continuing\n", cmd->data.setdelvlan.vlan_id, - QETH_CARD_IFNAME(card), cmd->hdr.return_code); + QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n", + cmd->data.setdelvlan.vlan_id, + QETH_CARD_IFNAME(card), cmd->hdr.return_code); QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command); QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code); } @@ -307,7 +330,7 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card, } static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, - enum qeth_ipa_cmds ipacmd) + enum qeth_ipa_cmds ipacmd) { struct qeth_ipa_cmd *cmd; struct qeth_cmd_buffer *iob; @@ -318,8 +341,8 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i, return -ENOMEM; cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE); cmd->data.setdelvlan.vlan_id = i; - return qeth_send_ipa_cmd(card, iob, - qeth_l2_send_setdelvlan_cb, NULL); + return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob, + qeth_l2_send_setdelvlan_cb, NULL)); } static void qeth_l2_process_vlans(struct qeth_card *card) -- cgit v1.2.3 From d7aa9d0bb923d55aa65a6e18428b384038c6523e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Sep 2017 21:18:21 +0200 Subject: s390/qeth: fold VLAN handling into l3_rebuild_skb() Move the overly complicated VLAN processing from the L3 RX handler into its l3_rebuild_skb() helper. No change in functionality. Signed-off-by: Julian Wiedmann Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 733f94cbd4d4..270ac9515f6b 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1646,13 +1646,12 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, return 0; } -static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, - struct qeth_hdr *hdr, unsigned short *vlan_id) +static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, + struct qeth_hdr *hdr) { __u16 prot; struct iphdr *ip_hdr; unsigned char tg_addr[MAX_ADDR_LEN]; - int is_vlan = 0; if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) { prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 : @@ -1706,11 +1705,14 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, skb->protocol = eth_type_trans(skb, card->dev); - if (hdr->hdr.l3.ext_flags & - (QETH_HDR_EXT_VLAN_FRAME | QETH_HDR_EXT_INCLUDE_VLAN_TAG)) { - *vlan_id = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ? - hdr->hdr.l3.vlan_id : *((u16 *)&hdr->hdr.l3.dest_addr[12]); - is_vlan = 1; + /* copy VLAN tag from hdr into skb */ + if (!card->options.sniffer && + (hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME | + QETH_HDR_EXT_INCLUDE_VLAN_TAG))) { + u16 tag = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ? + hdr->hdr.l3.vlan_id : + *((u16 *)&hdr->hdr.l3.dest_addr[12]); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag); } if (card->dev->features & NETIF_F_RXCSUM) { @@ -1724,7 +1726,6 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb, skb->ip_summed = CHECKSUM_NONE; } else skb->ip_summed = CHECKSUM_NONE; - return is_vlan; } static int qeth_l3_process_inbound_buffer(struct qeth_card *card, @@ -1733,8 +1734,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, int work_done = 0; struct sk_buff *skb; struct qeth_hdr *hdr; - __u16 vlan_tag = 0; - int is_vlan; unsigned int len; __u16 magic; @@ -1764,12 +1763,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card, card->dev->addr_len); netif_receive_skb(skb); } else { - is_vlan = qeth_l3_rebuild_skb(card, skb, hdr, - &vlan_tag); + qeth_l3_rebuild_skb(card, skb, hdr); len = skb->len; - if (is_vlan && !card->options.sniffer) - __vlan_hwaccel_put_tag(skb, - htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&card->napi, skb); } break; -- cgit v1.2.3 From e878c5e6003edae23512fe43d3c050622e5ebcf5 Mon Sep 17 00:00:00 2001 From: Jens Remus Date: Mon, 18 Sep 2017 21:18:22 +0200 Subject: s390/qeth: tidy up parameter naming for qeth_do_send_packet() Cppcheck reports the following for drivers/s390/net/qeth_core.h: warning - line 1560 - Function 'qeth_do_send_packet' argument order different: declaration 'card, queue, skb, hdr, hd_len, offset, elements' definition 'card, queue, skb, hdr, offset, hd_len, elements_needed'. Match the naming in the function's declaration against its definition. Signed-off-by: Jens Remus Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index b7e8531625aa..91fcadbede80 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -967,7 +967,8 @@ int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb, unsigned int hd_len); int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue, struct sk_buff *skb, struct qeth_hdr *hdr, - unsigned int hd_len, unsigned int offset, int elements); + unsigned int offset, unsigned int hd_len, + int elements_needed); int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int qeth_core_get_sset_count(struct net_device *, int); void qeth_core_get_ethtool_stats(struct net_device *, -- cgit v1.2.3 From 7598b3498bfdad49d2b19bf0cba829922a9689a9 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 14 Sep 2017 23:01:51 -0400 Subject: forcedeth: replace pci_map_single with dma_map_single functions pci_map_single functions are obsolete. So replace them with dma_map_single functions. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 70 +++++++++++++++++---------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 994a83a1f0a5..b605b94f4567 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1813,12 +1813,12 @@ static int nv_alloc_rx(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma)) { kfree_skb(skb); goto packet_dropped; } @@ -1854,12 +1854,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev) struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD); if (skb) { np->put_rx_ctx->skb = skb; - np->put_rx_ctx->dma = pci_map_single(np->pci_dev, + np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data, skb_tailroom(skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_rx_ctx->dma)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma)) { kfree_skb(skb); goto packet_dropped; } @@ -1977,9 +1977,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb) { if (tx_skb->dma) { if (tx_skb->dma_single) - pci_unmap_single(np->pci_dev, tx_skb->dma, + dma_unmap_single(&np->pci_dev->dev, tx_skb->dma, tx_skb->dma_len, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); else pci_unmap_page(np->pci_dev, tx_skb->dma, tx_skb->dma_len, @@ -2047,10 +2047,10 @@ static void nv_drain_rx(struct net_device *dev) } wmb(); if (np->rx_skb[i].skb) { - pci_unmap_single(np->pci_dev, np->rx_skb[i].dma, + dma_unmap_single(&np->pci_dev->dev, np->rx_skb[i].dma, (skb_end_pointer(np->rx_skb[i].skb) - - np->rx_skb[i].skb->data), - PCI_DMA_FROMDEVICE); + np->rx_skb[i].skb->data), + DMA_FROM_DEVICE); dev_kfree_skb(np->rx_skb[i].skb); np->rx_skb[i].skb = NULL; } @@ -2224,10 +2224,11 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma)) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2373,10 +2374,11 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, prev_tx = put_tx; prev_tx_ctx = np->put_tx_ctx; bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size; - np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(np->pci_dev, - np->put_tx_ctx->dma)) { + np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, + skb->data + offset, bcnt, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma)) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2810,9 +2812,9 @@ static int nv_rx_process(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -2916,9 +2918,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) * TODO: check if a prefetch of the first cacheline improves * the performance. */ - pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma, - np->get_rx_ctx->dma_len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma, + np->get_rx_ctx->dma_len, + DMA_FROM_DEVICE); skb = np->get_rx_ctx->skb; np->get_rx_ctx->skb = NULL; @@ -5070,11 +5072,11 @@ static int nv_loopback_test(struct net_device *dev) ret = 0; goto out; } - test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data, + test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data, skb_tailroom(tx_skb), - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(np->pci_dev, - test_dma_addr)) { + DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, + test_dma_addr)) { dev_kfree_skb_any(tx_skb); goto out; } @@ -5129,9 +5131,9 @@ static int nv_loopback_test(struct net_device *dev) } } - pci_unmap_single(np->pci_dev, test_dma_addr, - (skb_end_pointer(tx_skb) - tx_skb->data), - PCI_DMA_TODEVICE); + dma_unmap_single(&np->pci_dev->dev, test_dma_addr, + (skb_end_pointer(tx_skb) - tx_skb->data), + DMA_TO_DEVICE); dev_kfree_skb_any(tx_skb); out: /* stop engines */ -- cgit v1.2.3 From 2df9d6730215db85f7306f0bda03b7391e392837 Mon Sep 17 00:00:00 2001 From: Valentin Longchamp Date: Fri, 15 Sep 2017 07:58:47 +0200 Subject: net/ethernet/freescale: fix warning for ucc_geth uf_info.regs is resource_size_t i.e. phys_addr_t that can be either u32 or u64 according to CONFIG_PHYS_ADDR_T_64BIT. The printk format is thus adaptet to u64 and the regs value cast to u64 to take both u32 and u64 into account. Signed-off-by: Valentin Longchamp Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f77ba9fa257b..a96b838cffce 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3857,8 +3857,9 @@ static int ucc_geth_probe(struct platform_device* ofdev) } if (netif_msg_probe(&debug)) - pr_info("UCC%1d at 0x%8x (irq = %d)\n", - ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs, + pr_info("UCC%1d at 0x%8llx (irq = %d)\n", + ug_info->uf_info.ucc_num + 1, + (u64)ug_info->uf_info.regs, ug_info->uf_info.irq); /* Create an ethernet device instance */ -- cgit v1.2.3 From ca444073a2de97809d63e613d01203f4f4644cfb Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 17 Sep 2017 12:46:20 +0100 Subject: hamradio: baycom: use new parport device model Modify baycom driver to use the new parallel port device model. Signed-off-by: Sudip Mukherjee Acked-By: Thomas Sailer Signed-off-by: David S. Miller --- drivers/net/hamradio/baycom_epp.c | 50 +++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 1503f10122f7..1e62d00732f2 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -840,6 +840,7 @@ static int epp_open(struct net_device *dev) unsigned char tmp[128]; unsigned char stat; unsigned long tstart; + struct pardev_cb par_cb; if (!pp) { printk(KERN_ERR "%s: parport at 0x%lx unknown\n", bc_drvname, dev->base_addr); @@ -859,8 +860,21 @@ static int epp_open(struct net_device *dev) return -EIO; } memset(&bc->modem, 0, sizeof(bc->modem)); - bc->pdev = parport_register_device(pp, dev->name, NULL, epp_wakeup, - NULL, PARPORT_DEV_EXCL, dev); + memset(&par_cb, 0, sizeof(par_cb)); + par_cb.wakeup = epp_wakeup; + par_cb.private = (void *)dev; + par_cb.flags = PARPORT_DEV_EXCL; + for (i = 0; i < NR_PORTS; i++) + if (baycom_device[i] == dev) + break; + + if (i == NR_PORTS) { + pr_err("%s: no device found\n", bc_drvname); + parport_put_port(pp); + return -ENODEV; + } + + bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i); parport_put_port(pp); if (!bc->pdev) { printk(KERN_ERR "%s: cannot register parport at 0x%lx\n", bc_drvname, pp->base); @@ -1185,6 +1199,23 @@ MODULE_LICENSE("GPL"); /* --------------------------------------------------------------------- */ +static int baycom_epp_par_probe(struct pardevice *par_dev) +{ + struct device_driver *drv = par_dev->dev.driver; + int len = strlen(drv->name); + + if (strncmp(par_dev->name, drv->name, len)) + return -ENODEV; + + return 0; +} + +static struct parport_driver baycom_epp_par_driver = { + .name = "bce", + .probe = baycom_epp_par_probe, + .devmodel = true, +}; + static void __init baycom_epp_dev_setup(struct net_device *dev) { struct baycom_state *bc = netdev_priv(dev); @@ -1204,10 +1235,15 @@ static void __init baycom_epp_dev_setup(struct net_device *dev) static int __init init_baycomepp(void) { - int i, found = 0; + int i, found = 0, ret; char set_hw = 1; printk(bc_drvinfo); + + ret = parport_register_driver(&baycom_epp_par_driver); + if (ret) + return ret; + /* * register net devices */ @@ -1241,7 +1277,12 @@ static int __init init_baycomepp(void) found++; } - return found ? 0 : -ENXIO; + if (found == 0) { + parport_unregister_driver(&baycom_epp_par_driver); + return -ENXIO; + } + + return 0; } static void __exit cleanup_baycomepp(void) @@ -1260,6 +1301,7 @@ static void __exit cleanup_baycomepp(void) printk(paranoia_str, "cleanup_module"); } } + parport_unregister_driver(&baycom_epp_par_driver); } module_init(init_baycomepp); -- cgit v1.2.3 From 7ce103b4cbb20babf19b881e28228b7fd40ce0b3 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:15 +0300 Subject: net: korina: don't use overflow and underflow interrupts When such interrupts occur there is not much we can do. Dropping the whole ring doesn't help and only produces high packet loss. If we just ignore the interrupt the mac will drop one or few packets instead of the whole ring. Also this will lower the irq handling load and increase performance. Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 83 +------------------------------------------ 1 file changed, 1 insertion(+), 82 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 3c0a6451273d..98d686ed69a9 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -122,8 +122,6 @@ struct korina_private { int rx_irq; int tx_irq; - int ovr_irq; - int und_irq; spinlock_t lock; /* NIC xmit lock */ @@ -891,8 +889,6 @@ static void korina_restart_task(struct work_struct *work) */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); writel(readl(&lp->tx_dma_regs->dmasm) | DMA_STAT_FINI | DMA_STAT_ERR, @@ -911,40 +907,10 @@ static void korina_restart_task(struct work_struct *work) } korina_multicast_list(dev); - enable_irq(lp->und_irq); - enable_irq(lp->ovr_irq); enable_irq(lp->tx_irq); enable_irq(lp->rx_irq); } -static void korina_clear_and_restart(struct net_device *dev, u32 value) -{ - struct korina_private *lp = netdev_priv(dev); - - netif_stop_queue(dev); - writel(value, &lp->eth_regs->ethintfc); - schedule_work(&lp->restart_task); -} - -/* Ethernet Tx Underflow interrupt */ -static irqreturn_t korina_und_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int und; - - spin_lock(&lp->lock); - - und = readl(&lp->eth_regs->ethintfc); - - if (und & ETH_INT_FC_UND) - korina_clear_and_restart(dev, und & ~ETH_INT_FC_UND); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - static void korina_tx_timeout(struct net_device *dev) { struct korina_private *lp = netdev_priv(dev); @@ -952,25 +918,6 @@ static void korina_tx_timeout(struct net_device *dev) schedule_work(&lp->restart_task); } -/* Ethernet Rx Overflow interrupt */ -static irqreturn_t -korina_ovr_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct korina_private *lp = netdev_priv(dev); - unsigned int ovr; - - spin_lock(&lp->lock); - ovr = readl(&lp->eth_regs->ethintfc); - - if (ovr & ETH_INT_FC_OVR) - korina_clear_and_restart(dev, ovr & ~ETH_INT_FC_OVR); - - spin_unlock(&lp->lock); - - return IRQ_HANDLED; -} - #ifdef CONFIG_NET_POLL_CONTROLLER static void korina_poll_controller(struct net_device *dev) { @@ -993,8 +940,7 @@ static int korina_open(struct net_device *dev) } /* Install the interrupt handler - * that handles the Done Finished - * Ovr and Und Events */ + * that handles the Done Finished */ ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt, 0, "Korina ethernet Rx", dev); if (ret < 0) { @@ -1010,31 +956,10 @@ static int korina_open(struct net_device *dev) goto err_free_rx_irq; } - /* Install handler for overrun error. */ - ret = request_irq(lp->ovr_irq, korina_ovr_interrupt, - 0, "Ethernet Overflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get OVR IRQ %d\n", - dev->name, lp->ovr_irq); - goto err_free_tx_irq; - } - - /* Install handler for underflow error. */ - ret = request_irq(lp->und_irq, korina_und_interrupt, - 0, "Ethernet Underflow", dev); - if (ret < 0) { - printk(KERN_ERR "%s: unable to get UND IRQ %d\n", - dev->name, lp->und_irq); - goto err_free_ovr_irq; - } mod_timer(&lp->media_check_timer, jiffies + 1); out: return ret; -err_free_ovr_irq: - free_irq(lp->ovr_irq, dev); -err_free_tx_irq: - free_irq(lp->tx_irq, dev); err_free_rx_irq: free_irq(lp->rx_irq, dev); err_release: @@ -1052,8 +977,6 @@ static int korina_close(struct net_device *dev) /* Disable interrupts */ disable_irq(lp->rx_irq); disable_irq(lp->tx_irq); - disable_irq(lp->ovr_irq); - disable_irq(lp->und_irq); korina_abort_tx(dev); tmp = readl(&lp->tx_dma_regs->dmasm); @@ -1073,8 +996,6 @@ static int korina_close(struct net_device *dev) free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); - free_irq(lp->ovr_irq, dev); - free_irq(lp->und_irq, dev); return 0; } @@ -1113,8 +1034,6 @@ static int korina_probe(struct platform_device *pdev) lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx"); lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx"); - lp->ovr_irq = platform_get_irq_byname(pdev, "korina_ovr"); - lp->und_irq = platform_get_irq_byname(pdev, "korina_und"); r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs"); dev->base_addr = r->start; -- cgit v1.2.3 From 364a97f5d1ae3102d53a3ad1efea3fa546781f78 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:26 +0300 Subject: net: korina: optimize rx descriptor flags processing Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 87 ++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 43 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 98d686ed69a9..e5466e19994a 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -363,59 +363,60 @@ static int korina_rx(struct net_device *dev, int limit) if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0) break; - /* Update statistics counters */ - if (devcs & ETH_RX_CRC) - dev->stats.rx_crc_errors++; - if (devcs & ETH_RX_LOR) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_LE) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_OVR) - dev->stats.rx_fifo_errors++; - if (devcs & ETH_RX_CV) - dev->stats.rx_frame_errors++; - if (devcs & ETH_RX_CES) - dev->stats.rx_length_errors++; - if (devcs & ETH_RX_MP) - dev->stats.multicast++; - - if ((devcs & ETH_RX_LD) != ETH_RX_LD) { - /* check that this is a whole packet - * WARNING: DMA_FD bit incorrectly set - * in Rc32434 (errata ref #077) */ + /* check that this is a whole packet + * WARNING: DMA_FD bit incorrectly set + * in Rc32434 (errata ref #077) */ + if (!(devcs & ETH_RX_LD)) + goto next; + + if (!(devcs & ETH_RX_ROK)) { + /* Update statistics counters */ dev->stats.rx_errors++; dev->stats.rx_dropped++; - } else if ((devcs & ETH_RX_ROK)) { - pkt_len = RCVPKT_LENGTH(devcs); + if (devcs & ETH_RX_CRC) + dev->stats.rx_crc_errors++; + if (devcs & ETH_RX_LE) + dev->stats.rx_length_errors++; + if (devcs & ETH_RX_OVR) + dev->stats.rx_fifo_errors++; + if (devcs & ETH_RX_CV) + dev->stats.rx_frame_errors++; + if (devcs & ETH_RX_CES) + dev->stats.rx_frame_errors++; + + goto next; + } - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; + pkt_len = RCVPKT_LENGTH(devcs); - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); + /* must be the (first and) last + * descriptor then */ + pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - /* Malloc up new buffer. */ - skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); + /* invalidate the cache */ + dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - if (!skb_new) - break; - /* Do not count the CRC */ - skb_put(skb, pkt_len - 4); - skb->protocol = eth_type_trans(skb, dev); + /* Malloc up new buffer. */ + skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE); - /* Pass the packet to upper layers */ - netif_receive_skb(skb); - dev->stats.rx_packets++; - dev->stats.rx_bytes += pkt_len; + if (!skb_new) + break; + /* Do not count the CRC */ + skb_put(skb, pkt_len - 4); + skb->protocol = eth_type_trans(skb, dev); - /* Update the mcast stats */ - if (devcs & ETH_RX_MP) - dev->stats.multicast++; + /* Pass the packet to upper layers */ + netif_receive_skb(skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; - lp->rx_skb[lp->rx_next_done] = skb_new; - } + /* Update the mcast stats */ + if (devcs & ETH_RX_MP) + dev->stats.multicast++; + + lp->rx_skb[lp->rx_next_done] = skb_new; +next: rd->devcs = 0; /* Restore descriptor's curr_addr */ -- cgit v1.2.3 From d609d2893c25a3336422d06e1aff50d13ba5b7f2 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:38 +0300 Subject: net: korina: use NAPI_POLL_WEIGHT Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index e5466e19994a..c210add9b654 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -1082,7 +1082,7 @@ static int korina_probe(struct platform_device *pdev) dev->netdev_ops = &korina_netdev_ops; dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - netif_napi_add(dev, &lp->napi, korina_poll, 64); + netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT); lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05); lp->mii_if.dev = dev; -- cgit v1.2.3 From 247c78f2bed0c4d72c381c9caf429173513dcc51 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:24:50 +0300 Subject: net: korina: use GRO Performance gain when receiving locally is 55->95Mbps and 50->65Mbps for NAT. Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index c210add9b654..5f36e1703378 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -406,7 +406,7 @@ static int korina_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); /* Pass the packet to upper layers */ - netif_receive_skb(skb); + napi_gro_receive(&lp->napi, skb); dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; -- cgit v1.2.3 From 2e5396b14db3c885c5b9de698ecf38652a0f3c15 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:02 +0300 Subject: net: korina: whitespace cleanup Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 58 +++++++++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index 5f36e1703378..c26f0d84ba6b 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -64,9 +64,9 @@ #include #include -#define DRV_NAME "korina" -#define DRV_VERSION "0.10" -#define DRV_RELDATE "04Mar2008" +#define DRV_NAME "korina" +#define DRV_VERSION "0.10" +#define DRV_RELDATE "04Mar2008" #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) @@ -75,7 +75,7 @@ ((dev)->dev_addr[4] << 8) | \ ((dev)->dev_addr[5])) -#define MII_CLOCK 1250000 /* no more than 2.5MHz */ +#define MII_CLOCK 1250000 /* no more than 2.5MHz */ /* the following must be powers of two */ #define KORINA_NUM_RDS 64 /* number of receive descriptors */ @@ -87,15 +87,19 @@ #define KORINA_RBSIZE 1536 /* size of one resource buffer = Ether MTU */ #define KORINA_RDS_MASK (KORINA_NUM_RDS - 1) #define KORINA_TDS_MASK (KORINA_NUM_TDS - 1) -#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) +#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) #define TD_RING_SIZE (KORINA_NUM_TDS * sizeof(struct dma_desc)) -#define TX_TIMEOUT (6000 * HZ / 1000) +#define TX_TIMEOUT (6000 * HZ / 1000) -enum chain_status { desc_filled, desc_empty }; -#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) -#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) -#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) +enum chain_status { + desc_filled, + desc_empty +}; + +#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0) +#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0) +#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT) /* Information that need to be kept for each board. */ struct korina_private { @@ -123,7 +127,7 @@ struct korina_private { int rx_irq; int tx_irq; - spinlock_t lock; /* NIC xmit lock */ + spinlock_t lock; /* NIC xmit lock */ int dma_halt_cnt; int dma_run_cnt; @@ -146,17 +150,17 @@ static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr) static inline void korina_abort_dma(struct net_device *dev, struct dma_reg *ch) { - if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { - writel(0x10, &ch->dmac); + if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) { + writel(0x10, &ch->dmac); - while (!(readl(&ch->dmas) & DMA_STAT_HALT)) - netif_trans_update(dev); + while (!(readl(&ch->dmas) & DMA_STAT_HALT)) + netif_trans_update(dev); - writel(0, &ch->dmas); - } + writel(0, &ch->dmas); + } - writel(0, &ch->dmadptr); - writel(0, &ch->dmandptr); + writel(0, &ch->dmadptr); + writel(0, &ch->dmandptr); } static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr) @@ -685,7 +689,7 @@ static int korina_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) /* ethtool helpers */ static void netdev_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) + struct ethtool_drvinfo *info) { struct korina_private *lp = netdev_priv(dev); @@ -728,10 +732,10 @@ static u32 netdev_get_link(struct net_device *dev) } static const struct ethtool_ops netdev_ethtool_ops = { - .get_drvinfo = netdev_get_drvinfo, - .get_link = netdev_get_link, - .get_link_ksettings = netdev_get_link_ksettings, - .set_link_ksettings = netdev_set_link_ksettings, + .get_drvinfo = netdev_get_drvinfo, + .get_link = netdev_get_link, + .get_link_ksettings = netdev_get_link_ksettings, + .set_link_ksettings = netdev_set_link_ksettings, }; static int korina_alloc_ring(struct net_device *dev) @@ -863,7 +867,7 @@ static int korina_init(struct net_device *dev) /* Management Clock Prescaler Divisor * Clock independent setting */ writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1, - &lp->eth_regs->ethmcp); + &lp->eth_regs->ethmcp); /* don't transmit until fifo contains 48b */ writel(48, &lp->eth_regs->ethfifott); @@ -946,14 +950,14 @@ static int korina_open(struct net_device *dev) 0, "Korina ethernet Rx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n", - dev->name, lp->rx_irq); + dev->name, lp->rx_irq); goto err_release; } ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt, 0, "Korina ethernet Tx", dev); if (ret < 0) { printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n", - dev->name, lp->tx_irq); + dev->name, lp->tx_irq); goto err_free_rx_irq; } -- cgit v1.2.3 From 87736fc6f75f23ae2583ee197a0e85515f246ba6 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:11 +0300 Subject: net: korina: update authors Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index c26f0d84ba6b..d58aa4bfcb58 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -4,6 +4,7 @@ * Copyright 2004 IDT Inc. (rischelp@idt.com) * Copyright 2006 Felix Fietkau * Copyright 2008 Florian Fainelli + * Copyright 2017 Roman Yeryomin * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -1150,5 +1151,6 @@ module_platform_driver(korina_driver); MODULE_AUTHOR("Philip Rischel "); MODULE_AUTHOR("Felix Fietkau "); MODULE_AUTHOR("Florian Fainelli "); +MODULE_AUTHOR("Roman Yeryomin "); MODULE_DESCRIPTION("IDT RC32434 (Korina) Ethernet driver"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From da1d2def654dc5a9cae346a93f25bd2e8959b080 Mon Sep 17 00:00:00 2001 From: Roman Yeryomin Date: Sun, 17 Sep 2017 20:25:21 +0300 Subject: net: korina: bump version Signed-off-by: Roman Yeryomin Signed-off-by: David S. Miller --- drivers/net/ethernet/korina.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index d58aa4bfcb58..7cecd9dbc111 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -66,8 +66,8 @@ #include #define DRV_NAME "korina" -#define DRV_VERSION "0.10" -#define DRV_RELDATE "04Mar2008" +#define DRV_VERSION "0.20" +#define DRV_RELDATE "15Sep2017" #define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \ ((dev)->dev_addr[1])) -- cgit v1.2.3 From 7e5dd53f6ee81ffa0d1f42f79e6440b6a751ab40 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 18 Sep 2017 12:40:38 +0100 Subject: net_sched: use explicit size of struct tcmsg, remove need to declare tcm Pointer tcm is being initialized and is never read, it is only being used to determine the size of struct tcmsg. Clean this up by removing variable tcm and explicitly using the sizeof struct tcmsg rather than *tcm. Cleans up clang warning: warning: Value stored to 'tcm' during its initialization is never read Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/sched/sch_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c6deb74e3d2f..aa82116ed10c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1500,7 +1500,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; - struct tcmsg *tcm = nlmsg_data(nlh); struct nlattr *tca[TCA_MAX + 1]; int err; @@ -1510,7 +1509,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; -- cgit v1.2.3 From 3c75f6ee139d464351f8ab77a042dd3635769d98 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2017 12:36:22 -0700 Subject: net_sched: sch_htb: add per class overlimits counter HTB qdisc overlimits counter is properly increased, but we have no per class counter, meaning it is difficult to diagnose HTB problems. This patch adds this counter, visible in "tc -s class show dev eth0", with current iproute2. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 7e148376ba52..c6d7ae81b41f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -142,6 +142,7 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ unsigned int drops ____cacheline_aligned_in_smp; + unsigned int overlimits; }; struct htb_level { @@ -533,6 +534,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; + if (new_mode == HTB_CANT_SEND) + cl->overlimits++; + if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) htb_deactivate_prios(q, cl); @@ -1143,6 +1147,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) struct htb_class *cl = (struct htb_class *)arg; struct gnet_stats_queue qs = { .drops = cl->drops, + .overlimits = cl->overlimits, }; __u32 qlen = 0; -- cgit v1.2.3 From 38c5eb93aca9dc1b21a2c96d583ce7f9886a44e6 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 18 Sep 2017 15:36:51 +0200 Subject: net: mvpp2: remove useless goto Remove a goto in the PPv2 tx function which jumps to the next line anyway. This is a cosmetic commit. Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index dd0ee2691c86..8041d692db3c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -6452,7 +6452,6 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { tx_desc_unmap_put(port, txq, tx_desc); frags = 0; - goto out; } } -- cgit v1.2.3 From 173f4c5ebbd803023e42799d956cf174dea92db5 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 18 Sep 2017 20:18:55 +0200 Subject: vsock: vmci: Remove unneeded linux/miscdevice.h include net/vmw_vsock/vmci_transport.c does not use any miscdevice so this patch remove this unnecessary inclusion. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 10ae7823a19d..0206155bff53 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e454cf5958538666635488030046b6a84a22d447 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:55 -0400 Subject: bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM_TRIE This is a simple non-recursive delete operation. It prunes paths of empty nodes in the tree, but it does not try to further compress the tree as nodes are removed. Signed-off-by: Craig Gallek Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1b767844a76f..9d58a576b2ae 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -389,10 +389,84 @@ out: return ret; } -static int trie_delete_elem(struct bpf_map *map, void *key) +/* Called from syscall or from eBPF program */ +static int trie_delete_elem(struct bpf_map *map, void *_key) { - /* TODO */ - return -ENOSYS; + struct lpm_trie *trie = container_of(map, struct lpm_trie, map); + struct bpf_lpm_trie_key *key = _key; + struct lpm_trie_node __rcu **trim; + struct lpm_trie_node *node; + unsigned long irq_flags; + unsigned int next_bit; + size_t matchlen = 0; + int ret = 0; + + if (key->prefixlen > trie->max_prefixlen) + return -EINVAL; + + raw_spin_lock_irqsave(&trie->lock, irq_flags); + + /* Walk the tree looking for an exact key/length match and keeping + * track of where we could begin trimming the tree. The trim-point + * is the sub-tree along the walk consisting of only single-child + * intermediate nodes and ending at a leaf node that we want to + * remove. + */ + trim = &trie->root; + node = rcu_dereference_protected( + trie->root, lockdep_is_held(&trie->lock)); + while (node) { + matchlen = longest_prefix_match(trie, node, key); + + if (node->prefixlen != matchlen || + node->prefixlen == key->prefixlen) + break; + + next_bit = extract_bit(key->data, node->prefixlen); + /* If we hit a node that has more than one child or is a valid + * prefix itself, do not remove it. Reset the root of the trim + * path to its descendant on our path. + */ + if (!(node->flags & LPM_TREE_NODE_FLAG_IM) || + (node->child[0] && node->child[1])) + trim = &node->child[next_bit]; + node = rcu_dereference_protected( + node->child[next_bit], lockdep_is_held(&trie->lock)); + } + + if (!node || node->prefixlen != key->prefixlen || + (node->flags & LPM_TREE_NODE_FLAG_IM)) { + ret = -ENOENT; + goto out; + } + + trie->n_entries--; + + /* If the node we are removing is not a leaf node, simply mark it + * as intermediate and we are done. + */ + if (rcu_access_pointer(node->child[0]) || + rcu_access_pointer(node->child[1])) { + node->flags |= LPM_TREE_NODE_FLAG_IM; + goto out; + } + + /* trim should now point to the slot holding the start of a path from + * zero or more intermediate nodes to our leaf node for deletion. + */ + while ((node = rcu_dereference_protected( + *trim, lockdep_is_held(&trie->lock)))) { + RCU_INIT_POINTER(*trim, NULL); + trim = rcu_access_pointer(node->child[0]) ? + &node->child[0] : + &node->child[1]; + kfree_rcu(node, rcu); + } + +out: + raw_spin_unlock_irqrestore(&trie->lock, irq_flags); + + return ret; } #define LPM_DATA_SIZE_MAX 256 -- cgit v1.2.3 From bae30468dfc1522464f16051addd7bc7da6da75a Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:56 -0400 Subject: bpf: Add uniqueness invariant to trivial lpm test implementation The 'trivial' lpm implementation in this test allows equivalent nodes to be added (that is, nodes consisting of the same prefix and prefix length). For lookup operations, this is fine because insertion happens at the head of the (singly linked) list and the first, best match is returned. In order to support deletion, the tlpm data structue must first enforce uniqueness. This change modifies the insertion algorithm to search for equivalent nodes and remove them. Note: the BPF_MAP_TYPE_LPM_TRIE already has a uniqueness invariant that is implemented as node replacement. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_lpm_map.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index e97565243d59..a5ed7c4f1819 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -31,6 +31,10 @@ struct tlpm_node { uint8_t key[]; }; +static struct tlpm_node *tlpm_match(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits); + static struct tlpm_node *tlpm_add(struct tlpm_node *list, const uint8_t *key, size_t n_bits) @@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list, struct tlpm_node *node; size_t n; + n = (n_bits + 7) / 8; + + /* 'overwrite' an equivalent entry if one already exists */ + node = tlpm_match(list, key, n_bits); + if (node && node->n_bits == n_bits) { + memcpy(node->key, key, n); + return list; + } + /* add new entry with @key/@n_bits to @list and return new head */ - n = (n_bits + 7) / 8; node = malloc(sizeof(*node) + n); assert(node); -- cgit v1.2.3 From e8d1749910b56a7248cb9a5392274348d10108bb Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Mon, 18 Sep 2017 15:30:57 -0400 Subject: bpf: Test deletion in BPF_MAP_TYPE_LPM_TRIE Extend the 'random' operation tests to include a delete operation (delete half of the nodes from both lpm implementions and ensure that lookups are still equivalent). Also, add a simple IPv4 test which verifies lookup behavior as nodes are deleted from the tree. Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_lpm_map.c | 187 ++++++++++++++++++++++++++++- 1 file changed, 183 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index a5ed7c4f1819..6fedb9fec36b 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -104,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list, return best; } +static struct tlpm_node *tlpm_delete(struct tlpm_node *list, + const uint8_t *key, + size_t n_bits) +{ + struct tlpm_node *best = tlpm_match(list, key, n_bits); + struct tlpm_node *node; + + if (!best || best->n_bits != n_bits) + return list; + + if (best == list) { + node = best->next; + free(best); + return node; + } + + for (node = list; node; node = node->next) { + if (node->next == best) { + node->next = best->next; + free(best); + return list; + } + } + /* should never get here */ + assert(0); + return list; +} + static void test_lpm_basic(void) { struct tlpm_node *list = NULL, *t1, *t2; @@ -126,6 +154,13 @@ static void test_lpm_basic(void) assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15)); assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16)); + list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16)); + + list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8); + assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8)); + tlpm_clear(list); } @@ -170,7 +205,7 @@ static void test_lpm_order(void) static void test_lpm_map(int keysize) { - size_t i, j, n_matches, n_nodes, n_lookups; + size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; uint8_t *data, *value; @@ -182,6 +217,7 @@ static void test_lpm_map(int keysize) */ n_matches = 0; + n_matches_after_delete = 0; n_nodes = 1 << 8; n_lookups = 1 << 16; @@ -235,15 +271,54 @@ static void test_lpm_map(int keysize) } } + /* Remove the first half of the elements in the tlpm and the + * corresponding nodes from the bpf-lpm. Then run the same + * large number of random lookups in both and make sure they match. + * Note: we need to count the number of nodes actually inserted + * since there may have been duplicates. + */ + for (i = 0, t = list; t; i++, t = t->next) + ; + for (j = 0; j < i / 2; ++j) { + key->prefixlen = list->n_bits; + memcpy(key->data, list->key, keysize); + r = bpf_map_delete_elem(map, key); + assert(!r); + list = tlpm_delete(list, list->key, list->n_bits); + assert(list); + } + for (i = 0; i < n_lookups; ++i) { + for (j = 0; j < keysize; ++j) + data[j] = rand() & 0xff; + + t = tlpm_match(list, data, 8 * keysize); + + key->prefixlen = 8 * keysize; + memcpy(key->data, data, keysize); + r = bpf_map_lookup_elem(map, key, value); + assert(!r || errno == ENOENT); + assert(!t == !!r); + + if (t) { + ++n_matches_after_delete; + assert(t->n_bits == value[keysize]); + for (j = 0; j < t->n_bits; ++j) + assert((t->key[j / 8] & (1 << (7 - j % 8))) == + (value[j / 8] & (1 << (7 - j % 8)))); + } + } + close(map); tlpm_clear(list); /* With 255 random nodes in the map, we are pretty likely to match * something on every lookup. For statistics, use this: * - * printf(" nodes: %zu\n" - * "lookups: %zu\n" - * "matches: %zu\n", n_nodes, n_lookups, n_matches); + * printf(" nodes: %zu\n" + * " lookups: %zu\n" + * " matches: %zu\n" + * "matches(delete): %zu\n", + * n_nodes, n_lookups, n_matches, n_matches_after_delete); */ } @@ -343,6 +418,108 @@ static void test_lpm_ipaddr(void) close(map_fd_ipv6); } +static void test_lpm_delete(void) +{ + struct bpf_lpm_trie_key *key; + size_t key_size; + int map_fd; + __u64 value; + + key_size = sizeof(*key) + sizeof(__u32); + key = alloca(key_size); + + map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + key_size, sizeof(value), + 100, BPF_F_NO_PREALLOC); + assert(map_fd >= 0); + + /* Add nodes: + * 192.168.0.0/16 (1) + * 192.168.0.0/24 (2) + * 192.168.128.0/24 (3) + * 192.168.1.0/24 (4) + * + * (1) + * / \ + * (IM) (3) + * / \ + * (2) (4) + */ + value = 1; + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 2; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 3; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + value = 4; + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0); + + /* remove non-existent node */ + key->prefixlen = 32; + inet_pton(AF_INET, "10.0.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + /* assert initial lookup */ + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 2); + + /* remove leaf node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.0.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove leaf (and intermediary) node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.1.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.1.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 1); + + /* remove root node */ + key->prefixlen = 16; + inet_pton(AF_INET, "192.168.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == 0); + assert(value == 3); + + /* remove last node */ + key->prefixlen = 24; + inet_pton(AF_INET, "192.168.128.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == 0); + + key->prefixlen = 32; + inet_pton(AF_INET, "192.168.128.1", key->data); + assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && + errno == ENOENT); + + close(map_fd); +} + int main(void) { struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; @@ -365,6 +542,8 @@ int main(void) test_lpm_ipaddr(); + test_lpm_delete(); + printf("test_lpm: OK\n"); return 0; } -- cgit v1.2.3 From e2b2d35a052d9264a774715bc6aa3395a45dcfa2 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:08 +0200 Subject: mlxsw: spectrum: Change init order The multicast router offloading code is going to require the counter_pools initialization to occur before the router initialization, thus, change the spectrum initialization order to fix it. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 696b99e65a5a..97284161ea35 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3693,6 +3693,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3711,12 +3717,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } - err = mlxsw_sp_counter_pool_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); - goto err_counter_pool_init; - } - err = mlxsw_sp_dpipe_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); @@ -3734,14 +3734,14 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_ports_create: mlxsw_sp_dpipe_fini(mlxsw_sp); err_dpipe_init: - mlxsw_sp_counter_pool_fini(mlxsw_sp); -err_counter_pool_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: mlxsw_sp_lag_fini(mlxsw_sp); @@ -3760,10 +3760,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); - mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); -- cgit v1.2.3 From d3b939b8f9a571da82359b6baa5506c9179770d1 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:09 +0200 Subject: mlxsw: spectrum: Move ACL flexible actions instance to spectrum A flexible action instance allows, given a set of ops, creating, committing and sharing a set of ACL action blocks. The flexible action instance in question is using the spectrum KVD linear space to store the flexible action sets. Move this flexible action instance to the common spectrum struct to allow other users (such as multicast router) to get that functionality. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 93 +-------------- .../mellanox/mlxsw/spectrum_acl_flex_actions.c | 129 +++++++++++++++++++++ .../mellanox/mlxsw/spectrum_acl_flex_actions.h | 44 +++++++ 6 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 891ff418bb5e..4b88158173f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -17,7 +17,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o + spectrum_ipip.o spectrum_acl_flex_actions.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 97284161ea35..6ba6ff276b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -69,6 +69,7 @@ #include "txheader.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_acl_flex_actions.h" #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 @@ -3699,6 +3700,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_counter_pool_init; } + err = mlxsw_sp_afa_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n"); + goto err_afa_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3740,6 +3747,8 @@ err_acl_init: err_span_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_afa_fini(mlxsw_sp); +err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); err_counter_pool_init: mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -3763,6 +3772,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_lag_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 84ce83acdc19..7180d8f3de75 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -152,6 +152,7 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 4b2455e3e079..2523785f1904 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -52,7 +52,6 @@ struct mlxsw_sp_acl { struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; - struct mlxsw_afa *afa; struct mlxsw_sp_fid *dummy_fid; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; @@ -333,7 +332,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) rulei = kzalloc(sizeof(*rulei), GFP_KERNEL); if (!rulei) return NULL; - rulei->act_block = mlxsw_afa_block_create(acl->afa); + rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa); if (IS_ERR(rulei->act_block)) { err = PTR_ERR(rulei->act_block); goto err_afa_block_create; @@ -653,85 +652,6 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, return 0; } -#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 - -static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, - char *enc_actions, bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char pefa_pl[MLXSW_REG_PEFA_LEN]; - u32 kvdl_index; - int err; - - /* The first action set of a TCAM entry is stored directly in TCAM, - * not KVD linear area. - */ - if (is_first) - return 0; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE, - &kvdl_index); - if (err) - return err; - mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); - if (err) - goto err_pefa_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_pefa_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, - bool is_first) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - if (is_first) - return; - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, - u8 local_port) -{ - struct mlxsw_sp *mlxsw_sp = priv; - char ppbs_pl[MLXSW_REG_PPBS_LEN]; - u32 kvdl_index; - int err; - - err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); - if (err) - return err; - mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); - if (err) - goto err_ppbs_write; - *p_kvdl_index = kvdl_index; - return 0; - -err_ppbs_write: - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); - return err; -} - -static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) -{ - struct mlxsw_sp *mlxsw_sp = priv; - - mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); -} - -static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { - .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, - .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, - .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, - .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, -}; - int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) { const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops; @@ -753,14 +673,6 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) goto err_afk_create; } - acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, - ACL_ACTIONS_PER_SET), - &mlxsw_sp_act_afa_ops, mlxsw_sp); - if (IS_ERR(acl->afa)) { - err = PTR_ERR(acl->afa); - goto err_afa_create; - } - err = rhashtable_init(&acl->ruleset_ht, &mlxsw_sp_acl_ruleset_ht_params); if (err) @@ -792,8 +704,6 @@ err_acl_ops_init: err_fid_get: rhashtable_destroy(&acl->ruleset_ht); err_rhashtable_init: - mlxsw_afa_destroy(acl->afa); -err_afa_create: mlxsw_afk_destroy(acl->afk); err_afk_create: kfree(acl); @@ -810,7 +720,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) WARN_ON(!list_empty(&acl->rules)); mlxsw_sp_fid_put(acl->dummy_fid); rhashtable_destroy(&acl->ruleset_ht); - mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); kfree(acl); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c new file mode 100644 index 000000000000..4d3340ed0291 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -0,0 +1,129 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "spectrum_acl_flex_actions.h" +#include "core_acl_flex_actions.h" + +#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 + +static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, + char *enc_actions, bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char pefa_pl[MLXSW_REG_PEFA_LEN]; + u32 kvdl_index; + int err; + + /* The first action set of a TCAM entry is stored directly in TCAM, + * not KVD linear area. + */ + if (is_first) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE, + &kvdl_index); + if (err) + return err; + mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); + if (err) + goto err_pefa_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_pefa_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index, + bool is_first) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + if (is_first) + return; + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, + u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = priv; + char ppbs_pl[MLXSW_REG_PPBS_LEN]; + u32 kvdl_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + if (err) + return err; + mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); + if (err) + goto err_ppbs_write; + *p_kvdl_index = kvdl_index; + return 0; + +err_ppbs_write: + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); + return err; +} + +static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index); +} + +static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { + .kvdl_set_add = mlxsw_sp_act_kvdl_set_add, + .kvdl_set_del = mlxsw_sp_act_kvdl_set_del, + .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add, + .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del, +}; + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, + ACL_ACTIONS_PER_SET), + &mlxsw_sp_act_afa_ops, mlxsw_sp); + return PTR_ERR_OR_ZERO(mlxsw_sp->afa); +} + +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_afa_destroy(mlxsw_sp->afa); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h new file mode 100644 index 000000000000..2726192836ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h @@ -0,0 +1,44 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Jiri Pirko + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H +#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H + +#include "spectrum.h" + +int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp); + +#endif -- cgit v1.2.3 From 4b8a79ff27645c1201287c3b17091add748d1fb9 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:10 +0200 Subject: mlxsw: acl: Introduce mcrouter ACL action The Spectrum multicast forwarding is done using an ACL action. Add the mcrouter ACL action that will be used to offload the multicast router logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../mellanox/mlxsw/core_acl_flex_actions.c | 71 ++++++++++++++++++++++ .../mellanox/mlxsw/core_acl_flex_actions.h | 3 + 2 files changed, 74 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 5ae110172c22..65a32d7b4350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -891,3 +891,74 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); + +/* MC Routing Action + * ----------------- + * The Multicast router action. Can be used by RMFT_V2 - Router Multicast + * Forwarding Table Version 2 Register. + */ + +#define MLXSW_AFA_MCROUTER_CODE 0x10 +#define MLXSW_AFA_MCROUTER_SIZE 2 + +enum mlxsw_afa_mcrouter_rpf_action { + MLXSW_AFA_MCROUTER_RPF_ACTION_NOP, + MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR, +}; + +/* afa_mcrouter_rpf_action */ +MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3); + +/* afa_mcrouter_expected_irif */ +MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16); + +/* afa_mcrouter_min_mtu */ +MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16); + +enum mlxsw_afa_mrouter_vrmid { + MLXSW_AFA_MCROUTER_VRMID_INVALID, + MLXSW_AFA_MCROUTER_VRMID_VALID +}; + +/* afa_mcrouter_vrmid + * Valid RMID: rigr_rmid_index is used as RMID + */ +MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1); + +/* afa_mcrouter_rigr_rmid_index + * When the vrmid field is set to invalid, the field is used as pointer to + * Router Interface Group (RIGR) Table in the KVD linear. + * When the vrmid is set to valid, the field is used as RMID index, ranged + * from 0 to max_mid - 1. The index is to the Port Group Table. + */ +MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24); + +static inline void +mlxsw_afa_mcrouter_pack(char *payload, + enum mlxsw_afa_mcrouter_rpf_action rpf_action, + u16 expected_irif, u16 min_mtu, + enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index) + +{ + mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action); + mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif); + mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu); + mlxsw_afa_mcrouter_vrmid_set(payload, vrmid); + mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index); +} + +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_MCROUTER_CODE, + MLXSW_AFA_MCROUTER_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP, + expected_irif, min_mtu, rmid_valid, kvdl_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index f99c341b2497..5dbb31fa5a27 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -68,5 +68,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, u32 counter_index); int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); +int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block, + u16 expected_irif, u16 min_mtu, + bool rmid_valid, u32 kvdl_index); #endif -- cgit v1.2.3 From 9cb3fa940e2c1c62d35972ab8433531a4ba421a5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:11 +0200 Subject: mlxsw: acl: Change trap ACL action to get the trap_id as a parameter Allow the trap ACL action to be configured with different traps. This allows the multicast router offloading code to use that same ACL action with the multicast router traps. By using different traps, the multicast router can have different trap policies and can handle the packet differently. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 65a32d7b4350..ab3ffe7a8eda 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -712,7 +712,7 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_append_drop); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) { char *act = mlxsw_afa_block_append_action(block, MLXSW_AFA_TRAPDISC_CODE, @@ -722,7 +722,7 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block) return -ENOBUFS; mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD, - MLXSW_TRAP_ID_ACL0); + trap_id); return 0; } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 5dbb31fa5a27..501819c790d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -60,7 +60,7 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); -int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block); +int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 2523785f1904..eede75fbd585 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -396,7 +396,8 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei) { - return mlxsw_afa_block_append_trap(rulei->act_block); + return mlxsw_afa_block_append_trap(rulei->act_block, + MLXSW_TRAP_ID_ACL0); } int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, -- cgit v1.2.3 From 587265655159d73247a56236092917131183496e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:12 +0200 Subject: mlxsw: reg: Rename the flexible action set length field The MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN is relevant for the multicast router registers too, so rename it to have a general name which is not bound to a specific register. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index cc27c5de5a1d..fb8ab441b11e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2142,15 +2142,14 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN); */ MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24); -#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8 +#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8 /* reg_pefa_flex_action_set * Action-set to perform when rule is matched. * Must be zero padded if action set is shorter. * Access: RW */ -MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); +MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_pefa_pack(char *payload, u32 index, const char *flex_action_set) @@ -2243,7 +2242,7 @@ MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80, * Access: RW */ MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0, - MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN); + MLXSW_REG_FLEX_ACTION_SET_LEN); static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid, enum mlxsw_reg_ptce2_op op, -- cgit v1.2.3 From 46a7054ebace0fcd0d1826881aa5ab219faa6a77 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:13 +0200 Subject: mlxsw: reg: Add The Router TCAM Allocation register This register is used for allocation of regions in the TCAM table and it will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index fb8ab441b11e..e9f37eac8788 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4310,6 +4310,57 @@ mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); } +/* RTAR - Router TCAM Allocation Register + * -------------------------------------- + * This register is used for allocation of regions in the TCAM table. + */ +#define MLXSW_REG_RTAR_ID 0x8004 +#define MLXSW_REG_RTAR_LEN 0x20 + +MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN); + +enum mlxsw_reg_rtar_op { + MLXSW_REG_RTAR_OP_ALLOCATE, + MLXSW_REG_RTAR_OP_RESIZE, + MLXSW_REG_RTAR_OP_DEALLOCATE, +}; + +/* reg_rtar_op + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4); + +enum mlxsw_reg_rtar_key_type { + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1, + MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3 +}; + +/* reg_rtar_key_type + * TCAM key type for the region. + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8); + +/* reg_rtar_region_size + * TCAM region size. When allocating/resizing this is the requested + * size, the response is the actual size. + * Note: Actual size may be larger than requested. + * Reserved for op = Deallocate + * Access: WO + */ +MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16); + +static inline void mlxsw_reg_rtar_pack(char *payload, + enum mlxsw_reg_rtar_op op, + enum mlxsw_reg_rtar_key_type key_type, + u16 region_size) +{ + MLXSW_REG_ZERO(rtar, payload); + mlxsw_reg_rtar_op_set(payload, op); + mlxsw_reg_rtar_key_type_set(payload, key_type); + mlxsw_reg_rtar_region_size_set(payload, region_size); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -6855,6 +6906,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(hpkt), MLXSW_REG(rgcr), MLXSW_REG(ritr), + MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), MLXSW_REG(ricnt), -- cgit v1.2.3 From 5080c7e91701744ef1a5d7aab51f568f889bfddb Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:14 +0200 Subject: mlxsw: reg: Add the Router Interface Group Version 2 register The RIGR-V2 register is used to add, remove and query egress interface list of a multicast forwarding entry and it will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 83 +++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index e9f37eac8788..1778d7f5f843 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5646,6 +5646,88 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); } +/* RIGR-V2 - Router Interface Group Register Version 2 + * --------------------------------------------------- + * The RIGR_V2 register is used to add, remove and query egress interface list + * of a multicast forwarding entry. + */ +#define MLXSW_REG_RIGR2_ID 0x8023 +#define MLXSW_REG_RIGR2_LEN 0xB0 + +#define MLXSW_REG_RIGR2_MAX_ERIFS 32 + +MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN); + +/* reg_rigr2_rigr_index + * KVD Linear index. + * Access: Index + */ +MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24); + +/* reg_rigr2_vnext + * Next RIGR Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1); + +/* reg_rigr2_next_rigr_index + * Next RIGR Index. The index is to the KVD linear. + * Reserved when vnxet = '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24); + +/* reg_rigr2_vrmid + * RMID Index is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1); + +/* reg_rigr2_rmid_index + * RMID Index. + * Range 0 .. max_mid - 1 + * Reserved when vrmid = '0'. + * The index is to the Port Group Table (PGT) + * Access: RW + */ +MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16); + +/* reg_rigr2_erif_entry_v + * Egress Router Interface is valid. + * Note that low-entries must be set if high-entries are set. For + * example: if erif_entry[2].v is set then erif_entry[1].v and + * erif_entry[0].v must be set. + * Index can be from 0 to cap_mc_erif_list_entries-1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false); + +/* reg_rigr2_erif_entry_erif + * Egress Router Interface. + * Valid range is from 0 to cap_max_router_interfaces - 1 + * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1 + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false); + +static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index, + bool vnext, u32 next_rigr_index) +{ + MLXSW_REG_ZERO(rigr2, payload); + mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index); + mlxsw_reg_rigr2_vnext_set(payload, vnext); + mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index); + mlxsw_reg_rigr2_vrmid_set(payload, 0); + mlxsw_reg_rigr2_rmid_index_set(payload, 0); +} + +static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, + bool v, u16 erif) +{ + mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v); + mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -6917,6 +6999,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rauht), MLXSW_REG(raleu), MLXSW_REG(rauhtd), + MLXSW_REG(rigr2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), -- cgit v1.2.3 From 771ced742a4f02ac248ad679325bd434843d78d0 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:15 +0200 Subject: mlxsw: resources: Add multicast ERIF list entries resource The multicast ERIF list entries resource indicates the number of entries that can be put in one rigr2 register operation. While the register can hold up to MLXSW_REG_RIGR2_MAX_ERIFS ( = 32) ERIF entries, the actual number allowed by firmware is indicated with this resource. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/resources.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 9556d934714b..087aad52c195 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -63,6 +63,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES, MLXSW_RES_ID_MAX_LPM_TREES, /* Internal resources. @@ -100,6 +101,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10, [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, }; -- cgit v1.2.3 From 2e654e33c5791332d7abf759fd9d34a39082ffc7 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:16 +0200 Subject: mlxsw: reg: Add the Router Multicast Forwarding Table Version 2 register The RMFT-V2 register is used to configure and query the multicast table and will be used by the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 142 ++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1778d7f5f843..046525ebe5ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5728,6 +5728,147 @@ static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index, mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif); } +/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register + * -------------------------------------------------------------- + * The RMFT_V2 register is used to configure and query the multicast table. + */ +#define MLXSW_REG_RMFT2_ID 0x8027 +#define MLXSW_REG_RMFT2_LEN 0x174 + +MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN); + +/* reg_rmft2_v + * Valid + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1); + +enum mlxsw_reg_rmft2_type { + MLXSW_REG_RMFT2_TYPE_IPV4, + MLXSW_REG_RMFT2_TYPE_IPV6 +}; + +/* reg_rmft2_type + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2); + +enum mlxsw_sp_reg_rmft2_op { + /* For Write: + * Write operation. Used to write a new entry to the table. All RW + * fields are relevant for new entry. Activity bit is set for new + * entries - Note write with v (Valid) 0 will delete the entry. + * For Query: + * Read operation + */ + MLXSW_REG_RMFT2_OP_READ_WRITE, +}; + +/* reg_rmft2_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2); + +/* reg_rmft2_a + * Activity. Set for new entries. Set if a packet lookup has hit on the specific + * entry. + * Access: RO + */ +MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1); + +/* reg_rmft2_offset + * Offset within the multicast forwarding table to write to. + * Access: Index + */ +MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16); + +/* reg_rmft2_virtual_router + * Virtual Router ID. Range from 0..cap_max_virtual_routers-1 + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16); + +enum mlxsw_reg_rmft2_irif_mask { + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, + MLXSW_REG_RMFT2_IRIF_MASK_COMPARE +}; + +/* reg_rmft2_irif_mask + * Ingress RIF mask. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1); + +/* reg_rmft2_irif + * Ingress RIF index. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16); + +/* reg_rmft2_dip4 + * Destination IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32); + +/* reg_rmft2_dip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32); + +/* reg_rmft2_sip4 + * Source IPv4 address + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32); + +/* reg_rmft2_sip4_mask + * A bit that is set directs the TCAM to compare the corresponding bit in key. A + * bit that is clear directs the TCAM to ignore the corresponding bit in key. + * Access: RW + */ +MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32); + +/* reg_rmft2_flexible_action_set + * ACL action set. The only supported action types in this field and in any + * action-set pointed from here are as follows: + * 00h: ACTION_NULL + * 01h: ACTION_MAC_TTL, only TTL configuration is supported. + * 03h: ACTION_TRAP + * 06h: ACTION_QOS + * 08h: ACTION_POLICING_MONITORING + * 10h: ACTION_ROUTER_MC + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80, + MLXSW_REG_FLEX_ACTION_SET_LEN); + +static inline void +mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router, + enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif, + u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask, + const char *flexible_action_set) +{ + MLXSW_REG_ZERO(rmft2, payload); + mlxsw_reg_rmft2_v_set(payload, v); + mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4); + mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE); + mlxsw_reg_rmft2_offset_set(payload, offset); + mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router); + mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask); + mlxsw_reg_rmft2_irif_set(payload, irif); + mlxsw_reg_rmft2_dip4_set(payload, dip4); + mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask); + mlxsw_reg_rmft2_sip4_set(payload, sip4); + mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask); + if (flexible_action_set) + mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload, + flexible_action_set); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -7000,6 +7141,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(raleu), MLXSW_REG(rauhtd), MLXSW_REG(rigr2), + MLXSW_REG(rmft2), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), -- cgit v1.2.3 From 4fc92846f65b0a3470b433c54251a40feae7b2d5 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:17 +0200 Subject: mlxsw: reg: Add Router Rules Copy Register The RRCR register is used for copying and moving TCAM multicast routes from different offsets. It will be used to allow routes relocation for parman ops as part of the multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 046525ebe5ac..31d120ae8dc6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4680,6 +4680,65 @@ static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); } +/* RRCR - Router Rules Copy Register Layout + * ---------------------------------------- + * This register is used for moving and copying route entry rules. + */ +#define MLXSW_REG_RRCR_ID 0x800F +#define MLXSW_REG_RRCR_LEN 0x24 + +MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN); + +enum mlxsw_reg_rrcr_op { + /* Move rules */ + MLXSW_REG_RRCR_OP_MOVE, + /* Copy rules */ + MLXSW_REG_RRCR_OP_COPY, +}; + +/* reg_rrcr_op + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4); + +/* reg_rrcr_offset + * Offset within the region from which to copy/move. + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16); + +/* reg_rrcr_size + * The number of rules to copy/move. + * Access: WO + */ +MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16); + +/* reg_rrcr_table_id + * Identifier of the table on which to perform the operation. Encoding is the + * same as in RTAR.key_type + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4); + +/* reg_rrcr_dest_offset + * Offset within the region to which to copy/move + * Access: Index + */ +MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16); + +static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op, + u16 offset, u16 size, + enum mlxsw_reg_rtar_key_type table_id, + u16 dest_offset) +{ + MLXSW_REG_ZERO(rrcr, payload); + mlxsw_reg_rrcr_op_set(payload, op); + mlxsw_reg_rrcr_offset_set(payload, offset); + mlxsw_reg_rrcr_size_set(payload, size); + mlxsw_reg_rrcr_table_id_set(payload, table_id); + mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -7133,6 +7192,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(ratr), MLXSW_REG(rtdp), MLXSW_REG(ricnt), + MLXSW_REG(rrcr), MLXSW_REG(ralta), MLXSW_REG(ralst), MLXSW_REG(raltb), -- cgit v1.2.3 From 4af5964e58884855d28ae68ddf01279868e70853 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:18 +0200 Subject: mlxsw: reg: Configure RIF to forward IPv4 multicast packets by default Turn on two bits on the Spectrum RIF configuration: - IPv4 multicast: when a multicast packet arrives on a RIF, send it to go through multicast routes lookup. - IPv4 multicast forwarding enable: when multicast packet arrives on a RIF, allow it to be forwarded by multicast routes. If this bit is not set, multicast packets will go through multicast routing lookup but will be dropped at the egress of the ports. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 31d120ae8dc6..c203e0dfa827 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3991,6 +3991,12 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); +/* reg_ritr_ipv4_mc + * IPv4 multicast routing enable. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1); + enum mlxsw_reg_ritr_if_type { /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, @@ -4048,6 +4054,14 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_ipv4_mc_fe + * IPv4 Multicast Forwarding Enable. + * When disabled, forwarding is blocked but local traffic (traps and IP to me) + * will be enabled. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1); + /* reg_ritr_lb_en * Loop-back filter enable for unicast packets. * If the flag is set then loop-back filter for unicast packets is @@ -4270,11 +4284,13 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); mlxsw_reg_ritr_ipv6_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); mlxsw_reg_ritr_ipv6_fe_set(payload, 1); + mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); -- cgit v1.2.3 From 91e4d59a4600afe64b44e013a7c1805bbfe61e59 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:19 +0200 Subject: mlxsw: spectrum_router: Export RIF dev access function The mlxsw_sp_rif struct, defined as private struct in spectrum_router.c will be used in the multicast router source file. Due to the fact that the dev field will be needed by the multicast router logic, add an access function to it. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2cfb3f5d092d..0bd93dc88ffa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5049,6 +5049,11 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) return rif->dev->ifindex; } +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + return rif->dev; +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 345fcc4f38e9..ae4c99b3f2fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -69,6 +69,7 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, -- cgit v1.2.3 From b48cfc80ce9c27368e331d9aa742314487b0ee12 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 19 Sep 2017 10:00:20 +0200 Subject: mlxsw: spectrum: Add multicast router traps and trap groups Add three new traps needed for multicast routing: - PIM: Trap for PIM protocol control packets. - RPF: Trap for packets that fail the RPF check on a specific hardware route entry. - MULTICAST: Generic trap for multicast. It is used for routes that trap the packets to the CPU. The RPF and MULTICAST traps have rate limiters as these traps may have line-rate of packets trapped. The PIM trap has a rate limiter similarly to other L3 control protocols. The rate limiters are implemented by adding three new trap groups for the newly introduced traps. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 3 +++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 ++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index c203e0dfa827..17eba19100de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3681,12 +3681,15 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM, + MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, + MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6ba6ff276b17..e9b94430afed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3421,6 +3421,10 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { false, SP_IP2ME, DISCARD), /* ACL trap */ MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false), + /* Multicast Router Traps */ + MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), + MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), + MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3446,6 +3450,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: rate = 128; burst_size = 7; break; @@ -3461,6 +3467,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: rate = 1024; burst_size = 7; break; @@ -3506,6 +3513,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: priority = 5; tc = 5; break; @@ -3522,12 +3530,14 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: priority = 2; tc = 2; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST: priority = 1; tc = 1; break; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index f396a1fef633..a98103539f6b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -62,6 +62,8 @@ enum { MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, MLXSW_TRAP_ID_IPV4_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_PIM = 0x58, + MLXSW_TRAP_ID_RPF = 0x5C, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, @@ -89,6 +91,8 @@ enum { MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, + /* Multicast trap used for routes with trap action */ + MLXSW_TRAP_ID_ACL1 = 0x1C1, MLXSW_TRAP_ID_MAX = 0x1FF }; -- cgit v1.2.3 From bffa72cf7f9df842f0016ba03586039296b4caaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 05:14:24 -0700 Subject: net: sk_buff rbnode reorg skb->rbnode shares space with skb->next, skb->prev and skb->tstamp Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan using an RB tree for TCP retransmit queue to speedup SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ++++++++-------- include/net/tcp.h | 6 ------ net/ipv4/tcp_input.c | 27 +++++---------------------- net/sched/sch_netem.c | 7 ++++--- 4 files changed, 17 insertions(+), 39 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72299ef00061..492828801acb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -661,8 +661,12 @@ struct sk_buff { struct sk_buff *prev; union { - ktime_t tstamp; - u64 skb_mstamp; + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; }; }; struct rb_node rbnode; /* used in netem & tcp stack */ @@ -670,12 +674,8 @@ struct sk_buff { struct sock *sk; union { - struct net_device *dev; - /* Some protocols might use this space to store information, - * while device pointer would be NULL. - * UDP receive path is one user. - */ - unsigned long dev_scratch; + ktime_t tstamp; + u64 skb_mstamp; }; /* * This is the control buffer. It is free to use for every diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..49a8a46466f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -797,12 +797,6 @@ struct tcp_skb_cb { u16 tcp_gso_segs; u16 tcp_gso_size; }; - - /* Used to stash the receive timestamp while this skb is in the - * out of order queue, as skb->tstamp is overwritten by the - * rbnode. - */ - ktime_t swtstamp; }; __u8 tcp_flags; /* TCP header flags. (tcp[13]) */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bddf724f5c02..db9bb46b5776 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4266,11 +4266,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) tp->rx_opt.num_sacks = num_sacks; } -enum tcp_queue { - OOO_QUEUE, - RCV_QUEUE, -}; - /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket @@ -4286,7 +4281,6 @@ enum tcp_queue { * Returns true if caller should free @from instead of queueing it */ static bool tcp_try_coalesce(struct sock *sk, - enum tcp_queue dest, struct sk_buff *to, struct sk_buff *from, bool *fragstolen) @@ -4311,10 +4305,7 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->has_rxtstamp) { TCP_SKB_CB(to)->has_rxtstamp = true; - if (dest == OOO_QUEUE) - TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp; - else - to->tstamp = from->tstamp; + to->tstamp = from->tstamp; } return true; @@ -4351,9 +4342,6 @@ static void tcp_ofo_queue(struct sock *sk) } p = rb_next(p); rb_erase(&skb->rbnode, &tp->out_of_order_queue); - /* Replace tstamp which was stomped by rbnode */ - if (TCP_SKB_CB(skb)->has_rxtstamp) - skb->tstamp = TCP_SKB_CB(skb)->swtstamp; if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); @@ -4365,8 +4353,7 @@ static void tcp_ofo_queue(struct sock *sk) TCP_SKB_CB(skb)->end_seq); tail = skb_peek_tail(&sk->sk_receive_queue); - eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE, - tail, skb, &fragstolen); + eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) @@ -4420,10 +4407,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } - /* Stash tstamp to avoid being stomped on by rbnode */ - if (TCP_SKB_CB(skb)->has_rxtstamp) - TCP_SKB_CB(skb)->swtstamp = skb->tstamp; - /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -4451,7 +4434,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb, + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); @@ -4502,7 +4485,7 @@ coalesce_done: __kfree_skb(skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1, + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { goto coalesce_done; } @@ -4554,7 +4537,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && - tcp_try_coalesce(sk, RCV_QUEUE, tail, + tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index b1266e75ca43..063a4bdb9ee6 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -146,7 +146,6 @@ struct netem_sched_data { */ struct netem_skb_cb { psched_time_t time_to_send; - ktime_t tstamp_save; }; @@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } cb->time_to_send = now + delay; - cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -629,7 +627,10 @@ deliver: qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; - skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* -- cgit v1.2.3 From 69e33b2754ea4fbe4fdd2d18bc8ca05d8670a5c2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 19 Sep 2017 14:42:17 +0200 Subject: selftests: rtnetlink.sh: add test case for device ifalias Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 57b5ff576240..4b48de565cae 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -15,6 +15,14 @@ check_err() fi } +# same but inverted -- used when command must fail for test to pass +check_fail() +{ + if [ $1 -eq 0 ]; then + ret=1 + fi +} + kci_add_dummy() { ip link add name "$devdummy" type dummy @@ -235,6 +243,54 @@ kci_test_addrlabel() echo "PASS: ipv6 addrlabel" } +kci_test_ifalias() +{ + ret=0 + namewant=$(uuidgen) + syspathname="/sys/class/net/$devdummy/ifalias" + + ip link set dev "$devdummy" alias "$namewant" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: cannot set interface alias of $devdummy to $namewant" + return 1 + fi + + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + if [ -r "$syspathname" ] ; then + read namehave < "$syspathname" + if [ "$namewant" != "$namehave" ]; then + echo "FAIL: did set ifalias $namewant but got $namehave" + return 1 + fi + + namewant=$(uuidgen) + echo "$namewant" > "$syspathname" + ip link show "$devdummy" | grep -q "alias $namewant" + check_err $? + + # sysfs interface allows to delete alias again + echo "" > "$syspathname" + + ip link show "$devdummy" | grep -q "alias $namewant" + check_fail $? + + # re-add the alias -- kernel should free mem when dummy dev is removed + ip link set dev "$devdummy" alias "$namewant" + check_err $? + fi + + if [ $ret -ne 0 ]; then + echo "FAIL: set interface alias $devdummy to $namewant" + return 1 + fi + + echo "PASS: set ifalias $namewant for $devdummy" +} + kci_test_rtnl() { kci_add_dummy @@ -249,6 +305,7 @@ kci_test_rtnl() kci_test_gre kci_test_bridge kci_test_addrlabel + kci_test_ifalias kci_del_dummy } -- cgit v1.2.3 From f5619866592c65adc087364cc1a3ba709201ea26 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:57 -0400 Subject: net: dsa: remove copy of master ethtool_ops There is no need to store a copy of the master ethtool ops, storing the original pointer in DSA and the new one in the master netdev itself is enough. In the meantime, set orig_ethtool_ops to NULL when restoring the master ethtool ops and check the presence of the master original ethtool ops as well as its needed functions before calling them. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 1 - net/dsa/dsa.c | 8 ++++---- net/dsa/slave.c | 19 +++++++++++-------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index dd44d6ce1097..8dee216a5a9b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -188,7 +188,6 @@ struct dsa_port { /* * Original copy of the master netdev ethtool_ops */ - struct ethtool_ops ethtool_ops; const struct ethtool_ops *orig_ethtool_ops; }; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 03c58b0eb082..abadf7b49236 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -124,11 +124,10 @@ int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) if (!cpu_ops) return -ENOMEM; - memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops, - sizeof(struct ethtool_ops)); cpu_dp->orig_ethtool_ops = master->ethtool_ops; - memcpy(cpu_ops, &cpu_dp->ethtool_ops, - sizeof(struct ethtool_ops)); + if (cpu_dp->orig_ethtool_ops) + memcpy(cpu_ops, cpu_dp->orig_ethtool_ops, sizeof(*cpu_ops)); + dsa_cpu_port_ethtool_init(cpu_ops); master->ethtool_ops = cpu_ops; @@ -138,6 +137,7 @@ int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) { cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; } void dsa_cpu_dsa_destroy(struct dsa_port *port) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2afa99506f8b..2ff4f907d137 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -574,12 +574,13 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; s8 cpu_port = cpu_dp->index; int count = 0; - if (cpu_dp->ethtool_ops.get_sset_count) { - count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data); + if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); } if (ds->ops->get_ethtool_stats) @@ -591,10 +592,11 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; int count = 0; - if (cpu_dp->ethtool_ops.get_sset_count) - count += cpu_dp->ethtool_ops.get_sset_count(dev, sset); + if (ops && ops->get_sset_count) + count += ops->get_sset_count(dev, sset); if (sset == ETH_SS_STATS && ds->ops->get_sset_count) count += ds->ops->get_sset_count(ds); @@ -608,6 +610,7 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, struct dsa_switch_tree *dst = dev->dsa_ptr; struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); struct dsa_switch *ds = cpu_dp->ds; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; s8 cpu_port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; @@ -619,9 +622,9 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; - if (cpu_dp->ethtool_ops.get_sset_count) { - mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_strings(dev, stringset, data); + if (ops && ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_strings(dev, stringset, data); } if (stringset == ETH_SS_STATS && ds->ops->get_strings) { -- cgit v1.2.3 From cd8d7dd41bfddaa02e34db35abfab14ce4584dee Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:58 -0400 Subject: net: dsa: setup master ethtool unconditionally When a DSA switch tree is meant to be applied, it already has a CPU port. Thus remove the condition of dst->cpu_dp. Moreover, the next lines access dst->cpu_dp unconditionally. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 873af0108e24..bd19304f862f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -433,11 +433,9 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - if (dst->cpu_dp) { - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); - if (err) - return err; - } + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + if (err) + return err; /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -474,10 +472,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - if (dst->cpu_dp) { - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dst->cpu_dp = NULL; - } + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp = NULL; pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; -- cgit v1.2.3 From 1943563dfd4b3a1f9dc102f056813112d29bb60f Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:56:59 -0400 Subject: net: dsa: setup master ethtool after dsa_ptr DSA overrides the master's ethtool ops so that we can inject its CPU port's statistics. Because of that, we need to setup the ethtool ops after the master's dsa_ptr pointer has been assigned, not before. This patch setups the ethtool ops after dsa_ptr is assigned, and restores them before it gets cleared. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 12 +++++++----- net/dsa/legacy.c | 10 +++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index bd19304f862f..032f8bc3e788 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -433,16 +433,17 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); - if (err) - return err; - /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get * sent to the tag format's receive function. */ wmb(); dst->cpu_dp->netdev->dsa_ptr = dst; + + err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + if (err) + return err; + dst->applied = true; return 0; @@ -456,6 +457,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype @@ -472,7 +475,6 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_dp); dst->cpu_dp = NULL; pr_info("DSA: tree %d unapplied\n", dst->tree); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 91e6f7981d39..163910699db7 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -206,10 +206,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); - ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp); - if (ret) - return ret; - return 0; } @@ -606,7 +602,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = dst; - return 0; + return dsa_cpu_port_ethtool_setup(dst->cpu_dp); } static int dsa_probe(struct platform_device *pdev) @@ -671,6 +667,8 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; + dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dst->cpu_dp->netdev->dsa_ptr = NULL; /* If we used a tagging format that doesn't have an ethertype @@ -686,8 +684,6 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dev_put(dst->cpu_dp->netdev); } -- cgit v1.2.3 From f2f2356685bcaf1063859356fc65a5ac808b1382 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 19 Sep 2017 11:57:00 -0400 Subject: net: dsa: move master ethtool code DSA overrides the master device ethtool ops, so that it can inject stats from its dedicated switch CPU port as well. The related code is currently split in dsa.c and slave.c, but it only scopes the master net device. Move it to a new master.c DSA core file. This file will be later extented with master net device specific code. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/Makefile | 2 +- net/dsa/dsa.c | 28 ------------- net/dsa/dsa2.c | 4 +- net/dsa/dsa_priv.h | 7 ++-- net/dsa/legacy.c | 4 +- net/dsa/master.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++ net/dsa/slave.c | 83 ------------------------------------ 7 files changed, 129 insertions(+), 119 deletions(-) create mode 100644 net/dsa/master.c diff --git a/net/dsa/Makefile b/net/dsa/Makefile index fcce25da937c..2e7ac8bab19d 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,6 +1,6 @@ # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index abadf7b49236..81c852e32821 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -112,34 +112,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) -{ - struct dsa_switch *ds = cpu_dp->ds; - struct net_device *master; - struct ethtool_ops *cpu_ops; - - master = cpu_dp->netdev; - - cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL); - if (!cpu_ops) - return -ENOMEM; - - cpu_dp->orig_ethtool_ops = master->ethtool_ops; - if (cpu_dp->orig_ethtool_ops) - memcpy(cpu_ops, cpu_dp->orig_ethtool_ops, sizeof(*cpu_ops)); - - dsa_cpu_port_ethtool_init(cpu_ops); - master->ethtool_ops = cpu_ops; - - return 0; -} - -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) -{ - cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; - cpu_dp->orig_ethtool_ops = NULL; -} - void dsa_cpu_dsa_destroy(struct dsa_port *port) { struct device_node *port_dn = port->dn; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 032f8bc3e788..dcccaebde708 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -440,7 +440,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) wmb(); dst->cpu_dp->netdev->dsa_ptr = dst; - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); + err = dsa_master_ethtool_setup(dst->cpu_dp->netdev); if (err) return err; @@ -457,7 +457,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) if (!dst->applied) return; - dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dsa_master_ethtool_restore(dst->cpu_dp->netdev); dst->cpu_dp->netdev->dsa_ptr = NULL; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9c3eeb72462d..f616b3444418 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -97,8 +97,6 @@ struct dsa_slave_priv { int dsa_cpu_dsa_setup(struct dsa_port *port); void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); bool dsa_schedule_work(struct work_struct *work); /* legacy.c */ @@ -112,6 +110,10 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); +/* master.c */ +int dsa_master_ethtool_setup(struct net_device *dev); +void dsa_master_ethtool_restore(struct net_device *dev); + /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); @@ -139,7 +141,6 @@ int dsa_port_vlan_del(struct dsa_port *dp, /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); int dsa_slave_create(struct dsa_port *port, const char *name); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 163910699db7..ae505d8e4417 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -602,7 +602,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, wmb(); dev->dsa_ptr = dst; - return dsa_cpu_port_ethtool_setup(dst->cpu_dp); + return dsa_master_ethtool_setup(dst->cpu_dp->netdev); } static int dsa_probe(struct platform_device *pdev) @@ -667,7 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dsa_cpu_port_ethtool_restore(dst->cpu_dp); + dsa_master_ethtool_restore(dst->cpu_dp->netdev); dst->cpu_dp->netdev->dsa_ptr = NULL; diff --git a/net/dsa/master.c b/net/dsa/master.c new file mode 100644 index 000000000000..5e5147ec5a44 --- /dev/null +++ b/net/dsa/master.c @@ -0,0 +1,120 @@ +/* + * Handling of a master device, switching frames via its switch fabric CPU port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "dsa_priv.h" + +static void dsa_master_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int count = 0; + + if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); + } + + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, port->index, data + count); +} + +static int dsa_master_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int count = 0; + + if (ops && ops->get_sset_count) + count += ops->get_sset_count(dev, sset); + + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); + + return count; +} + +static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + const struct ethtool_ops *ops = port->orig_ethtool_ops; + int len = ETH_GSTRING_LEN; + int mcount = 0, count; + unsigned int i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", port->index); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (ops && ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_strings(dev, stringset, data); + } + + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->ops->get_strings(ds, port->index, ndata); + count = ds->ops->get_sset_count(ds); + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + +int dsa_master_ethtool_setup(struct net_device *dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + struct dsa_switch *ds = port->ds; + struct ethtool_ops *ops; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + port->orig_ethtool_ops = dev->ethtool_ops; + if (port->orig_ethtool_ops) + memcpy(ops, port->orig_ethtool_ops, sizeof(*ops)); + + ops->get_sset_count = dsa_master_get_sset_count; + ops->get_ethtool_stats = dsa_master_get_ethtool_stats; + ops->get_strings = dsa_master_get_strings; + + dev->ethtool_ops = ops; + + return 0; +} + +void dsa_master_ethtool_restore(struct net_device *dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *port = dst->cpu_dp; + + dev->ethtool_ops = port->orig_ethtool_ops; + port->orig_ethtool_ops = NULL; +} diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 2ff4f907d137..d51b10450e1b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -567,82 +567,6 @@ static void dsa_slave_get_strings(struct net_device *dev, } } -static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - s8 cpu_port = cpu_dp->index; - int count = 0; - - if (ops && ops->get_sset_count && ops->get_ethtool_stats) { - count = ops->get_sset_count(dev, ETH_SS_STATS); - ops->get_ethtool_stats(dev, stats, data); - } - - if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, cpu_port, data + count); -} - -static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - int count = 0; - - if (ops && ops->get_sset_count) - count += ops->get_sset_count(dev, sset); - - if (sset == ETH_SS_STATS && ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds); - - return count; -} - -static void dsa_cpu_port_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; - s8 cpu_port = cpu_dp->index; - int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; - uint8_t pfx[4]; - uint8_t *ndata; - - snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port); - /* We do not want to be NULL-terminated, since this is a prefix */ - pfx[sizeof(pfx) - 1] = '_'; - - if (ops && ops->get_sset_count && ops->get_strings) { - mcount = ops->get_sset_count(dev, ETH_SS_STATS); - ops->get_strings(dev, stringset, data); - } - - if (stringset == ETH_SS_STATS && ds->ops->get_strings) { - ndata = data + mcount * len; - /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle - * the output after to prepend our CPU port prefix we - * constructed earlier - */ - ds->ops->get_strings(ds, cpu_port, ndata); - count = ds->ops->get_sset_count(ds); - for (i = 0; i < count; i++) { - memmove(ndata + (i * len + sizeof(pfx)), - ndata + i * len, len - sizeof(pfx)); - memcpy(ndata + i * len, pfx, sizeof(pfx)); - } - } -} - static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) @@ -979,13 +903,6 @@ static void dsa_slave_get_stats64(struct net_device *dev, } } -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) -{ - ops->get_sset_count = dsa_cpu_port_get_sset_count; - ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats; - ops->get_strings = dsa_cpu_port_get_strings; -} - static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { -- cgit v1.2.3 From 7131cc9fc9fd3291bff0e0d3326a1b6983fa0f34 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:43 -0700 Subject: net: dsa: b53: Remove is_cpu_port() This is not used anywhere, so remove it. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 01bd8cbe9a3f..7528b22aeb03 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -186,11 +186,6 @@ static inline int is58xx(struct b53_device *dev) #define B53_CPU_PORT_25 5 #define B53_CPU_PORT 8 -static inline int is_cpu_port(struct b53_device *dev, int port) -{ - return dev->cpu_port; -} - struct b53_device *b53_switch_alloc(struct device *base, const struct b53_io_ops *ops, void *priv); -- cgit v1.2.3 From 299752a7d28654dd74304a76d3639e744df52084 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:44 -0700 Subject: net: dsa: b53: Make b53_enable_cpu_port() take a port argument In preparation for future changes allowing the configuring of multiple CPU ports, make b53_enable_cpu_port() take a port argument. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 274f3679f33d..d8bc54cfcfbe 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -538,19 +538,18 @@ static void b53_disable_port(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } -static void b53_enable_cpu_port(struct b53_device *dev) +static void b53_enable_cpu_port(struct b53_device *dev, int port) { - unsigned int cpu_port = dev->cpu_port; u8 port_ctrl; /* BCM5325 CPU port is at 8 */ - if ((is5325(dev) || is5365(dev)) && cpu_port == B53_CPU_PORT_25) - cpu_port = B53_CPU_PORT; + if ((is5325(dev) || is5365(dev)) && port == B53_CPU_PORT_25) + port = B53_CPU_PORT; port_ctrl = PORT_CTRL_RX_BCST_EN | PORT_CTRL_RX_MCST_EN | PORT_CTRL_RX_UCST_EN; - b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(cpu_port), port_ctrl); + b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl); } static void b53_enable_mib(struct b53_device *dev) @@ -820,7 +819,7 @@ static int b53_setup(struct dsa_switch *ds) if (BIT(port) & ds->enabled_port_mask) b53_enable_port(ds, port, NULL); else if (dsa_is_cpu_port(ds, port)) - b53_enable_cpu_port(dev); + b53_enable_cpu_port(dev, port); else b53_disable_port(ds, port, NULL); } -- cgit v1.2.3 From 34c8befd1365eb38f8dd7f3f81086fa766915610 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:45 -0700 Subject: net: dsa: b53: Defer port enabling to calling port_enable There is no need to configure the enabled ports once in b53_setup() and then a second time around when dsa_switch_ops::port_enable is called, just do it when port_enable is called which is better in terms of power consumption and correctness. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index d8bc54cfcfbe..3297af6aab8a 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -815,12 +815,13 @@ static int b53_setup(struct dsa_switch *ds) if (ret) dev_err(ds->dev, "failed to apply configuration\n"); + /* Configure IMP/CPU port, disable unused ports. Enabled + * ports will be configured with .port_enable + */ for (port = 0; port < dev->num_ports; port++) { - if (BIT(port) & ds->enabled_port_mask) - b53_enable_port(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) b53_enable_cpu_port(dev, port); - else + else if (!(BIT(port) & ds->enabled_port_mask)) b53_disable_port(ds, port, NULL); } -- cgit v1.2.3 From e85ec74ace29acfda4afdfd56df122a0a62e6a1a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:46 -0700 Subject: net: dsa: bcm_sf2: Defer port enabling to calling port_enable There is no need to configure the enabled ports once in bcm_sf2_sw_setup() and then a second time around when dsa_switch_ops::port_enable is called, just do it when port_enable is called which is better in terms of power consumption and correctness. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index d7b53d53c116..8acbd17bc1fd 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -890,14 +890,11 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; - /* Enable all valid ports and disable those unused */ + /* Disable unused ports and configure IMP port */ for (port = 0; port < priv->hw_params.num_ports; port++) { - /* IMP port receives special treatment */ - if ((1 << port) & ds->enabled_port_mask) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); - else + else if (!((1 << port) & ds->enabled_port_mask)) bcm_sf2_port_disable(ds, port, NULL); } -- cgit v1.2.3 From 5345862e9af02ce3780639bfe350ed2da9f8af13 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:47 -0700 Subject: net: dsa: b53: Use a macro to define I/O operations Instead of repeating the same pattern: acquire mutex, read/write, release mutex, define a macro: b53_build_op() which takes the type (read|write), I/O size, and value (scalar or pointer). This helps with fixing bugs that could exist (e.g: missing barrier, lock etc.). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_priv.h | 133 +++++++---------------------------------- 1 file changed, 22 insertions(+), 111 deletions(-) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 7528b22aeb03..5bebe97900e8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -199,119 +199,30 @@ static inline void b53_switch_remove(struct b53_device *dev) dsa_unregister_switch(dev->ds); } -static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read8(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read16(struct b53_device *dev, u8 page, u8 reg, u16 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read16(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read32(struct b53_device *dev, u8 page, u8 reg, u32 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read32(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read48(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; +#define b53_build_op(type_op_size, val_type) \ +static inline int b53_##type_op_size(struct b53_device *dev, u8 page, \ + u8 reg, val_type val) \ +{ \ + int ret; \ + \ + mutex_lock(&dev->reg_mutex); \ + ret = dev->ops->type_op_size(dev, page, reg, val); \ + mutex_unlock(&dev->reg_mutex); \ + \ + return ret; \ } -static inline int b53_read64(struct b53_device *dev, u8 page, u8 reg, u64 *val) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->read64(dev, page, reg, val); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write8(struct b53_device *dev, u8 page, u8 reg, u8 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write8(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write16(struct b53_device *dev, u8 page, u8 reg, - u16 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write16(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write32(struct b53_device *dev, u8 page, u8 reg, - u32 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write32(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write48(struct b53_device *dev, u8 page, u8 reg, - u64 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write48(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} - -static inline int b53_write64(struct b53_device *dev, u8 page, u8 reg, - u64 value) -{ - int ret; - - mutex_lock(&dev->reg_mutex); - ret = dev->ops->write64(dev, page, reg, value); - mutex_unlock(&dev->reg_mutex); - - return ret; -} +b53_build_op(read8, u8 *); +b53_build_op(read16, u16 *); +b53_build_op(read32, u32 *); +b53_build_op(read48, u64 *); +b53_build_op(read64, u64 *); + +b53_build_op(write8, u8); +b53_build_op(write16, u16); +b53_build_op(write32, u32); +b53_build_op(write48, u64); +b53_build_op(write64, u64); struct b53_arl_entry { u8 port; -- cgit v1.2.3 From b409a9efa183d92d99bc314fb26ebc1c2a8b4379 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:48 -0700 Subject: net: dsa: b53: Move Broadcom header setup to b53 The code to enable Broadcom tags/headers is largely switch independent, and in preparation for enabling it for multiple devices with b53, move the code we have in bcm_sf2.c to b53_common.c Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 47 ++++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/b53/b53_regs.h | 7 ++++++ drivers/net/dsa/bcm_sf2.c | 43 ++---------------------------------- drivers/net/dsa/bcm_sf2_regs.h | 8 ------- 5 files changed, 57 insertions(+), 49 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 3297af6aab8a..aa2187c71ea5 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -538,6 +538,53 @@ static void b53_disable_port(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + u8 hdr_ctl, val; + u16 reg; + + /* Resolve which bit controls the Broadcom tag */ + switch (port) { + case 8: + val = BRCM_HDR_P8_EN; + break; + case 7: + val = BRCM_HDR_P7_EN; + break; + case 5: + val = BRCM_HDR_P5_EN; + break; + default: + val = 0; + break; + } + + /* Enable Broadcom tags for IMP port */ + b53_read8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, &hdr_ctl); + hdr_ctl |= val; + b53_write8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, hdr_ctl); + + /* Registers below are only accessible on newer devices */ + if (!is58xx(dev)) + return; + + /* Enable reception Broadcom tag for CPU TX (switch RX) to + * allow us to tag outgoing frames + */ + b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, ®); + reg &= ~BIT(port); + b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, reg); + + /* Enable transmission of Broadcom tags from the switch (CPU RX) to + * allow delivering frames to the per-port net_devices + */ + b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, ®); + reg &= ~BIT(port); + b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, reg); +} +EXPORT_SYMBOL(b53_brcm_hdr_setup); + static void b53_enable_cpu_port(struct b53_device *dev, int port) { u8 port_ctrl; diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 5bebe97900e8..77102f685da0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -309,5 +309,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); #endif diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index e5c86d44667a..5e8b8e31fee8 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -210,6 +210,7 @@ #define B53_BRCM_HDR 0x03 #define BRCM_HDR_P8_EN BIT(0) /* Enable tagging on port 8 */ #define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */ +#define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */ /* Mirror capture control register (16 bit) */ #define B53_MIR_CAP_CTL 0x10 @@ -249,6 +250,12 @@ /* Revision ID register (8 bit) */ #define B53_REV_ID 0x40 +/* Broadcom header RX control (16 bit) */ +#define B53_BRCM_HDR_RX_DIS 0x60 + +/* Broadcom header TX control (16 bit) */ +#define B53_BRCM_HDR_TX_DIS 0x62 + /************************************************************************* * ARL Access Page Registers *************************************************************************/ diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 8acbd17bc1fd..49cb51223f70 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -60,45 +60,6 @@ static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) } } -static void bcm_sf2_brcm_hdr_setup(struct bcm_sf2_priv *priv, int port) -{ - u32 reg, val; - - /* Resolve which bit controls the Broadcom tag */ - switch (port) { - case 8: - val = BRCM_HDR_EN_P8; - break; - case 7: - val = BRCM_HDR_EN_P7; - break; - case 5: - val = BRCM_HDR_EN_P5; - break; - default: - val = 0; - break; - } - - /* Enable Broadcom tags for IMP port */ - reg = core_readl(priv, CORE_BRCM_HDR_CTRL); - reg |= val; - core_writel(priv, reg, CORE_BRCM_HDR_CTRL); - - /* Enable reception Broadcom tag for CPU TX (switch RX) to - * allow us to tag outgoing frames - */ - reg = core_readl(priv, CORE_BRCM_HDR_RX_DIS); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_BRCM_HDR_RX_DIS); - - /* Enable transmission of Broadcom tags from the switch (CPU RX) to - * allow delivering frames to the per-port net_devices - */ - reg = core_readl(priv, CORE_BRCM_HDR_TX_DIS); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_BRCM_HDR_TX_DIS); -} static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { @@ -138,7 +99,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) reg |= i << (PRT_TO_QID_SHIFT * i); core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port)); - bcm_sf2_brcm_hdr_setup(priv, port); + b53_brcm_hdr_setup(ds, port); /* Force link status for IMP port */ reg = core_readl(priv, offset); @@ -247,7 +208,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, /* Enable Broadcom tags for that port if requested */ if (priv->brcm_tag_mask & BIT(port)) - bcm_sf2_brcm_hdr_setup(priv, port); + b53_brcm_hdr_setup(ds, port); /* Configure Traffic Class to QoS mapping, allow each priority to map * to a different queue number diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 49695fcc2ea8..788361ad68a0 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -205,16 +205,8 @@ enum bcm_sf2_reg_offs { #define CORE_IMP0_PRT_ID 0x0804 -#define CORE_BRCM_HDR_CTRL 0x0080c -#define BRCM_HDR_EN_P8 (1 << 0) -#define BRCM_HDR_EN_P5 (1 << 1) -#define BRCM_HDR_EN_P7 (1 << 2) - #define CORE_RST_MIB_CNT_EN 0x0950 -#define CORE_BRCM_HDR_RX_DIS 0x0980 -#define CORE_BRCM_HDR_TX_DIS 0x0988 - #define CORE_ARLA_VTBL_RWCTRL 0x1600 #define ARLA_VTBL_CMD_WRITE 0 #define ARLA_VTBL_CMD_READ 1 -- cgit v1.2.3 From 909d812a668e8950ed8bb33ecdc9afbcdb0d371b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:49 -0700 Subject: net: dsa: b53: Define EEE register page In preparation for migrating the EEE code from bcm_sf2 to b53, define the full EEE register page and offsets within that page. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_regs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 5e8b8e31fee8..2a9f421680aa 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -50,6 +50,9 @@ /* Jumbo Frame Registers */ #define B53_JUMBO_PAGE 0x40 +/* EEE Control Registers Page */ +#define B53_EEE_PAGE 0x92 + /* CFP Configuration Registers Page */ #define B53_CFP_PAGE 0xa1 @@ -471,6 +474,44 @@ #define JMS_MIN_SIZE 1518 #define JMS_MAX_SIZE 9724 +/************************************************************************* + * EEE Configuration Page Registers + *************************************************************************/ + +/* EEE Enable control register (16 bit) */ +#define B53_EEE_EN_CTRL 0x00 + +/* EEE LPI assert status register (16 bit) */ +#define B53_EEE_LPI_ASSERT_STS 0x02 + +/* EEE LPI indicate status register (16 bit) */ +#define B53_EEE_LPI_INDICATE 0x4 + +/* EEE Receiving idle symbols status register (16 bit) */ +#define B53_EEE_RX_IDLE_SYM_STS 0x6 + +/* EEE Pipeline timer register (32 bit) */ +#define B53_EEE_PIP_TIMER 0xC + +/* EEE Sleep timer Gig register (32 bit) */ +#define B53_EEE_SLEEP_TIMER_GIG(i) (0x10 + 4 * (i)) + +/* EEE Sleep timer FE register (32 bit) */ +#define B53_EEE_SLEEP_TIMER_FE(i) (0x34 + 4 * (i)) + +/* EEE Minimum LP timer Gig register (32 bit) */ +#define B53_EEE_MIN_LP_TIMER_GIG(i) (0x58 + 4 * (i)) + +/* EEE Minimum LP timer FE register (32 bit) */ +#define B53_EEE_MIN_LP_TIMER_FE(i) (0x7c + 4 * (i)) + +/* EEE Wake timer Gig register (16 bit) */ +#define B53_EEE_WAKE_TIMER_GIG(i) (0xa0 + 2 * (i)) + +/* EEE Wake timer FE register (16 bit) */ +#define B53_EEE_WAKE_TIMER_FE(i) (0xb2 + 2 * (i)) + + /************************************************************************* * CFP Configuration Page Registers *************************************************************************/ -- cgit v1.2.3 From 22256b0afb12333571ad11799fa68fd27e4f4e80 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:50 -0700 Subject: net: dsa: b53: Move EEE functions to b53 Move the bcm_sf2 EEE-related functions to the b53 driver because this is shared code amongst Gigabit capable switch, only 5325 and 5365 are too old to support that. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 63 ++++++++++++++++++++++++++++++++++++++ drivers/net/dsa/b53/b53_priv.h | 5 +++ drivers/net/dsa/bcm_sf2.c | 66 ++++------------------------------------ drivers/net/dsa/bcm_sf2.h | 2 -- drivers/net/dsa/bcm_sf2_regs.h | 3 -- 5 files changed, 74 insertions(+), 65 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index aa2187c71ea5..491e4ffa8a0e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1531,6 +1531,69 @@ void b53_mirror_del(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_mirror_del); +void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable) +{ + struct b53_device *dev = ds->priv; + u16 reg; + + b53_read16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, ®); + if (enable) + reg |= BIT(port); + else + reg &= ~BIT(port); + b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg); +} +EXPORT_SYMBOL(b53_eee_enable_set); + + +/* Returns 0 if EEE was not enabled, or 1 otherwise + */ +int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) +{ + int ret; + + ret = phy_init_eee(phy, 0); + if (ret) + return 0; + + b53_eee_enable_set(ds, port, true); + + return 1; +} +EXPORT_SYMBOL(b53_eee_init); + +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +{ + struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; + u16 reg; + + if (is5325(dev) || is5365(dev)) + return -EOPNOTSUPP; + + b53_read16(dev, B53_EEE_PAGE, B53_EEE_LPI_INDICATE, ®); + e->eee_enabled = p->eee_enabled; + e->eee_active = !!(reg & BIT(port)); + + return 0; +} +EXPORT_SYMBOL(b53_get_mac_eee); + +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) +{ + struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; + + if (is5325(dev) || is5365(dev)) + return -EOPNOTSUPP; + + p->eee_enabled = e->eee_enabled; + b53_eee_enable_set(ds, port, e->eee_enabled); + + return 0; +} +EXPORT_SYMBOL(b53_set_mac_eee); + static const struct dsa_switch_ops b53_switch_ops = { .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 77102f685da0..aabe80eab25d 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -70,6 +70,7 @@ enum { struct b53_port { u16 vlan_ctl_mask; + struct ethtool_eee eee; }; struct b53_vlan { @@ -310,5 +311,9 @@ int b53_mirror_add(struct dsa_switch *ds, int port, void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); +void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable); +int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); +int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e); #endif diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 49cb51223f70..4e8ef4c07eab 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -107,19 +107,6 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) core_writel(priv, reg, offset); } -static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - u32 reg; - - reg = core_readl(priv, CORE_EEE_EN_CTRL); - if (enable) - reg |= 1 << port; - else - reg &= ~(1 << port); - core_writel(priv, reg, CORE_EEE_EN_CTRL); -} - static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -256,8 +243,8 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, bcm_sf2_imp_vlan_setup(ds, cpu_port); /* If EEE was enabled, restore it */ - if (priv->port_sts[port].eee.eee_enabled) - bcm_sf2_eee_enable_set(ds, port, true); + if (priv->dev->ports[port].eee.eee_enabled) + b53_eee_enable_set(ds, port, true); return 0; } @@ -292,47 +279,6 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); } -/* Returns 0 if EEE was not enabled, or 1 otherwise - */ -static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, - struct phy_device *phy) -{ - int ret; - - ret = phy_init_eee(phy, 0); - if (ret) - return 0; - - bcm_sf2_eee_enable_set(ds, port, true); - - return 1; -} - -static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; - u32 reg; - - reg = core_readl(priv, CORE_EEE_LPI_INDICATE); - e->eee_enabled = p->eee_enabled; - e->eee_active = !!(reg & (1 << port)); - - return 0; -} - -static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port, - struct ethtool_eee *e) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; - - p->eee_enabled = e->eee_enabled; - bcm_sf2_eee_enable_set(ds, port, e->eee_enabled); - - return 0; -} static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr, int regnum, u16 val) @@ -567,7 +513,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - struct ethtool_eee *p = &priv->port_sts[port].eee; + struct ethtool_eee *p = &priv->dev->ports[port].eee; u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg, offset; @@ -649,7 +595,7 @@ force_link: core_writel(priv, reg, offset); if (!phydev->is_pseudo_fixed_link) - p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev); + p->eee_enabled = b53_eee_init(ds, port, phydev); } static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, @@ -978,8 +924,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .set_wol = bcm_sf2_sw_set_wol, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, - .get_mac_eee = bcm_sf2_sw_get_mac_eee, - .set_mac_eee = bcm_sf2_sw_set_mac_eee, + .get_mac_eee = b53_get_mac_eee, + .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index 02c499f9c56b..1922e027ff59 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -48,8 +48,6 @@ struct bcm_sf2_hw_params { struct bcm_sf2_port_status { unsigned int link; - - struct ethtool_eee eee; }; struct bcm_sf2_cfp_priv { diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 788361ad68a0..d8b8074a47b9 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -244,9 +244,6 @@ enum bcm_sf2_reg_offs { #define CORE_JOIN_ALL_VLAN_EN 0xd140 -#define CORE_EEE_EN_CTRL 0x24800 -#define CORE_EEE_LPI_INDICATE 0x24810 - #define CORE_CFP_ACC 0x28000 #define OP_STR_DONE (1 << 0) #define OP_SEL_SHIFT 1 -- cgit v1.2.3 From f43a2dbe9597038578171e70cb8fe751a0383fe2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:51 -0700 Subject: net: dsa: b53: Wire-up EEE Add support for enabling and disabling EEE, as well as re-negotiating it in .adjust_link() and in .port_enable(). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 491e4ffa8a0e..4e37ec27e496 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -523,6 +523,10 @@ static int b53_enable_port(struct dsa_switch *ds, int port, b53_imp_vlan_setup(ds, cpu_port); + /* If EEE was enabled, restore it */ + if (dev->ports[port].eee.eee_enabled) + b53_eee_enable_set(ds, port, true); + return 0; } @@ -879,6 +883,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { struct b53_device *dev = ds->priv; + struct ethtool_eee *p = &dev->ports[port].eee; u8 rgmii_ctrl = 0, reg = 0, off; if (!phy_is_pseudo_fixed_link(phydev)) @@ -1000,6 +1005,9 @@ static void b53_adjust_link(struct dsa_switch *ds, int port, b53_write8(dev, B53_CTRL_PAGE, po_reg, gmii_po); } } + + /* Re-negotiate EEE if it was enabled already */ + p->eee_enabled = b53_eee_init(ds, port, phydev); } int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) @@ -1605,6 +1613,8 @@ static const struct dsa_switch_ops b53_switch_ops = { .adjust_link = b53_adjust_link, .port_enable = b53_enable_port, .port_disable = b53_disable_port, + .get_mac_eee = b53_get_mac_eee, + .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, -- cgit v1.2.3 From aac028672cbea038f23e378ddb3af08c1dbe668c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:52 -0700 Subject: net: dsa: b53: Export b53_imp_vlan_setup() bcm_sf2 and b53 do exactly the same thing, so share that piece. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 3 ++- drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 23 +---------------------- 3 files changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 4e37ec27e496..c3f1cd2c33ea 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -484,7 +484,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid) return b53_flush_arl(dev, FAST_AGE_VLAN); } -static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) +void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { struct b53_device *dev = ds->priv; unsigned int i; @@ -500,6 +500,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(i), pvlan); } } +EXPORT_SYMBOL(b53_imp_vlan_setup); static int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index aabe80eab25d..8f4f83e2e4bd 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -284,6 +284,7 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev) #endif /* Exported functions towards other drivers */ +void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port); void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data); void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data); int b53_get_sset_count(struct dsa_switch *ds); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 4e8ef4c07eab..08639674947a 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -40,27 +40,6 @@ static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) return DSA_TAG_PROTO_BRCM; } -static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) -{ - struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - unsigned int i; - u32 reg; - - /* Enable the IMP Port to be in the same VLAN as the other ports - * on a per-port basis such that we only have Port i and IMP in - * the same VLAN. - */ - for (i = 0; i < priv->hw_params.num_ports; i++) { - if (!((1 << i) & ds->enabled_port_mask)) - continue; - - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg |= (1 << cpu_port); - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - } -} - - static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); @@ -240,7 +219,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg |= priv->dev->ports[port].vlan_ctl_mask; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); - bcm_sf2_imp_vlan_setup(ds, cpu_port); + b53_imp_vlan_setup(ds, cpu_port); /* If EEE was enabled, restore it */ if (priv->dev->ports[port].eee.eee_enabled) -- cgit v1.2.3 From 152b6fd60ae075dbe41707ed9c7caddc18b03b35 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:53 -0700 Subject: net: dsa: bcm_sf2: Use SF2_NUM_EGRESS_QUEUES for CFP The magic number 8 in 3 locations in bcm_sf2_cfp.c actually designates the number of switch port egress queues, so use that define instead of open-coding it. Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 8a1da7e67707..94649e1481ec 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -144,7 +144,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * destination port is enabled and that we are within the * number of ports supported by the switch */ - port_num = fs->ring_cookie / 8; + port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES; if (fs->ring_cookie == RX_CLS_FLOW_DISC || !(BIT(port_num) & ds->enabled_port_mask) || @@ -280,7 +280,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port, * We have a small oddity where Port 6 just does not have a * valid bit here (so we subtract by one). */ - queue_num = fs->ring_cookie % 8; + queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES; if (port_num >= 7) port_num -= 1; @@ -401,7 +401,7 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port, /* There is no Port 6, so we compensate for that here */ if (nfc->fs.ring_cookie >= 6) nfc->fs.ring_cookie++; - nfc->fs.ring_cookie *= 8; + nfc->fs.ring_cookie *= SF2_NUM_EGRESS_QUEUES; /* Extract the destination queue */ queue_num = (reg >> NEW_TC_SHIFT) & NEW_TC_MASK; -- cgit v1.2.3 From f86ad77faf248d6101394f8c79d03f16a9ea461d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 10:46:54 -0700 Subject: net: dsa: bcm_sf2: Utilize b53_{enable, disable}_port Export b53_{enable,disable}_port and use these two functions in bcm_sf2_port_setup and bcm_sf2_port_disable. The generic functions cannot be used without wrapping because we need to manage additional switch integration details (PHY, Broadcom tag etc.). Reviewed-by: Vivien Didelot Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 8 ++++---- drivers/net/dsa/b53/b53_priv.h | 2 ++ drivers/net/dsa/bcm_sf2.c | 26 ++------------------------ 3 files changed, 8 insertions(+), 28 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index c3f1cd2c33ea..a9f2a5b55a5e 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -502,8 +502,7 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) } EXPORT_SYMBOL(b53_imp_vlan_setup); -static int b53_enable_port(struct dsa_switch *ds, int port, - struct phy_device *phy) +int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; unsigned int cpu_port = dev->cpu_port; @@ -530,9 +529,9 @@ static int b53_enable_port(struct dsa_switch *ds, int port, return 0; } +EXPORT_SYMBOL(b53_enable_port); -static void b53_disable_port(struct dsa_switch *ds, int port, - struct phy_device *phy) +void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; u8 reg; @@ -542,6 +541,7 @@ static void b53_disable_port(struct dsa_switch *ds, int port, reg |= PORT_CTRL_RX_DISABLE | PORT_CTRL_TX_DISABLE; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } +EXPORT_SYMBOL(b53_disable_port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port) { diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 8f4f83e2e4bd..603c66d240d8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -311,6 +311,8 @@ int b53_mirror_add(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress); void b53_mirror_del(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); +int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy); +void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable); int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 08639674947a..0072a959db5b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -163,7 +163,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, struct phy_device *phy) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - s8 cpu_port = ds->dst->cpu_dp->index; unsigned int i; u32 reg; @@ -184,9 +183,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg |= i << (PRT_TO_QID_SHIFT * i); core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port)); - /* Clear the Rx and Tx disable bits and set to no spanning tree */ - core_writel(priv, 0, CORE_G_PCTL_PORT(port)); - /* Re-enable the GPHY and re-apply workarounds */ if (priv->int_phy_mask & 1 << port && priv->hw_params.num_gphy == 1) { bcm_sf2_gphy_enable_set(ds, true); @@ -209,23 +205,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, if (port == priv->moca_port) bcm_sf2_port_intr_enable(priv, port); - /* Set this port, and only this one to be in the default VLAN, - * if member of a bridge, restore its membership prior to - * bringing down this port. - */ - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - reg &= ~PORT_VLAN_CTRL_MASK; - reg |= (1 << port); - reg |= priv->dev->ports[port].vlan_ctl_mask; - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); - - b53_imp_vlan_setup(ds, cpu_port); - - /* If EEE was enabled, restore it */ - if (priv->dev->ports[port].eee.eee_enabled) - b53_eee_enable_set(ds, port, true); - - return 0; + return b53_enable_port(ds, port, phy); } static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, @@ -248,9 +228,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, else off = CORE_G_PCTL_PORT(port); - reg = core_readl(priv, off); - reg |= RX_DIS | TX_DIS; - core_writel(priv, reg, off); + b53_disable_port(ds, port, phy); /* Power down the port memory */ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); -- cgit v1.2.3 From 7e936bd73483eb1873b9ab6f375c9a1d781bc5cb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:11 +0200 Subject: test_rhashtable: don't allocate huge static array Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 0ffca990a833..c40d6e636f33 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -72,8 +72,6 @@ struct thread_data { struct test_obj *objs; }; -static struct test_obj array[MAX_ENTRIES]; - static struct rhashtable_params test_rht_params = { .head_offset = offsetof(struct test_obj, node), .key_offset = offsetof(struct test_obj, value), @@ -85,7 +83,7 @@ static struct rhashtable_params test_rht_params = { static struct semaphore prestart_sem; static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0); -static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, +static int insert_retry(struct rhashtable *ht, struct test_obj *obj, const struct rhashtable_params params) { int err, retries = -1, enomem_retries = 0; @@ -93,7 +91,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, do { retries++; cond_resched(); - err = rhashtable_insert_fast(ht, obj, params); + err = rhashtable_insert_fast(ht, &obj->node, params); if (err == -ENOMEM && enomem_retry) { enomem_retries++; err = -EBUSY; @@ -107,7 +105,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj, return err ? : retries; } -static int __init test_rht_lookup(struct rhashtable *ht) +static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) { unsigned int i; @@ -186,7 +184,7 @@ static void test_bucket_stats(struct rhashtable *ht) pr_warn("Test failed: Total count mismatch ^^^"); } -static s64 __init test_rhashtable(struct rhashtable *ht) +static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) { struct test_obj *obj; int err; @@ -203,7 +201,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht) struct test_obj *obj = &array[i]; obj->value.id = i * 2; - err = insert_retry(ht, &obj->node, test_rht_params); + err = insert_retry(ht, obj, test_rht_params); if (err > 0) insert_retries += err; else if (err) @@ -216,7 +214,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht) test_bucket_stats(ht); rcu_read_lock(); - test_rht_lookup(ht); + test_rht_lookup(ht, array); rcu_read_unlock(); test_bucket_stats(ht); @@ -286,7 +284,7 @@ static int threadfunc(void *data) for (i = 0; i < entries; i++) { tdata->objs[i].value.id = i; tdata->objs[i].value.tid = tdata->id; - err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params); + err = insert_retry(&ht, &tdata->objs[i], test_rht_params); if (err > 0) { insert_retries += err; } else if (err) { @@ -349,6 +347,10 @@ static int __init test_rht_init(void) test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); test_rht_params.nelem_hint = size; + objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj)); + if (!objs) + return -ENOMEM; + pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n", size, max_size, shrinking); @@ -356,7 +358,8 @@ static int __init test_rht_init(void) s64 time; pr_info("Test %02d:\n", i); - memset(&array, 0, sizeof(array)); + memset(objs, 0, test_rht_params.max_size * sizeof(struct test_obj)); + err = rhashtable_init(&ht, &test_rht_params); if (err < 0) { pr_warn("Test failed: Unable to initialize hashtable: %d\n", @@ -364,9 +367,10 @@ static int __init test_rht_init(void) continue; } - time = test_rhashtable(&ht); + time = test_rhashtable(&ht, objs); rhashtable_destroy(&ht); if (time < 0) { + vfree(objs); pr_warn("Test failed: return code %lld\n", time); return -EINVAL; } @@ -374,6 +378,7 @@ static int __init test_rht_init(void) total_time += time; } + vfree(objs); do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); -- cgit v1.2.3 From f651616e799ff01d1e21abc053ee1e23ac1a2344 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:12 +0200 Subject: test_rhashtable: don't use global entries variable pass the entries to test as an argument instead. Followup patch will add an rhlist test case; rhlist delete opererations are slow so we need to use a smaller number to test it. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index c40d6e636f33..69f5b3849980 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -28,9 +28,9 @@ #define MAX_ENTRIES 1000000 #define TEST_INSERT_FAIL INT_MAX -static int entries = 50000; -module_param(entries, int, 0); -MODULE_PARM_DESC(entries, "Number of entries to add (default: 50000)"); +static int parm_entries = 50000; +module_param(parm_entries, int, 0); +MODULE_PARM_DESC(parm_entries, "Number of entries to add (default: 50000)"); static int runs = 4; module_param(runs, int, 0); @@ -67,6 +67,7 @@ struct test_obj { }; struct thread_data { + unsigned int entries; int id; struct task_struct *task; struct test_obj *objs; @@ -105,11 +106,12 @@ static int insert_retry(struct rhashtable *ht, struct test_obj *obj, return err ? : retries; } -static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) +static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array, + unsigned int entries) { unsigned int i; - for (i = 0; i < entries * 2; i++) { + for (i = 0; i < entries; i++) { struct test_obj *obj; bool expected = !(i % 2); struct test_obj_val key = { @@ -142,7 +144,7 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array) return 0; } -static void test_bucket_stats(struct rhashtable *ht) +static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) { unsigned int err, total = 0, chain_len = 0; struct rhashtable_iter hti; @@ -184,7 +186,8 @@ static void test_bucket_stats(struct rhashtable *ht) pr_warn("Test failed: Total count mismatch ^^^"); } -static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) +static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, + unsigned int entries) { struct test_obj *obj; int err; @@ -212,12 +215,12 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array) pr_info(" %u insertions retried due to memory pressure\n", insert_retries); - test_bucket_stats(ht); + test_bucket_stats(ht, entries); rcu_read_lock(); - test_rht_lookup(ht, array); + test_rht_lookup(ht, array, entries); rcu_read_unlock(); - test_bucket_stats(ht); + test_bucket_stats(ht, entries); pr_info(" Deleting %d keys\n", entries); for (i = 0; i < entries; i++) { @@ -245,6 +248,7 @@ static struct rhashtable ht; static int thread_lookup_test(struct thread_data *tdata) { + unsigned int entries = tdata->entries; int i, err = 0; for (i = 0; i < entries; i++) { @@ -281,7 +285,7 @@ static int threadfunc(void *data) if (down_interruptible(&startup_sem)) pr_err(" thread[%d]: down_interruptible failed\n", tdata->id); - for (i = 0; i < entries; i++) { + for (i = 0; i < tdata->entries; i++) { tdata->objs[i].value.id = i; tdata->objs[i].value.tid = tdata->id; err = insert_retry(&ht, &tdata->objs[i], test_rht_params); @@ -305,7 +309,7 @@ static int threadfunc(void *data) } for (step = 10; step > 0; step--) { - for (i = 0; i < entries; i += step) { + for (i = 0; i < tdata->entries; i += step) { if (tdata->objs[i].value.id == TEST_INSERT_FAIL) continue; err = rhashtable_remove_fast(&ht, &tdata->objs[i].node, @@ -336,12 +340,16 @@ out: static int __init test_rht_init(void) { + unsigned int entries; int i, err, started_threads = 0, failed_threads = 0; u64 total_time = 0; struct thread_data *tdata; struct test_obj *objs; - entries = min(entries, MAX_ENTRIES); + if (parm_entries < 0) + parm_entries = 1; + + entries = min(parm_entries, MAX_ENTRIES); test_rht_params.automatic_shrinking = shrinking; test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries); @@ -367,7 +375,7 @@ static int __init test_rht_init(void) continue; } - time = test_rhashtable(&ht, objs); + time = test_rhashtable(&ht, objs, entries); rhashtable_destroy(&ht); if (time < 0) { vfree(objs); @@ -409,6 +417,7 @@ static int __init test_rht_init(void) } for (i = 0; i < tcount; i++) { tdata[i].id = i; + tdata[i].entries = entries; tdata[i].objs = objs + i * entries; tdata[i].task = kthread_run(threadfunc, &tdata[i], "rhashtable_thrad[%d]", i); -- cgit v1.2.3 From a6359bd8dd1c3a15c09e7dbb533bf89d13b42acd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:13 +0200 Subject: test_rhashtable: add a check for max_size add a test that tries to insert more than max_size elements. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 69f5b3849980..1eee90e6e394 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -246,6 +246,43 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, static struct rhashtable ht; +static int __init test_rhashtable_max(struct test_obj *array, + unsigned int entries) +{ + unsigned int i, insert_retries = 0; + int err; + + test_rht_params.max_size = roundup_pow_of_two(entries / 8); + err = rhashtable_init(&ht, &test_rht_params); + if (err) + return err; + + for (i = 0; i < ht.max_elems; i++) { + struct test_obj *obj = &array[i]; + + obj->value.id = i * 2; + err = insert_retry(&ht, obj, test_rht_params); + if (err > 0) + insert_retries += err; + else if (err) + return err; + } + + err = insert_retry(&ht, &array[ht.max_elems], test_rht_params); + if (err == -E2BIG) { + err = 0; + } else { + pr_info("insert element %u should have failed with %d, got %d\n", + ht.max_elems, -E2BIG, err); + if (err == 0) + err = -1; + } + + rhashtable_destroy(&ht); + + return err; +} + static int thread_lookup_test(struct thread_data *tdata) { unsigned int entries = tdata->entries; @@ -386,7 +423,11 @@ static int __init test_rht_init(void) total_time += time; } + pr_info("test if its possible to exceed max_size %d: %s\n", + test_rht_params.max_size, test_rhashtable_max(objs, entries) == 0 ? + "no, ok" : "YES, failed"); vfree(objs); + do_div(total_time, runs); pr_info("Average test time: %llu\n", total_time); -- cgit v1.2.3 From cdd4de372ea06a18e104100b9f2c442527d26db1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Sep 2017 01:12:14 +0200 Subject: test_rhashtable: add test case for rhl_table interface also test rhltable. rhltable remove operations are slow as deletions require a list walk, thus test with 1/16th of the given entry count number to get a run duration similar to rhashtable one. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 196 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 194 insertions(+), 2 deletions(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 1eee90e6e394..de4d0584631a 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #define MAX_ENTRIES 1000000 @@ -66,6 +67,11 @@ struct test_obj { struct rhash_head node; }; +struct test_obj_rhl { + struct test_obj_val value; + struct rhlist_head list_node; +}; + struct thread_data { unsigned int entries; int id; @@ -245,6 +251,186 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, } static struct rhashtable ht; +static struct rhltable rhlt __initdata; + +static int __init test_rhltable(unsigned int entries) +{ + struct test_obj_rhl *rhl_test_objects; + unsigned long *obj_in_table; + unsigned int i, j, k; + int ret, err; + + if (entries == 0) + entries = 1; + + rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries); + if (!rhl_test_objects) + return -ENOMEM; + + ret = -ENOMEM; + obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long)); + if (!obj_in_table) + goto out_free; + + /* nulls_base not supported in rhlist interface */ + test_rht_params.nulls_base = 0; + err = rhltable_init(&rhlt, &test_rht_params); + if (WARN_ON(err)) + goto out_free; + + k = prandom_u32(); + ret = 0; + for (i = 0; i < entries; i++) { + rhl_test_objects[i].value.id = k; + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, + test_rht_params); + if (WARN(err, "error %d on element %d\n", err, i)) + break; + if (err == 0) + set_bit(i, obj_in_table); + } + + if (err) + ret = err; + + pr_info("test %d add/delete pairs into rhlist\n", entries); + for (i = 0; i < entries; i++) { + struct rhlist_head *h, *pos; + struct test_obj_rhl *obj; + struct test_obj_val key = { + .id = k, + }; + bool found; + + rcu_read_lock(); + h = rhltable_lookup(&rhlt, &key, test_rht_params); + if (WARN(!h, "key not found during iteration %d of %d", i, entries)) { + rcu_read_unlock(); + break; + } + + if (i) { + j = i - 1; + rhl_for_each_entry_rcu(obj, pos, h, list_node) { + if (WARN(pos == &rhl_test_objects[j].list_node, "old element found, should be gone")) + break; + } + } + + cond_resched_rcu(); + + found = false; + + rhl_for_each_entry_rcu(obj, pos, h, list_node) { + if (pos == &rhl_test_objects[i].list_node) { + found = true; + break; + } + } + + rcu_read_unlock(); + + if (WARN(!found, "element %d not found", i)) + break; + + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "rhltable_remove: err %d for iteration %d\n", err, i); + if (err == 0) + clear_bit(i, obj_in_table); + } + + if (ret == 0 && err) + ret = err; + + for (i = 0; i < entries; i++) { + WARN(test_bit(i, obj_in_table), "elem %d allegedly still present", i); + + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, + test_rht_params); + if (WARN(err, "error %d on element %d\n", err, i)) + break; + if (err == 0) + set_bit(i, obj_in_table); + } + + pr_info("test %d random rhlist add/delete operations\n", entries); + for (j = 0; j < entries; j++) { + u32 i = prandom_u32_max(entries); + u32 prand = prandom_u32(); + + cond_resched(); + + if (prand == 0) + prand = prandom_u32(); + + if (prand & 1) { + prand >>= 1; + continue; + } + + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (test_bit(i, obj_in_table)) { + clear_bit(i, obj_in_table); + if (WARN(err, "cannot remove element at slot %d", i)) + continue; + } else { + if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d", + i, err, -ENOENT)) + continue; + } + + if (prand & 1) { + prand >>= 1; + continue; + } + + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (err == 0) { + if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i)) + continue; + } else { + if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i)) + continue; + } + + if (prand & 1) { + prand >>= 1; + continue; + } + + i = prandom_u32_max(entries); + if (test_bit(i, obj_in_table)) { + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "cannot remove element at slot %d", i); + if (err == 0) + clear_bit(i, obj_in_table); + } else { + err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + WARN(err, "failed to insert object %d", i); + if (err == 0) + set_bit(i, obj_in_table); + } + } + + for (i = 0; i < entries; i++) { + cond_resched(); + err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params); + if (test_bit(i, obj_in_table)) { + if (WARN(err, "cannot remove element at slot %d", i)) + continue; + } else { + if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d", + err, -ENOENT)) + continue; + } + } + + rhltable_destroy(&rhlt); +out_free: + vfree(rhl_test_objects); + vfree(obj_in_table); + return ret; +} static int __init test_rhashtable_max(struct test_obj *array, unsigned int entries) @@ -480,11 +666,17 @@ static int __init test_rht_init(void) failed_threads++; } } - pr_info("Started %d threads, %d failed\n", - started_threads, failed_threads); rhashtable_destroy(&ht); vfree(tdata); vfree(objs); + + /* + * rhltable_remove is very expensive, default values can cause test + * to run for 2 minutes or more, use a smaller number instead. + */ + err = test_rhltable(entries / 16); + pr_info("Started %d threads, %d failed, rhltable test returns %d\n", + started_threads, failed_threads, err); return 0; } -- cgit v1.2.3 From bd7d2106b63adfd0dfd08331344e356461c29d70 Mon Sep 17 00:00:00 2001 From: Jim Hanko Date: Tue, 19 Sep 2017 11:33:39 -0700 Subject: team: fall back to hash if table entry is empty If the hash to port mapping table does not have a valid port (i.e. when a port goes down), fall back to the simple hashing mechanism to avoid dropping packets. Signed-off-by: Jim Hanko Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team_mode_loadbalance.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c index 1468ddf424cc..a5ef97010eb3 100644 --- a/drivers/net/team/team_mode_loadbalance.c +++ b/drivers/net/team/team_mode_loadbalance.c @@ -137,7 +137,13 @@ static struct team_port *lb_htpm_select_tx_port(struct team *team, struct sk_buff *skb, unsigned char hash) { - return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); + struct team_port *port; + + port = rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash)); + if (likely(port)) + return port; + /* If no valid port in the table, fall back to simple hash */ + return lb_hash_select_tx_port(team, lb_priv, skb, hash); } struct lb_select_tx_port { -- cgit v1.2.3 From 752fbcc33405d6f8249465e4b2c4e420091bb825 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 19 Sep 2017 13:15:42 -0700 Subject: net_sched: no need to free qdisc in RCU callback gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller no longer needs to wait for a grace period. So this patch gets rid of it. Cc: Jamal Hadi Salim Cc: Eric Dumazet Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 - net/sched/sch_generic.c | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 135f5a2dd931..684d8ed27eaa 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -93,7 +93,6 @@ struct Qdisc { unsigned long state; struct Qdisc *next_sched; struct sk_buff *skb_bad_txq; - struct rcu_head rcu_head; int padded; refcount_t refcnt; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 92237e75dbbc..1fb0c754b7fd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -688,10 +688,8 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_rcu_free(struct rcu_head *head) +static void qdisc_free(struct Qdisc *qdisc) { - struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); - if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); free_percpu(qdisc->cpu_qstats); @@ -724,11 +722,7 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(qdisc->gso_skb); kfree_skb(qdisc->skb_bad_txq); - /* - * gen_estimator est_timer() might access qdisc->q.lock, - * wait a RCU grace period before freeing qdisc. - */ - call_rcu(&qdisc->rcu_head, qdisc_rcu_free); + qdisc_free(qdisc); } EXPORT_SYMBOL(qdisc_destroy); -- cgit v1.2.3 From 16dff336b33d87c15d9cbe933cfd275aae2a8251 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:03 -0700 Subject: kobject: add kobject_uevent_net_broadcast() This removes some #ifdef pollution and will ease follow up patches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 96 +++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index e590523ea476..4f48cc3b11d5 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -294,6 +294,57 @@ static void cleanup_uevent_env(struct subprocess_info *info) } #endif +static int kobject_uevent_net_broadcast(struct kobject *kobj, + struct kobj_uevent_env *env, + const char *action_string, + const char *devpath) +{ + int retval = 0; +#if defined(CONFIG_NET) + struct uevent_sock *ue_sk; + + /* send netlink message */ + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; + struct sk_buff *skb; + size_t len; + + if (!netlink_has_listeners(uevent_sock, 1)) + continue; + + /* allocate message with the maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (skb) { + char *scratch; + int i; + + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + /* copy keys to our continuous event payload buffer */ + for (i = 0; i < env->envp_idx; i++) { + len = strlen(env->envp[i]) + 1; + scratch = skb_put(skb, len); + strcpy(scratch, env->envp[i]); + } + + NETLINK_CB(skb).dst_group = 1; + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS || retval == -ESRCH) + retval = 0; + } else + retval = -ENOMEM; + } +#endif + return retval; +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -316,9 +367,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, const struct kset_uevent_ops *uevent_ops; int i = 0; int retval = 0; -#ifdef CONFIG_NET - struct uevent_sock *ue_sk; -#endif pr_debug("kobject: '%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); @@ -427,46 +475,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, mutex_unlock(&uevent_sock_mutex); goto exit; } - -#if defined(CONFIG_NET) - /* send netlink message */ - list_for_each_entry(ue_sk, &uevent_sock_list, list) { - struct sock *uevent_sock = ue_sk->sk; - struct sk_buff *skb; - size_t len; - - if (!netlink_has_listeners(uevent_sock, 1)) - continue; - - /* allocate message with the maximum possible size */ - len = strlen(action_string) + strlen(devpath) + 2; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); - if (skb) { - char *scratch; - - /* add header */ - scratch = skb_put(skb, len); - sprintf(scratch, "%s@%s", action_string, devpath); - - /* copy keys to our continuous event payload buffer */ - for (i = 0; i < env->envp_idx; i++) { - len = strlen(env->envp[i]) + 1; - scratch = skb_put(skb, len); - strcpy(scratch, env->envp[i]); - } - - NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast_filtered(uevent_sock, skb, - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); - /* ENOBUFS should be handled in userspace */ - if (retval == -ENOBUFS || retval == -ESRCH) - retval = 0; - } else - retval = -ENOMEM; - } -#endif + retval = kobject_uevent_net_broadcast(kobj, env, action_string, + devpath); mutex_unlock(&uevent_sock_mutex); #ifdef CONFIG_UEVENT_HELPER -- cgit v1.2.3 From 4a336a23d619e96aef37d4d054cfadcdd1b581ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:04 -0700 Subject: kobject: copy env blob in one go No need to iterate over strings, just copy in one efficient memcpy() call. Tested: time perf record "(for f in `seq 1 3000` ; do ip netns add tast$f; done)" [ perf record: Woken up 10 times to write data ] [ perf record: Captured and wrote 8.224 MB perf.data (~359301 samples) ] real 0m52.554s # instead of 1m7.492s user 0m0.309s sys 0m51.375s # instead of 1m6.875s 9.88% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 8.86% ip [kernel.kallsyms] [k] string 7.37% ip [kernel.kallsyms] [k] __ip6addrlbl_add 5.68% ip [kernel.kallsyms] [k] netlink_has_listeners 5.52% ip [kernel.kallsyms] [k] memcpy_erms 4.76% ip [kernel.kallsyms] [k] __alloc_skb 4.54% ip [kernel.kallsyms] [k] vsnprintf 3.94% ip [kernel.kallsyms] [k] format_decode 3.80% ip [kernel.kallsyms] [k] kmem_cache_alloc_node_trace 3.71% ip [kernel.kallsyms] [k] kmem_cache_alloc_node 3.66% ip [kernel.kallsyms] [k] kobject_uevent_env 3.38% ip [kernel.kallsyms] [k] strlen 2.65% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 2.20% ip [kernel.kallsyms] [k] kfree 2.09% ip [kernel.kallsyms] [k] memset_erms 2.07% ip [kernel.kallsyms] [k] ___cache_free 1.95% ip [kernel.kallsyms] [k] kmem_cache_free 1.91% ip [kernel.kallsyms] [k] _raw_read_lock 1.45% ip [kernel.kallsyms] [k] ksize 1.25% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.00% ip [kernel.kallsyms] [k] widen_string Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 4f48cc3b11d5..78b2a7e378c0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -317,18 +317,12 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, skb = alloc_skb(len + env->buflen, GFP_KERNEL); if (skb) { char *scratch; - int i; /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); - /* copy keys to our continuous event payload buffer */ - for (i = 0; i < env->envp_idx; i++) { - len = strlen(env->envp[i]) + 1; - scratch = skb_put(skb, len); - strcpy(scratch, env->envp[i]); - } + skb_put_data(skb, env->buf, env->buflen); NETLINK_CB(skb).dst_group = 1; retval = netlink_broadcast_filtered(uevent_sock, skb, -- cgit v1.2.3 From d464e84eed02993d40ad55fdc19f4523e4deee5b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:05 -0700 Subject: kobject: factorize skb setup in kobject_uevent_net_broadcast() We can build one skb and let it be cloned in netlink. This is much faster, and use less memory (all clones will share the same skb->head) Tested: time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 4.110 MB perf.data (~179584 samples) ] real 0m24.227s # instead of 0m52.554s user 0m0.329s sys 0m23.753s # instead of 0m51.375s 14.77% ip [kernel.kallsyms] [k] __ip6addrlbl_add 14.56% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 11.65% ip [kernel.kallsyms] [k] netlink_has_listeners 6.19% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 5.66% ip [kernel.kallsyms] [k] kobject_uevent_env 4.97% ip [kernel.kallsyms] [k] memset_erms 4.67% ip [kernel.kallsyms] [k] refcount_sub_and_test 4.41% ip [kernel.kallsyms] [k] _raw_read_lock 3.59% ip [kernel.kallsyms] [k] refcount_inc_not_zero 3.13% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 1.55% ip [kernel.kallsyms] [k] __wake_up 1.20% ip [kernel.kallsyms] [k] strlen 1.03% ip [kernel.kallsyms] [k] __wake_up_common 0.93% ip [kernel.kallsyms] [k] consume_skb 0.92% ip [kernel.kallsyms] [k] netlink_trim 0.87% ip [kernel.kallsyms] [k] insert_header 0.63% ip [kernel.kallsyms] [k] unmap_page_range Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- lib/kobject_uevent.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 78b2a7e378c0..147db91c10d0 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -301,23 +301,26 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, { int retval = 0; #if defined(CONFIG_NET) + struct sk_buff *skb = NULL; struct uevent_sock *ue_sk; /* send netlink message */ list_for_each_entry(ue_sk, &uevent_sock_list, list) { struct sock *uevent_sock = ue_sk->sk; - struct sk_buff *skb; - size_t len; if (!netlink_has_listeners(uevent_sock, 1)) continue; - /* allocate message with the maximum possible size */ - len = strlen(action_string) + strlen(devpath) + 2; - skb = alloc_skb(len + env->buflen, GFP_KERNEL); - if (skb) { + if (!skb) { + /* allocate message with the maximum possible size */ + size_t len = strlen(action_string) + strlen(devpath) + 2; char *scratch; + retval = -ENOMEM; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (!skb) + continue; + /* add header */ scratch = skb_put(skb, len); sprintf(scratch, "%s@%s", action_string, devpath); @@ -325,16 +328,17 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj, skb_put_data(skb, env->buf, env->buflen); NETLINK_CB(skb).dst_group = 1; - retval = netlink_broadcast_filtered(uevent_sock, skb, - 0, 1, GFP_KERNEL, - kobj_bcast_filter, - kobj); - /* ENOBUFS should be handled in userspace */ - if (retval == -ENOBUFS || retval == -ESRCH) - retval = 0; - } else - retval = -ENOMEM; + } + + retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb), + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS || retval == -ESRCH) + retval = 0; } + consume_skb(skb); #endif return retval; } -- cgit v1.2.3 From a90c9347e90ed1e9323d71402ed18023bc910cd8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:06 -0700 Subject: ipv6: addrlabel: per netns list Having a global list of labels do not scale to thousands of netns in the cloud era. This causes quadratic behavior on netns creation and deletion. This is time having a per netns list of ~10 labels. Tested: $ time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done) [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 3.637 MB perf.data (~158898 samples) ] real 0m20.837s # instead of 0m24.227s user 0m0.328s sys 0m20.338s # instead of 0m23.753s 16.17% ip [kernel.kallsyms] [k] netlink_broadcast_filtered 12.30% ip [kernel.kallsyms] [k] netlink_has_listeners 6.76% ip [kernel.kallsyms] [k] _raw_spin_lock_irqsave 5.78% ip [kernel.kallsyms] [k] memset_erms 5.77% ip [kernel.kallsyms] [k] kobject_uevent_env 5.18% ip [kernel.kallsyms] [k] refcount_sub_and_test 4.96% ip [kernel.kallsyms] [k] _raw_read_lock 3.82% ip [kernel.kallsyms] [k] refcount_inc_not_zero 3.33% ip [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 2.11% ip [kernel.kallsyms] [k] unmap_page_range 1.77% ip [kernel.kallsyms] [k] __wake_up 1.69% ip [kernel.kallsyms] [k] strlen 1.17% ip [kernel.kallsyms] [k] __wake_up_common 1.09% ip [kernel.kallsyms] [k] insert_header 1.04% ip [kernel.kallsyms] [k] page_remove_rmap 1.01% ip [kernel.kallsyms] [k] consume_skb 0.98% ip [kernel.kallsyms] [k] netlink_trim 0.51% ip [kernel.kallsyms] [k] kernfs_link_sibling 0.51% ip [kernel.kallsyms] [k] filemap_map_pages 0.46% ip [kernel.kallsyms] [k] memcpy_erms Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 5 +++ net/ipv6/addrlabel.c | 81 ++++++++++++++++++------------------------------ 2 files changed, 35 insertions(+), 51 deletions(-) diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2544f9760a42..2ea1ed341ef8 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -89,6 +89,11 @@ struct netns_ipv6 { atomic_t fib6_sernum; struct seg6_pernet_data *seg6_data; struct fib_notifier_ops *notifier_ops; + struct { + struct hlist_head head; + spinlock_t lock; + u32 seq; + } ip6addrlbl_table; }; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b055bc79f56d..c6311d7108f6 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -30,7 +30,6 @@ * Policy Table */ struct ip6addrlbl_entry { - possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -41,19 +40,6 @@ struct ip6addrlbl_entry { struct rcu_head rcu; }; -static struct ip6addrlbl_table -{ - struct hlist_head head; - spinlock_t lock; - u32 seq; -} ip6addrlbl_table; - -static inline -struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) -{ - return read_pnet(&lbl->lbl_net); -} - /* * Default policy table (RFC6724 + extensions) * @@ -148,13 +134,10 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) } /* Find label */ -static bool __ip6addrlbl_match(struct net *net, - const struct ip6addrlbl_entry *p, +static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { - if (!net_eq(ip6addrlbl_net(p), net)) - return false; if (p->ifindex && p->ifindex != ifindex) return false; if (p->addrtype && p->addrtype != addrtype) @@ -169,8 +152,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, int type, int ifindex) { struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (__ip6addrlbl_match(net, p, addr, type, ifindex)) + + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (__ip6addrlbl_match(p, addr, type, ifindex)) return p; } return NULL; @@ -196,8 +180,7 @@ u32 ipv6_addr_label(struct net *net, } /* allocate one entry */ -static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, - const struct in6_addr *prefix, +static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { @@ -236,24 +219,23 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); - write_pnet(&newp->lbl_net, net); refcount_set(&newp->refcnt, 1); return newp; } /* add a label */ -static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, + int replace) { - struct hlist_node *n; struct ip6addrlbl_entry *last = NULL, *p = NULL; + struct hlist_node *n; int ret = 0; ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, replace); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { @@ -273,10 +255,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (last) hlist_add_behind_rcu(&newp->list, &last->list); else - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); + hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head); out: if (!ret) - ip6addrlbl_table.seq++; + net->ipv6.ip6addrlbl_table.seq++; return ret; } @@ -292,12 +274,12 @@ static int ip6addrlbl_add(struct net *net, __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); - newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); + newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); - spin_lock(&ip6addrlbl_table.lock); - ret = __ip6addrlbl_add(newp, replace); - spin_unlock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + ret = __ip6addrlbl_add(net, newp, replace); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); if (ret) ip6addrlbl_free(newp); return ret; @@ -315,9 +297,8 @@ static int __ip6addrlbl_del(struct net *net, ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && - net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); @@ -340,9 +321,9 @@ static int ip6addrlbl_del(struct net *net, __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); - spin_lock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); return ret; } @@ -354,6 +335,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net) ADDRLABEL(KERN_DEBUG "%s\n", __func__); + spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); + INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); + for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { int ret = ip6addrlbl_add(net, ip6addrlbl_init_table[i].prefix, @@ -373,14 +357,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) struct hlist_node *n; /* Remove all labels belonging to the exiting net */ - spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { - if (net_eq(ip6addrlbl_net(p), net)) { - hlist_del_rcu(&p->list); - ip6addrlbl_put(p); - } + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + ip6addrlbl_put(p); } - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); } static struct pernet_operations ipv6_addr_label_ops = { @@ -390,8 +372,6 @@ static struct pernet_operations ipv6_addr_label_ops = { int __init ipv6_addr_label_init(void) { - spin_lock_init(&ip6addrlbl_table.lock); - return register_pernet_subsys(&ipv6_addr_label_ops); } @@ -510,11 +490,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (idx >= s_idx && - net_eq(ip6addrlbl_net(p), net)) { + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (idx >= s_idx) { err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, + net->ipv6.ip6addrlbl_table.seq, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, @@ -571,7 +550,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); if (p && !ip6addrlbl_hold(p)) p = NULL; - lseq = ip6addrlbl_table.seq; + lseq = net->ipv6.ip6addrlbl_table.seq; rcu_read_unlock(); if (!p) { -- cgit v1.2.3 From 789e6ddb0b2fb5d5024b760b178a47876e4de7a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:07 -0700 Subject: tcp: batch tcp_net_metrics_exit When dealing with a list of dismantling netns, we can scan tcp_metrics once, saving cpu cycles. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 102b2c90bb80..0ab78abc811b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -892,10 +892,14 @@ static void tcp_metrics_flush_all(struct net *net) for (row = 0; row < max_rows; row++, hb++) { struct tcp_metrics_block __rcu **pp; + bool match; + spin_lock_bh(&tcp_metrics_lock); pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { - if (net_eq(tm_net(tm), net)) { + match = net ? net_eq(tm_net(tm), net) : + !atomic_read(&tm_net(tm)->count); + if (match) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); } else { @@ -1018,14 +1022,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) return 0; } -static void __net_exit tcp_net_metrics_exit(struct net *net) +static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list) { - tcp_metrics_flush_all(net); + tcp_metrics_flush_all(NULL); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { - .init = tcp_net_metrics_init, - .exit = tcp_net_metrics_exit, + .init = tcp_net_metrics_init, + .exit_batch = tcp_net_metrics_exit_batch, }; void __init tcp_metrics_init(void) -- cgit v1.2.3 From bb401caefe9d2c65e0c0fa23b21deecfbfa473fe Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:08 -0700 Subject: ipv6: speedup ipv6 tunnels dismantle Implement exit_batch() method to dismantle more devices per round. (rtnl_lock() ... unregister_netdevice_many() ... rtnl_unlock()) Tested: $ cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait ; grep net_namespace /proc/slabinfo Before patch : $ time ./add_del_unshare.sh net_namespace 110 267 5504 1 2 : tunables 8 4 0 : slabdata 110 267 0 real 3m25.292s user 0m0.644s sys 0m40.153s After patch: $ time ./add_del_unshare.sh net_namespace 126 282 5504 1 2 : tunables 8 4 0 : slabdata 126 282 0 real 1m38.965s user 0m0.688s sys 0m37.017s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++++--- net/ipv6/ip6_tunnel.c | 20 +++++++++++--------- net/ipv6/ip6_vti.c | 23 ++++++++++++++--------- net/ipv6/sit.c | 9 ++++++--- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b7a72d409334..c82d41ef25e2 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1155,19 +1155,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_net(struct net *net) +static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) { + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip6gre_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit = ip6gre_exit_net, + .exit_batch = ip6gre_exit_batch_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ae73164559d5..3d6df489b39f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2167,17 +2167,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; int h; struct ip6_tnl *t; - LIST_HEAD(list); for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6_link_ops) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(dev, list); for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); @@ -2186,12 +2185,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } - - unregister_netdevice_many(&list); } static int __net_init ip6_tnl_init_net(struct net *net) @@ -2235,16 +2232,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_net(struct net *net) +static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) { + struct net *net; + LIST_HEAD(list); + rtnl_lock(); - ip6_tnl_destroy_tunnels(net); + list_for_each_entry(net, net_list, exit_list) + ip6_tnl_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit = ip6_tnl_exit_net, + .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 79444a4bfd6d..714914d1bb98 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1052,23 +1052,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; -static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, + struct list_head *list) { int h; struct ip6_tnl *t; - LIST_HEAD(list); for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); - unregister_netdevice_many(&list); + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) @@ -1108,18 +1107,24 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_net(struct net *net) +static void __net_exit vti6_exit_batch_net(struct list_head *net_list) { - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + struct net *net; + LIST_HEAD(list); rtnl_lock(); - vti6_destroy_tunnels(ip6n); + list_for_each_entry(net, net_list, exit_list) { + ip6n = net_generic(net, vti6_net_id); + vti6_destroy_tunnels(ip6n, &list); + } + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit = vti6_exit_net, + .exit_batch = vti6_exit_batch_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac912bb21747..a799f5258614 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1848,19 +1848,22 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_net(struct net *net) +static void __net_exit sit_exit_batch_net(struct list_head *net_list) { LIST_HEAD(list); + struct net *net; rtnl_lock(); - sit_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + sit_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit = sit_exit_net, + .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), }; -- cgit v1.2.3 From 64bc17811b72758753e2b64cd8f2a63812c61fe1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 16:27:09 -0700 Subject: ipv4: speedup ipv6 tunnels dismantle Implement exit_batch() method to dismantle more devices per round. (rtnl_lock() ... unregister_netdevice_many() ... rtnl_unlock()) Tested: $ cat add_del_unshare.sh for i in `seq 1 40` do (for j in `seq 1 100` ; do unshare -n /bin/true >/dev/null ; done) & done wait ; grep net_namespace /proc/slabinfo Before patch : $ time ./add_del_unshare.sh net_namespace 126 282 5504 1 2 : tunables 8 4 0 : slabdata 126 282 0 real 1m38.965s user 0m0.688s sys 0m37.017s After patch: $ time ./add_del_unshare.sh net_namespace 135 291 5504 1 2 : tunables 8 4 0 : slabdata 135 291 0 real 0m22.117s user 0m0.728s sys 0m35.328s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 22 +++++++++------------- net/ipv4/ip_tunnel.c | 12 +++++++++--- net/ipv4/ip_vti.c | 7 +++---- net/ipv4/ipip.c | 7 +++---- 5 files changed, 26 insertions(+), 25 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 992652856fe8..b41a1e057fce 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -258,7 +258,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); +void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, + struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0162fb955b33..9cee986ac6b8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1013,15 +1013,14 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); - ip_tunnel_delete_net(itn, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit = ipgre_exit_net, + .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1540,15 +1539,14 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_net(struct net *net) +static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); - ip_tunnel_delete_net(itn, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit = ipgre_tap_exit_net, + .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1559,16 +1557,14 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_net(struct net *net) +static void __net_exit erspan_exit_batch_net(struct list_head *net_list) { - struct ip_tunnel_net *itn = net_generic(net, erspan_net_id); - - ip_tunnel_delete_net(itn, &erspan_link_ops); + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit = erspan_exit_net, + .exit_batch = erspan_exit_batch_net, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index e9805ad664ac..fe6fee728ce4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, } } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) +void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, + struct rtnl_link_ops *ops) { + struct ip_tunnel_net *itn; + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list, ops); + list_for_each_entry(net, net_list, exit_list) { + itn = net_generic(net, id); + ip_tunnel_destroy(itn, &list, ops); + } unregister_netdevice_many(&list); rtnl_unlock(); } -EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); +EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 5ed63d250950..02d70ca99db1 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -452,15 +452,14 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_net(struct net *net) +static void __net_exit vti_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - ip_tunnel_delete_net(itn, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit = vti_exit_net, + .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22b5e29..1e47818e38c7 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -634,15 +634,14 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_net(struct net *net) +static void __net_exit ipip_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); - ip_tunnel_delete_net(itn, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit = ipip_exit_net, + .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; -- cgit v1.2.3 From 91f4aa977947f046dc144fa9e3b06f0ffb53be79 Mon Sep 17 00:00:00 2001 From: Diogenes Pereira Date: Wed, 9 Aug 2017 13:19:24 -0300 Subject: mac802154: replace hardcoded value with macro Use IEEE802154_SCF_SECLEVEL_NONE macro defined at ieee802154.h file. Signed-off-by: Diogenes Pereira Signed-off-by: Stefan Schmidt --- net/mac802154/llsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 1e1c9b20bab7..edec2f9919d0 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -713,7 +713,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) return -EINVAL; - if (!hdr.fc.security_enabled || hdr.sec.level == 0) { + if (!hdr.fc.security_enabled || + (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) { skb_push(skb, hlen); return 0; } -- cgit v1.2.3 From 3e4962667efb0f6c09fa3111e6ee53838118b227 Mon Sep 17 00:00:00 2001 From: Diogenes Pereira Date: Tue, 5 Sep 2017 09:18:04 -0300 Subject: mac802154: Fix MAC header and payload encrypted According to 802.15.4-2003/2006/2015 specifications the MAC frame is composed of MHR, MAC payload and MFR and just the outgoing MAC payload must be encrypted. If communication is secure,sender build Auxiliary Security Header(ASH), insert it next to the standard MHR header with security enabled bit ON, and secure frames before transmitting them. According to the information carried within the ASH, recipient retrieves the right cryptographic key and correctly un-secure MAC frames. The error scenario occurs on Linux using IEEE802154_SCF_SECLEVEL_ENC(4) security level when llsec_do_encrypt_unauth() function builds theses MAC frames incorrectly. On recipients these MAC frames are discarded,logging "got invalid frame" messages. Signed-off-by: Diogenes Pereira Signed-off-by: Stefan Schmidt --- net/mac802154/llsec.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index edec2f9919d0..2fb703d70803 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, u8 iv[16]; struct scatterlist src; SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); - int err; + int err, datalen; + unsigned char *data; llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); - sg_init_one(&src, skb->data, skb->len); + /* Compute data payload offset and data length */ + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + sg_init_one(&src, data, datalen); + skcipher_request_set_tfm(req, key->tfm0); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &src, &src, skb->len, iv); + skcipher_request_set_crypt(req, &src, &src, datalen, iv); err = crypto_skcipher_encrypt(req); skcipher_request_zero(req); return err; -- cgit v1.2.3 From d5dd29e4dafef4baad7bf529ad73cafeb13e1aa8 Mon Sep 17 00:00:00 2001 From: Josef Filzmaier Date: Thu, 14 Sep 2017 14:32:10 +0200 Subject: ieee802154: atusb: Driver for Busware HUL dongle Busware manufactured an USB dongle that is quite similar to the atben and rzusb USB dongles. that are already supported. This patch aims to support the Busware HUL dongle (called hulusb) alongside atusb and rzusb. hulusb is using the at86rf212 transceiver which is specifically designed to support the 700/800/900 MHz wave band. The source code is heavily inspired by the existing atusb and at86rf2xx drivers. Signed-off-by: Josef Filzmaier Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 317 +++++++++++++++++++++++++++++++++++------ drivers/net/ieee802154/atusb.h | 8 ++ 2 files changed, 285 insertions(+), 40 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index ef688518ad77..115fa3f37a86 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -21,6 +21,9 @@ * * USB initialization is * Copyright (c) 2013 Alexander Aring + * + * Busware HUL support is + * Copyright (c) 2017 Josef Filzmaier */ #include @@ -42,9 +45,12 @@ #define ATUSB_ALLOC_DELAY_MS 100 /* delay after failed allocation */ #define ATUSB_TX_TIMEOUT_MS 200 /* on the air timeout */ +struct atusb_chip_data; + struct atusb { struct ieee802154_hw *hw; struct usb_device *usb_dev; + struct atusb_chip_data *data; int shutdown; /* non-zero if shutting down */ int err; /* set by first error */ @@ -65,6 +71,14 @@ struct atusb { unsigned char fw_hw_type; /* Firmware hardware type */ }; +struct atusb_chip_data { + u16 t_channel_switch; + int rssi_base_val; + + int (*set_channel)(struct ieee802154_hw*, u8, u8); + int (*set_txpower)(struct ieee802154_hw*, s32); +}; + /* ----- USB commands without data ----------------------------------------- */ /* To reduce the number of error checks in the code, we record the first error @@ -163,6 +177,18 @@ static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask, return ret; } +static int atusb_read_subreg(struct atusb *lp, + unsigned int addr, unsigned int mask, + unsigned int shift) +{ + int rc; + + rc = atusb_read_reg(lp, addr); + rc = (rc & mask) >> shift; + + return rc; +} + static int atusb_get_and_clear_error(struct atusb *atusb) { int err = atusb->err; @@ -379,18 +405,6 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) return ret; } -static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel) -{ - struct atusb *atusb = hw->priv; - int ret; - - ret = atusb_write_subreg(atusb, SR_CHANNEL, channel); - if (ret < 0) - return ret; - msleep(1); /* @@@ ugly synchronization */ - return 0; -} - static int atusb_ed(struct ieee802154_hw *hw, u8 *level) { BUG_ON(!level); @@ -474,6 +488,17 @@ static const s32 atusb_powers[ATUSB_MAX_TX_POWERS + 1] = { -900, -1200, -1700, }; +static int +atusb_txpower(struct ieee802154_hw *hw, s32 mbm) +{ + struct atusb *atusb = hw->priv; + + if (atusb->data) + return atusb->data->set_txpower(hw, mbm); + else + return -ENOTSUPP; +} + static int atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) { @@ -488,12 +513,43 @@ atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) return -EINVAL; } +static int +hulusb_set_txpower(struct ieee802154_hw *hw, s32 mbm) +{ + u32 i; + + for (i = 0; i < hw->phy->supported.tx_powers_size; i++) { + if (hw->phy->supported.tx_powers[i] == mbm) + return atusb_write_subreg(hw->priv, SR_TX_PWR_212, i); + } + + return -EINVAL; +} + #define ATUSB_MAX_ED_LEVELS 0xF static const s32 atusb_ed_levels[ATUSB_MAX_ED_LEVELS + 1] = { -9100, -8900, -8700, -8500, -8300, -8100, -7900, -7700, -7500, -7300, -7100, -6900, -6700, -6500, -6300, -6100, }; +#define AT86RF212_MAX_TX_POWERS 0x1F +static const s32 at86rf212_powers[AT86RF212_MAX_TX_POWERS + 1] = { + 500, 400, 300, 200, 100, 0, -100, -200, -300, -400, -500, -600, -700, + -800, -900, -1000, -1100, -1200, -1300, -1400, -1500, -1600, -1700, + -1800, -1900, -2000, -2100, -2200, -2300, -2400, -2500, -2600, +}; + +#define AT86RF2XX_MAX_ED_LEVELS 0xF +static const s32 at86rf212_ed_levels_100[AT86RF2XX_MAX_ED_LEVELS + 1] = { + -10000, -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, + -8000, -7800, -7600, -7400, -7200, -7000, +}; + +static const s32 at86rf212_ed_levels_98[AT86RF2XX_MAX_ED_LEVELS + 1] = { + -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, -8000, + -7800, -7600, -7400, -7200, -7000, -6800, +}; + static int atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) { @@ -527,6 +583,30 @@ atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca) return atusb_write_subreg(atusb, SR_CCA_MODE, val); } +static int hulusb_set_cca_ed_level(struct atusb *lp, int rssi_base_val) +{ + unsigned int cca_ed_thres; + + cca_ed_thres = atusb_read_subreg(lp, SR_CCA_ED_THRES); + + switch (rssi_base_val) { + case -98: + lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_98; + lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_98); + lp->hw->phy->cca_ed_level = at86rf212_ed_levels_98[cca_ed_thres]; + break; + case -100: + lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; + lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); + lp->hw->phy->cca_ed_level = at86rf212_ed_levels_100[cca_ed_thres]; + break; + default: + WARN_ON(1); + } + + return 0; +} + static int atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) { @@ -541,6 +621,92 @@ atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) return -EINVAL; } +static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + struct atusb *atusb = hw->priv; + int ret = -ENOTSUPP; + + if (atusb->data) { + ret = atusb->data->set_channel(hw, page, channel); + /* @@@ ugly synchronization */ + msleep(atusb->data->t_channel_switch); + } + + return ret; +} + +static int atusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + struct atusb *atusb = hw->priv; + int ret; + + ret = atusb_write_subreg(atusb, SR_CHANNEL, channel); + if (ret < 0) + return ret; + return 0; +} + +static int hulusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + int rc; + int rssi_base_val; + + struct atusb *lp = hw->priv; + + if (channel == 0) + rc = atusb_write_subreg(lp, SR_SUB_MODE, 0); + else + rc = atusb_write_subreg(lp, SR_SUB_MODE, 1); + if (rc < 0) + return rc; + + if (page == 0) { + rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 0); + rssi_base_val = -100; + } else { + rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 1); + rssi_base_val = -98; + } + if (rc < 0) + return rc; + + rc = hulusb_set_cca_ed_level(lp, rssi_base_val); + if (rc < 0) + return rc; + + /* This sets the symbol_duration according frequency on the 212. + * TODO move this handling while set channel and page in cfg802154. + * We can do that, this timings are according 802.15.4 standard. + * If we do that in cfg802154, this is a more generic calculation. + * + * This should also protected from ifs_timer. Means cancel timer and + * init with a new value. For now, this is okay. + */ + if (channel == 0) { + if (page == 0) { + /* SUB:0 and BPSK:0 -> BPSK-20 */ + lp->hw->phy->symbol_duration = 50; + } else { + /* SUB:1 and BPSK:0 -> BPSK-40 */ + lp->hw->phy->symbol_duration = 25; + } + } else { + if (page == 0) + /* SUB:0 and BPSK:1 -> OQPSK-100/200/400 */ + lp->hw->phy->symbol_duration = 40; + else + /* SUB:1 and BPSK:1 -> OQPSK-250/500/1000 */ + lp->hw->phy->symbol_duration = 16; + } + + lp->hw->phy->lifs_period = IEEE802154_LIFS_PERIOD * + lp->hw->phy->symbol_duration; + lp->hw->phy->sifs_period = IEEE802154_SIFS_PERIOD * + lp->hw->phy->symbol_duration; + + return atusb_write_subreg(lp, SR_CHANNEL, channel); +} + static int atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries) { @@ -558,6 +724,14 @@ atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries return atusb_write_subreg(atusb, SR_MAX_CSMA_RETRIES, retries); } +static int +hulusb_set_lbt(struct ieee802154_hw *hw, bool on) +{ + struct atusb *atusb = hw->priv; + + return atusb_write_subreg(atusb, SR_CSMA_LBT_MODE, on); +} + static int atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries) { @@ -593,6 +767,20 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) return 0; } +struct atusb_chip_data atusb_chip_data = { + .t_channel_switch = 1, + .rssi_base_val = -91, + .set_txpower = atusb_set_txpower, + .set_channel = atusb_set_channel, +}; + +struct atusb_chip_data hulusb_chip_data = { + .t_channel_switch = 11, + .rssi_base_val = -100, + .set_txpower = hulusb_set_txpower, + .set_channel = hulusb_set_channel, +}; + static const struct ieee802154_ops atusb_ops = { .owner = THIS_MODULE, .xmit_async = atusb_xmit, @@ -601,7 +789,8 @@ static const struct ieee802154_ops atusb_ops = { .start = atusb_start, .stop = atusb_stop, .set_hw_addr_filt = atusb_set_hw_addr_filt, - .set_txpower = atusb_set_txpower, + .set_txpower = atusb_txpower, + .set_lbt = hulusb_set_lbt, .set_cca_mode = atusb_set_cca_mode, .set_cca_ed_level = atusb_set_cca_ed_level, .set_csma_params = atusb_set_csma_params, @@ -614,6 +803,7 @@ static const struct ieee802154_ops atusb_ops = { static int atusb_get_and_show_revision(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; + char *hw_name; unsigned char *buffer; int ret; @@ -630,9 +820,31 @@ static int atusb_get_and_show_revision(struct atusb *atusb) atusb->fw_ver_min = buffer[1]; atusb->fw_hw_type = buffer[2]; + switch (atusb->fw_hw_type) { + case ATUSB_HW_TYPE_100813: + case ATUSB_HW_TYPE_101216: + case ATUSB_HW_TYPE_110131: + hw_name = "ATUSB"; + atusb->data = &atusb_chip_data; + break; + case ATUSB_HW_TYPE_RZUSB: + hw_name = "RZUSB"; + atusb->data = &atusb_chip_data; + break; + case ATUSB_HW_TYPE_HULUSB: + hw_name = "HULUSB"; + atusb->data = &hulusb_chip_data; + break; + default: + hw_name = "UNKNOWN"; + atusb->err = -ENOTSUPP; + ret = -ENOTSUPP; + break; + } + dev_info(&usb_dev->dev, - "Firmware: major: %u, minor: %u, hardware type: %u\n", - atusb->fw_ver_maj, atusb->fw_ver_min, atusb->fw_hw_type); + "Firmware: major: %u, minor: %u, hardware type: %s (%d)\n", + atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, atusb->fw_hw_type); } if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 2) { dev_info(&usb_dev->dev, @@ -667,11 +879,12 @@ static int atusb_get_and_show_build(struct atusb *atusb) return ret; } -static int atusb_get_and_show_chip(struct atusb *atusb) +static int atusb_get_and_conf_chip(struct atusb *atusb) { struct usb_device *usb_dev = atusb->usb_dev; uint8_t man_id_0, man_id_1, part_num, version_num; const char *chip; + struct ieee802154_hw *hw = atusb->hw; man_id_0 = atusb_read_reg(atusb, RG_MAN_ID_0); man_id_1 = atusb_read_reg(atusb, RG_MAN_ID_1); @@ -681,6 +894,22 @@ static int atusb_get_and_show_chip(struct atusb *atusb) if (atusb->err) return atusb->err; + hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | + IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; + + hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | + WPAN_PHY_FLAG_CCA_MODE; + + hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | + BIT(NL802154_CCA_CARRIER) | + BIT(NL802154_CCA_ENERGY_CARRIER); + hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | + BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); + + hw->phy->cca.mode = NL802154_CCA_ENERGY; + + hw->phy->current_page = 0; + if ((man_id_1 << 8 | man_id_0) != ATUSB_JEDEC_ATMEL) { dev_err(&usb_dev->dev, "non-Atmel transceiver xxxx%02x%02x\n", @@ -691,9 +920,36 @@ static int atusb_get_and_show_chip(struct atusb *atusb) switch (part_num) { case 2: chip = "AT86RF230"; + atusb->hw->phy->supported.channels[0] = 0x7FFF800; + atusb->hw->phy->current_channel = 11; /* reset default */ + atusb->hw->phy->symbol_duration = 16; + atusb->hw->phy->supported.tx_powers = atusb_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); + hw->phy->supported.cca_ed_levels = atusb_ed_levels; + hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); break; case 3: chip = "AT86RF231"; + atusb->hw->phy->supported.channels[0] = 0x7FFF800; + atusb->hw->phy->current_channel = 11; /* reset default */ + atusb->hw->phy->symbol_duration = 16; + atusb->hw->phy->supported.tx_powers = atusb_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); + hw->phy->supported.cca_ed_levels = atusb_ed_levels; + hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); + break; + case 7: + chip = "AT86RF212"; + atusb->hw->flags |= IEEE802154_HW_LBT; + atusb->hw->phy->supported.channels[0] = 0x00007FF; + atusb->hw->phy->supported.channels[2] = 0x00007FF; + atusb->hw->phy->current_channel = 5; + atusb->hw->phy->symbol_duration = 25; + atusb->hw->phy->supported.lbt = NL802154_SUPPORTED_BOOL_BOTH; + atusb->hw->phy->supported.tx_powers = at86rf212_powers; + atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(at86rf212_powers); + atusb->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100; + atusb->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100); break; default: dev_err(&usb_dev->dev, @@ -702,6 +958,9 @@ static int atusb_get_and_show_chip(struct atusb *atusb) goto fail; } + hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; + hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; + dev_info(&usb_dev->dev, "ATUSB: %s version %d\n", chip, version_num); return 0; @@ -794,32 +1053,9 @@ static int atusb_probe(struct usb_interface *interface, goto fail; hw->parent = &usb_dev->dev; - hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | - IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS; - - hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | - WPAN_PHY_FLAG_CCA_MODE; - - hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | - BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER); - hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | - BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); - - hw->phy->supported.cca_ed_levels = atusb_ed_levels; - hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels); - - hw->phy->cca.mode = NL802154_CCA_ENERGY; - - hw->phy->current_page = 0; - hw->phy->current_channel = 11; /* reset default */ - hw->phy->supported.channels[0] = 0x7FFF800; - hw->phy->supported.tx_powers = atusb_powers; - hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers); - hw->phy->transmit_power = hw->phy->supported.tx_powers[0]; - hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7]; atusb_command(atusb, ATUSB_RF_RESET, 0); - atusb_get_and_show_chip(atusb); + atusb_get_and_conf_chip(atusb); atusb_get_and_show_revision(atusb); atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); @@ -941,5 +1177,6 @@ MODULE_AUTHOR("Alexander Aring "); MODULE_AUTHOR("Richard Sharpe "); MODULE_AUTHOR("Stefan Schmidt "); MODULE_AUTHOR("Werner Almesberger "); +MODULE_AUTHOR("Josef Filzmaier "); MODULE_DESCRIPTION("ATUSB IEEE 802.15.4 Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/ieee802154/atusb.h b/drivers/net/ieee802154/atusb.h index b22bbaa77590..555d14bf14a3 100644 --- a/drivers/net/ieee802154/atusb.h +++ b/drivers/net/ieee802154/atusb.h @@ -50,6 +50,14 @@ enum atusb_requests { ATUSB_EUI64_READ, }; +enum { + ATUSB_HW_TYPE_100813, /* 2010-08-13 */ + ATUSB_HW_TYPE_101216, /* 2010-12-16 */ + ATUSB_HW_TYPE_110131, /* 2011-01-31, ATmega32U2-based */ + ATUSB_HW_TYPE_RZUSB, /* Atmel Raven USB dongle with at86rf230 */ + ATUSB_HW_TYPE_HULUSB, /* Busware HUL USB dongle with at86rf212 */ +}; + /* * Direction bRequest wValue wIndex wLength * -- cgit v1.2.3 From 421eedff1180ffa8f1780932d0f2561d67a6b44f Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:21 +0530 Subject: rsi: add p2p support parameters to mac80211 This patch adds p2p supported parameters to mac80211 hw and wiphy structures during mac80211 registration. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 37 ++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index fa12c05d9e23..992ac6cf9a34 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -139,6 +139,32 @@ static const u32 rsi_max_ap_stas[16] = { 4, /* 14 - AP + BT Dual */ }; +static const struct ieee80211_iface_limit rsi_iface_limits[] = { + { + .max = 1, + .types = BIT(NL80211_IFTYPE_STATION), + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO), + }, + { + .max = 1, + .types = BIT(NL80211_IFTYPE_P2P_DEVICE), + }, +}; + +static const struct ieee80211_iface_combination rsi_iface_combinations[] = { + { + .num_different_channels = 1, + .max_interfaces = 3, + .limits = rsi_iface_limits, + .n_limits = ARRAY_SIZE(rsi_iface_limits), + }, +}; + /** * rsi_is_cipher_wep() - This function determines if the cipher is WEP or not. * @common: Pointer to the driver private structure. @@ -1581,7 +1607,11 @@ int rsi_mac80211_attach(struct rsi_common *common) ether_addr_copy(hw->wiphy->addr_mask, addr_mask); wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | - BIT(NL80211_IFTYPE_AP); + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO); + wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; wiphy->retry_short = RETRY_SHORT; wiphy->retry_long = RETRY_LONG; @@ -1608,6 +1638,11 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); + /* Wi-Fi direct parameters */ + hw->max_listen_interval = 10; + wiphy->iface_combinations = rsi_iface_combinations; + wiphy->n_iface_combinations = ARRAY_SIZE(rsi_iface_combinations); + status = ieee80211_register_hw(hw); if (status) return status; -- cgit v1.2.3 From b8bd3a439f3593a5d40e45ce14a17a086a0f6fe2 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:22 +0530 Subject: rsi: add/remove interface enhancements for p2p STA_OPMODE and AP_OPMODE macros are renamed to RSI_OPMODE_STA and RSI_OPMODE_AP. New opmodes are added to this list for P2P support. Mapping of mac80211 interface types to rsi interface types are added. Add and remove interface callbacks are handled for P2P mode. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 117 +++++++++++++++++----------- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 12 +-- drivers/net/wireless/rsi/rsi_main.h | 6 +- drivers/net/wireless/rsi/rsi_mgmt.h | 9 ++- 4 files changed, 89 insertions(+), 55 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 992ac6cf9a34..db69d87bd12e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -355,6 +355,24 @@ static void rsi_mac80211_stop(struct ieee80211_hw *hw) mutex_unlock(&common->mutex); } +static int rsi_map_intf_mode(enum nl80211_iftype vif_type) +{ + switch (vif_type) { + case NL80211_IFTYPE_STATION: + return RSI_OPMODE_STA; + case NL80211_IFTYPE_AP: + return RSI_OPMODE_AP; + case NL80211_IFTYPE_P2P_DEVICE: + return RSI_OPMODE_P2P_CLIENT; + case NL80211_IFTYPE_P2P_CLIENT: + return RSI_OPMODE_P2P_CLIENT; + case NL80211_IFTYPE_P2P_GO: + return RSI_OPMODE_P2P_GO; + default: + return RSI_OPMODE_UNSUPPORTED; + } +} + /** * rsi_mac80211_add_interface() - This function is called when a netdevice * attached to the hardware is enabled. @@ -368,54 +386,62 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw, { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; + struct vif_priv *vif_info = (struct vif_priv *)vif->drv_priv; enum opmode intf_mode; - int ret = -EOPNOTSUPP; + enum vap_status vap_status; + int vap_idx = -1, i; vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; mutex_lock(&common->mutex); - if (adapter->sc_nvifs > 1) { - mutex_unlock(&common->mutex); - return -EOPNOTSUPP; - } - - switch (vif->type) { - case NL80211_IFTYPE_STATION: - rsi_dbg(INFO_ZONE, "Station Mode"); - intf_mode = STA_OPMODE; - break; - case NL80211_IFTYPE_AP: - rsi_dbg(INFO_ZONE, "AP Mode"); - intf_mode = AP_OPMODE; - break; - default: + intf_mode = rsi_map_intf_mode(vif->type); + if (intf_mode == RSI_OPMODE_UNSUPPORTED) { rsi_dbg(ERR_ZONE, "%s: Interface type %d not supported\n", __func__, vif->type); - goto out; + mutex_unlock(&common->mutex); + return -EOPNOTSUPP; + } + if ((vif->type == NL80211_IFTYPE_P2P_DEVICE) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT) || + (vif->type == NL80211_IFTYPE_P2P_GO)) + common->p2p_enabled = true; + + /* Get free vap index */ + for (i = 0; i < RSI_MAX_VIFS; i++) { + if (!adapter->vifs[i]) { + vap_idx = i; + break; + } + } + if (vap_idx < 0) { + rsi_dbg(ERR_ZONE, "Reject: Max VAPs reached\n"); + mutex_unlock(&common->mutex); + return -EOPNOTSUPP; } + vif_info->vap_id = vap_idx; + adapter->vifs[vap_idx] = vif; + adapter->sc_nvifs++; + vap_status = VAP_ADD; - adapter->vifs[adapter->sc_nvifs++] = vif; - ret = rsi_set_vap_capabilities(common, intf_mode, common->mac_addr, - 0, VAP_ADD); - if (ret) { + if (rsi_set_vap_capabilities(common, intf_mode, vif->addr, + vif_info->vap_id, vap_status)) { rsi_dbg(ERR_ZONE, "Failed to set VAP capabilities\n"); - goto out; + mutex_unlock(&common->mutex); + return -EINVAL; } - if (vif->type == NL80211_IFTYPE_AP) { - int i; - + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { rsi_send_rx_filter_frame(common, DISALLOW_BEACONS); common->min_rate = RSI_RATE_AUTO; for (i = 0; i < common->max_stations; i++) common->stations[i].sta = NULL; } -out: mutex_unlock(&common->mutex); - return ret; + return 0; } /** @@ -432,6 +458,7 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw, struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; enum opmode opmode; + int i; rsi_dbg(INFO_ZONE, "Remove Interface Called\n"); @@ -442,23 +469,22 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw, return; } - switch (vif->type) { - case NL80211_IFTYPE_STATION: - opmode = STA_OPMODE; - break; - case NL80211_IFTYPE_AP: - opmode = AP_OPMODE; - break; - default: + opmode = rsi_map_intf_mode(vif->type); + if (opmode == RSI_OPMODE_UNSUPPORTED) { + rsi_dbg(ERR_ZONE, "Opmode error : %d\n", opmode); mutex_unlock(&common->mutex); return; } - rsi_set_vap_capabilities(common, opmode, vif->addr, - 0, VAP_DELETE); - adapter->sc_nvifs--; - - if (!memcmp(adapter->vifs[0], vif, sizeof(struct ieee80211_vif))) - adapter->vifs[0] = NULL; + for (i = 0; i < RSI_MAX_VIFS; i++) { + if (!adapter->vifs[i]) + continue; + if (vif == adapter->vifs[i]) { + rsi_set_vap_capabilities(common, opmode, vif->addr, + i, VAP_DELETE); + adapter->sc_nvifs--; + adapter->vifs[i] = NULL; + } + } mutex_unlock(&common->mutex); } @@ -652,7 +678,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, rsi_send_rx_filter_frame(common, rx_filter_word); } rsi_inform_bss_status(common, - STA_OPMODE, + RSI_OPMODE_STA, bss_conf->assoc, bss_conf->bssid, bss_conf->qos, @@ -1285,8 +1311,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, /* Send peer notify to device */ rsi_dbg(INFO_ZONE, "Indicate bss status to device\n"); - rsi_inform_bss_status(common, AP_OPMODE, 1, sta->addr, - sta->wme, sta->aid, sta, sta_idx); + rsi_inform_bss_status(common, RSI_OPMODE_AP, 1, + sta->addr, sta->wme, sta->aid, + sta, sta_idx); if (common->key) { struct ieee80211_key_conf *key = common->key; @@ -1358,7 +1385,7 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, if (!rsta->sta) continue; if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) { - rsi_inform_bss_status(common, AP_OPMODE, 0, + rsi_inform_bss_status(common, RSI_OPMODE_AP, 0, sta->addr, sta->wme, sta->aid, sta, sta_idx); rsta->sta = NULL; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index f7b550f900c4..082329d1b42b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -482,9 +482,9 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common, memset(skb->data, 0, frame_len); peer_notify = (struct rsi_peer_notify *)skb->data; - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) peer_notify->command = cpu_to_le16(PEER_TYPE_AP << 1); - else if (opmode == AP_OPMODE) + else if (opmode == RSI_OPMODE_AP) peer_notify->command = cpu_to_le16(PEER_TYPE_STA << 1); switch (notify_event) { @@ -1321,7 +1321,7 @@ void rsi_inform_bss_status(struct rsi_common *common, u16 sta_id) { if (status) { - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) common->hw_data_qs_blocked = true; rsi_hal_send_sta_notify_frame(common, opmode, @@ -1331,12 +1331,12 @@ void rsi_inform_bss_status(struct rsi_common *common, aid, sta_id); if (common->min_rate == 0xffff) rsi_send_auto_rate_request(common, sta, sta_id); - if (opmode == STA_OPMODE) { + if (opmode == RSI_OPMODE_STA) { if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; } } else { - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) common->hw_data_qs_blocked = true; rsi_hal_send_sta_notify_frame(common, opmode, @@ -1344,7 +1344,7 @@ void rsi_inform_bss_status(struct rsi_common *common, addr, qos_enable, aid, sta_id); - if (opmode == STA_OPMODE) + if (opmode == RSI_OPMODE_STA) rsi_send_block_unblock_frame(common, true); } } diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index 2c18dde633ea..a4837fab1d50 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -60,7 +60,7 @@ enum RSI_FSM_STATES { extern u32 rsi_zone_enabled; extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); -#define RSI_MAX_VIFS 1 +#define RSI_MAX_VIFS 3 #define NUM_EDCA_QUEUES 4 #define IEEE80211_ADDR_LEN 6 #define FRAME_DESC_SZ 16 @@ -157,6 +157,7 @@ struct vif_priv { bool is_ht; bool sgi; u16 seq_start; + int vap_id; }; struct rsi_event { @@ -270,6 +271,9 @@ struct rsi_common { int num_stations; int max_stations; struct ieee80211_key_conf *key; + + /* Wi-Fi direct mode related */ + bool p2p_enabled; }; enum host_intf { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index c6e1fa669a27..8bd7dfacf259 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -200,8 +200,11 @@ #define RSI_DATA_DESC_INSERT_SEQ_NO BIT(2) enum opmode { - AP_OPMODE = 0, - STA_OPMODE, + RSI_OPMODE_UNSUPPORTED = -1, + RSI_OPMODE_AP = 0, + RSI_OPMODE_STA, + RSI_OPMODE_P2P_GO, + RSI_OPMODE_P2P_CLIENT }; enum vap_status { @@ -363,9 +366,9 @@ struct rsi_vap_caps { u8 vif_type; u8 channel_bw; __le16 antenna_info; + __le16 token; u8 radioid_macid; u8 vap_id; - __le16 reserved3; u8 mac_addr[6]; __le16 keep_alive_period; u8 bssid[6]; -- cgit v1.2.3 From df771911914ab9f80dd38a2710e50c5a418200ba Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:23 +0530 Subject: rsi: add support for p2p listen Remain-on-channel and cancel-remain-on-channel are implemented to support p2p listen phase. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 15 +++ drivers/net/wireless/rsi/rsi_91x_mac80211.c | 188 +++++++++++++++++++++++++--- drivers/net/wireless/rsi/rsi_91x_main.c | 9 +- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 4 +- drivers/net/wireless/rsi/rsi_common.h | 4 +- drivers/net/wireless/rsi/rsi_main.h | 2 + drivers/net/wireless/rsi/rsi_mgmt.h | 2 +- 7 files changed, 199 insertions(+), 25 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 2b0516d2f63d..d2121965265b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -334,6 +334,21 @@ struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr) return NULL; } +struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac) +{ + struct ieee80211_vif *vif; + int i; + + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (!memcmp(vif->addr, mac, ETH_ALEN)) + return vif; + } + return NULL; +} + /** * rsi_core_xmit() - This function transmits the packets received from mac80211 * @common: Pointer to the driver private structure. diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index db69d87bd12e..6780c1cd8c62 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -503,35 +503,44 @@ static int rsi_channel_change(struct ieee80211_hw *hw) int status = -EOPNOTSUPP; struct ieee80211_channel *curchan = hw->conf.chandef.chan; u16 channel = curchan->hw_value; - struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss; + bool assoc = false; + int i; rsi_dbg(INFO_ZONE, "%s: Set channel: %d MHz type: %d channel_no %d\n", __func__, curchan->center_freq, curchan->flags, channel); - if (bss->assoc) { + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (vif->type == NL80211_IFTYPE_STATION) { + bss = &vif->bss_conf; + if (bss->assoc) { + assoc = true; + break; + } + } + } + if (assoc) { if (!common->hw_data_qs_blocked && - (rsi_get_connected_channel(adapter) != channel)) { + (rsi_get_connected_channel(vif) != channel)) { rsi_dbg(INFO_ZONE, "blk data q %d\n", channel); if (!rsi_send_block_unblock_frame(common, true)) common->hw_data_qs_blocked = true; } } - status = rsi_band_check(common); + status = rsi_band_check(common, curchan); if (!status) status = rsi_set_channel(adapter->priv, curchan); - if (bss->assoc) { + if (assoc) { if (common->hw_data_qs_blocked && - (rsi_get_connected_channel(adapter) == channel)) { - rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel); - if (!rsi_send_block_unblock_frame(common, false)) - common->hw_data_qs_blocked = false; - } - } else { - if (common->hw_data_qs_blocked) { + (rsi_get_connected_channel(vif) == channel)) { rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel); if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; @@ -632,16 +641,42 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, * * Return: Current connected AP's channel number is returned. */ -u16 rsi_get_connected_channel(struct rsi_hw *adapter) +u16 rsi_get_connected_channel(struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = adapter->vifs[0]; - if (vif) { - struct ieee80211_bss_conf *bss = &vif->bss_conf; - struct ieee80211_channel *channel = bss->chandef.chan; - return channel->hw_value; - } + struct ieee80211_bss_conf *bss; + struct ieee80211_channel *channel; - return 0; + if (!vif) + return 0; + + bss = &vif->bss_conf; + channel = bss->chandef.chan; + + if (!channel) + return 0; + + return channel->hw_value; +} + +static void rsi_switch_channel(struct rsi_hw *adapter, + struct ieee80211_vif *vif) +{ + struct rsi_common *common = adapter->priv; + struct ieee80211_channel *channel; + + if (common->iface_down) + return; + if (!vif) + return; + + channel = vif->bss_conf.chandef.chan; + + if (!channel) + return; + + rsi_band_check(common, channel); + rsi_set_channel(common, channel); + rsi_dbg(INFO_ZONE, "Switched to channel - %d\n", channel->hw_value); } /** @@ -1561,6 +1596,114 @@ static void rsi_mac80211_rfkill_poll(struct ieee80211_hw *hw) mutex_unlock(&common->mutex); } +static void rsi_resume_conn_channel(struct rsi_common *common) +{ + struct rsi_hw *adapter = common->priv; + struct ieee80211_vif *vif; + int cnt; + + for (cnt = 0; cnt < RSI_MAX_VIFS; cnt++) { + vif = adapter->vifs[cnt]; + if (!vif) + continue; + + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { + rsi_switch_channel(adapter, vif); + break; + } + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && + vif->bss_conf.assoc) { + rsi_switch_channel(adapter, vif); + break; + } + } +} + +void rsi_roc_timeout(unsigned long data) +{ + struct rsi_common *common = (struct rsi_common *)data; + + rsi_dbg(INFO_ZONE, "Remain on channel expired\n"); + + mutex_lock(&common->mutex); + ieee80211_remain_on_channel_expired(common->priv->hw); + + if (timer_pending(&common->roc_timer)) + del_timer(&common->roc_timer); + + rsi_resume_conn_channel(common); + mutex_unlock(&common->mutex); +} + +static int rsi_mac80211_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_channel *chan, int duration, + enum ieee80211_roc_type type) +{ + struct rsi_hw *adapter = (struct rsi_hw *)hw->priv; + struct rsi_common *common = (struct rsi_common *)adapter->priv; + int status = 0; + + rsi_dbg(INFO_ZONE, "***** Remain on channel *****\n"); + + mutex_lock(&common->mutex); + rsi_dbg(INFO_ZONE, "%s: channel: %d duration: %dms\n", + __func__, chan->hw_value, duration); + + if (timer_pending(&common->roc_timer)) { + rsi_dbg(INFO_ZONE, "Stop on-going ROC\n"); + del_timer(&common->roc_timer); + } + common->roc_timer.expires = msecs_to_jiffies(duration) + jiffies; + add_timer(&common->roc_timer); + + /* Configure band */ + if (rsi_band_check(common, chan)) { + rsi_dbg(ERR_ZONE, "Failed to set band\n"); + status = -EINVAL; + goto out; + } + + /* Configure channel */ + if (rsi_set_channel(common, chan)) { + rsi_dbg(ERR_ZONE, "Failed to set the channel\n"); + status = -EINVAL; + goto out; + } + + common->roc_vif = vif; + ieee80211_ready_on_channel(hw); + rsi_dbg(INFO_ZONE, "%s: Ready on channel :%d\n", + __func__, chan->hw_value); + +out: + mutex_unlock(&common->mutex); + + return status; +} + +static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw) +{ + struct rsi_hw *adapter = hw->priv; + struct rsi_common *common = adapter->priv; + + rsi_dbg(INFO_ZONE, "Cancel remain on channel\n"); + + mutex_lock(&common->mutex); + if (!timer_pending(&common->roc_timer)) { + mutex_unlock(&common->mutex); + return 0; + } + + del_timer(&common->roc_timer); + + rsi_resume_conn_channel(common); + mutex_unlock(&common->mutex); + + return 0; +} + static const struct ieee80211_ops mac80211_ops = { .tx = rsi_mac80211_tx, .start = rsi_mac80211_start, @@ -1580,6 +1723,8 @@ static const struct ieee80211_ops mac80211_ops = { .set_antenna = rsi_mac80211_set_antenna, .get_antenna = rsi_mac80211_get_antenna, .rfkill_poll = rsi_mac80211_rfkill_poll, + .remain_on_channel = rsi_mac80211_roc, + .cancel_remain_on_channel = rsi_mac80211_cancel_roc, }; /** @@ -1666,6 +1811,9 @@ int rsi_mac80211_attach(struct rsi_common *common) wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); /* Wi-Fi direct parameters */ + wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL; + wiphy->flags |= WIPHY_FLAG_OFFCHAN_TX; + wiphy->max_remain_on_channel_duration = 10000; hw->max_listen_interval = 10; wiphy->iface_combinations = rsi_iface_combinations; wiphy->n_iface_combinations = ARRAY_SIZE(rsi_iface_combinations); diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 3e1e80888d98..b57bfdcf3549 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -74,6 +74,8 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common, struct skb_info *rx_params; struct sk_buff *skb = NULL; u8 payload_offset; + struct ieee80211_vif *vif; + struct ieee80211_hdr *wh; if (WARN(!pkt_len, "%s: Dummy pkt received", __func__)) return NULL; @@ -92,11 +94,13 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common, payload_offset = (extended_desc + FRAME_DESC_SZ); skb_put(skb, pkt_len); memcpy((skb->data), (buffer + payload_offset), skb->len); + wh = (struct ieee80211_hdr *)skb->data; + vif = rsi_get_vif(common->priv, wh->addr1); info = IEEE80211_SKB_CB(skb); rx_params = (struct skb_info *)info->driver_data; rx_params->rssi = rsi_get_rssi(buffer); - rx_params->channel = rsi_get_connected_channel(common->priv); + rx_params->channel = rsi_get_connected_channel(vif); return skb; } @@ -233,6 +237,9 @@ struct rsi_hw *rsi_91x_init(void) rsi_default_ps_params(adapter); spin_lock_init(&adapter->ps_lock); + common->roc_timer.data = (unsigned long)common; + common->roc_timer.function = (void *)&rsi_roc_timeout; + init_timer(&common->roc_timer); common->init_done = true; return adapter; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 082329d1b42b..aaf5f32aca54 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -926,13 +926,13 @@ static int rsi_send_reset_mac(struct rsi_common *common) * * Return: 0 on success, corresponding error code on failure. */ -int rsi_band_check(struct rsi_common *common) +int rsi_band_check(struct rsi_common *common, + struct ieee80211_channel *curchan) { struct rsi_hw *adapter = common->priv; struct ieee80211_hw *hw = adapter->hw; u8 prev_bw = common->channel_width; u8 prev_ep = common->endpoint; - struct ieee80211_channel *curchan = hw->conf.chandef.chan; int status = 0; if (common->band != curchan->band) { diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index e579d694d13c..272e18d750ff 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -79,9 +79,11 @@ static inline int rsi_kill_thread(struct rsi_thread *handle) } void rsi_mac80211_detach(struct rsi_hw *hw); -u16 rsi_get_connected_channel(struct rsi_hw *adapter); +u16 rsi_get_connected_channel(struct ieee80211_vif *vif); struct rsi_hw *rsi_91x_init(void); void rsi_91x_deinit(struct rsi_hw *adapter); int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len); struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr); +struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac); +void rsi_roc_timeout(unsigned long data); #endif diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index a4837fab1d50..fd07b377d8c0 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -274,6 +274,8 @@ struct rsi_common { /* Wi-Fi direct mode related */ bool p2p_enabled; + struct timer_list roc_timer; + struct ieee80211_vif *roc_vif; }; enum host_intf { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 8bd7dfacf259..08e3b43b6a10 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -633,7 +633,7 @@ void rsi_core_qos_processor(struct rsi_common *common); void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb); int rsi_send_mgmt_pkt(struct rsi_common *common, struct sk_buff *skb); int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb); -int rsi_band_check(struct rsi_common *common); +int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); -- cgit v1.2.3 From 4671c209ac461c8826c1241ba423e75f84ae486b Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:24 +0530 Subject: rsi: handle peer connection and disconnection in p2p mode Parameter 'vif' is passed to inform_bss_status function to check the type of vif and to get vap_id. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 9 ++++-- drivers/net/wireless/rsi/rsi_91x_hal.c | 47 +++++++++++++++++------------ drivers/net/wireless/rsi/rsi_91x_mac80211.c | 22 +++++++++----- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 15 +++++---- drivers/net/wireless/rsi/rsi_hal.h | 3 +- drivers/net/wireless/rsi/rsi_main.h | 2 ++ drivers/net/wireless/rsi/rsi_mgmt.h | 5 ++- 7 files changed, 64 insertions(+), 39 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index d2121965265b..432d6ebd14a3 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -361,8 +361,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) struct rsi_hw *adapter = common->priv; struct ieee80211_tx_info *info; struct skb_info *tx_params; - struct ieee80211_hdr *wh; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_hdr *wh = NULL; + struct ieee80211_vif *vif; u8 q_num, tid = 0; struct rsi_sta *rsta = NULL; @@ -381,6 +381,11 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) wh = (struct ieee80211_hdr *)&skb->data[0]; tx_params->sta_id = 0; + vif = rsi_get_vif(adapter, wh->addr2); + if (!vif) + goto xmit_fail; + tx_params->vif = vif; + tx_params->vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id; if ((ieee80211_is_mgmt(wh->frame_control)) || (ieee80211_is_ctl(wh->frame_control)) || (ieee80211_is_qos_nullfunc(wh->frame_control))) { diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 070dfd68bb83..d0b119e3c6df 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -42,7 +42,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) struct ieee80211_hdr *wh = NULL; struct ieee80211_tx_info *info; struct ieee80211_conf *conf = &adapter->hw->conf; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_vif *vif; struct rsi_mgmt_desc *mgmt_desc; struct skb_info *tx_params; struct ieee80211_bss_conf *bss = NULL; @@ -57,6 +57,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; + vif = tx_params->vif; /* Update header size */ header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc); @@ -78,7 +79,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) tx_params->internal_hdr_size = header_size; memset(&skb->data[0], 0, header_size); - bss = &info->control.vif->bss_conf; + bss = &vif->bss_conf; wh = (struct ieee80211_hdr *)&skb->data[header_size]; mgmt_desc = (struct rsi_mgmt_desc *)skb->data; @@ -95,10 +96,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) mgmt_desc->seq_ctrl = cpu_to_le16(IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl))); - if (common->band == NL80211_BAND_2GHZ) - mgmt_desc->rate_info = RSI_RATE_1; + if ((common->band == NL80211_BAND_2GHZ) && !common->p2p_enabled) + mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_1); else - mgmt_desc->rate_info = RSI_RATE_6; + mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_6); if (conf_is_ht40(conf)) mgmt_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE); @@ -121,7 +122,8 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) xtend_desc->retry_cnt = PROBE_RESP_RETRY_CNT; } - if ((vif->type == NL80211_IFTYPE_AP) && + if (((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (ieee80211_is_action(wh->frame_control))) { struct rsi_sta *rsta = rsi_find_sta(common, wh->addr1); @@ -130,6 +132,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb) else return -EINVAL; } + mgmt_desc->rate_info |= + cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) & + RSI_DESC_VAP_ID_MASK); + return 0; } @@ -306,21 +312,11 @@ int rsi_send_mgmt_pkt(struct rsi_common *common, struct ieee80211_tx_info *info; struct skb_info *tx_params; int status = -E2BIG; - u8 extnd_size; info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; - extnd_size = ((uintptr_t)skb->data & 0x3); if (tx_params->flags & INTERNAL_MGMT_PKT) { - skb->data[1] |= BIT(7); /* Immediate Wakeup bit*/ - if ((extnd_size) > skb_headroom(skb)) { - rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__); - dev_kfree_skb(skb); - return -ENOSPC; - } - skb_push(skb, extnd_size); - skb->data[extnd_size + 4] = extnd_size; status = adapter->host_intf_ops->write_pkt(common->priv, (u8 *)skb->data, skb->len); @@ -352,12 +348,23 @@ int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb) struct rsi_data_desc *bcn_frm; struct ieee80211_hw *hw = common->priv->hw; struct ieee80211_conf *conf = &hw->conf; + struct ieee80211_vif *vif; struct sk_buff *mac_bcn; - u8 vap_id = 0; - u16 tim_offset; - + u8 vap_id = 0, i; + u16 tim_offset = 0; + + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) + break; + } + if (!vif) + return -EINVAL; mac_bcn = ieee80211_beacon_get_tim(adapter->hw, - adapter->vifs[adapter->sc_nvifs - 1], + vif, &tim_offset, NULL); if (!mac_bcn) { rsi_dbg(ERR_ZONE, "Failed to get beacon from mac80211\n"); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 6780c1cd8c62..ce2f911eace1 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -718,7 +718,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, bss_conf->bssid, bss_conf->qos, bss_conf->aid, - NULL, 0); + NULL, 0, vif); adapter->ps_info.dtim_interval_duration = bss->dtim_period; adapter->ps_info.listen_interval = conf->listen_interval; @@ -862,7 +862,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, rsi_dbg(ERR_ZONE, "%s: Cipher 0x%x key_type: %d key_len: %d\n", __func__, key->cipher, key_type, key->keylen); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { if (sta) { rsta = rsi_find_sta(adapter->priv, sta->addr); if (rsta) @@ -1297,7 +1298,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, mutex_lock(&common->mutex); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { u8 cnt; int sta_idx = -1; int free_index = -1; @@ -1348,7 +1350,7 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, rsi_dbg(INFO_ZONE, "Indicate bss status to device\n"); rsi_inform_bss_status(common, RSI_OPMODE_AP, 1, sta->addr, sta->wme, sta->aid, - sta, sta_idx); + sta, sta_idx, vif); if (common->key) { struct ieee80211_key_conf *key = common->key; @@ -1368,7 +1370,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, } } - if (vif->type == NL80211_IFTYPE_STATION) { + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) { rsi_set_min_rate(hw, sta, common); if (sta->ht_cap.ht_supported) { common->vif_info[0].is_ht = true; @@ -1409,7 +1412,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, mutex_lock(&common->mutex); - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { u8 sta_idx, cnt; /* Send peer notify to device */ @@ -1422,7 +1426,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) { rsi_inform_bss_status(common, RSI_OPMODE_AP, 0, sta->addr, sta->wme, - sta->aid, sta, sta_idx); + sta->aid, sta, sta_idx, + vif); rsta->sta = NULL; rsta->sta_id = -1; for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++) @@ -1436,7 +1441,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, rsi_dbg(ERR_ZONE, "%s: No station found\n", __func__); } - if (vif->type == NL80211_IFTYPE_STATION) { + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) { /* Resetting all the fields to default values */ memcpy((u8 *)bss->bssid, (u8 *)sta->addr, ETH_ALEN); bss->qos = sta->wme; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index aaf5f32aca54..428369bf02f0 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -460,12 +460,12 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common, const unsigned char *bssid, u8 qos_enable, u16 aid, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb = NULL; struct rsi_peer_notify *peer_notify; - u16 vap_id = 0; + u16 vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id; int status; u16 frame_len = sizeof(struct rsi_peer_notify); @@ -1318,7 +1318,8 @@ void rsi_inform_bss_status(struct rsi_common *common, u8 qos_enable, u16 aid, struct ieee80211_sta *sta, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { if (status) { if (opmode == RSI_OPMODE_STA) @@ -1328,7 +1329,8 @@ void rsi_inform_bss_status(struct rsi_common *common, STA_CONNECTED, addr, qos_enable, - aid, sta_id); + aid, sta_id, + vif); if (common->min_rate == 0xffff) rsi_send_auto_rate_request(common, sta, sta_id); if (opmode == RSI_OPMODE_STA) { @@ -1343,7 +1345,8 @@ void rsi_inform_bss_status(struct rsi_common *common, STA_DISCONNECTED, addr, qos_enable, - aid, sta_id); + aid, sta_id, + vif); if (opmode == RSI_OPMODE_STA) rsi_send_block_unblock_frame(common, true); } diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index 7c145053da6d..ad0d6537a678 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -121,8 +121,7 @@ struct rsi_mgmt_desc { u8 xtend_desc_size; u8 header_len; __le16 frame_info; - u8 rate_info; - u8 reserved1; + __le16 rate_info; __le16 bbp_info; __le16 seq_ctrl; u8 reserved2; diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index fd07b377d8c0..34089ab111aa 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -124,6 +124,8 @@ struct skb_info { s8 tid; s8 sta_id; u8 internal_hdr_size; + struct ieee80211_vif *vif; + u8 vap_id; }; enum edca_queue { diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 08e3b43b6a10..a82552cfb9d7 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -189,6 +189,8 @@ IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) +#define RSI_DESC_VAP_ID_MASK 0xC000 +#define RSI_DESC_VAP_ID_OFST 14 #define RSI_DATA_DESC_MAC_BBP_INFO BIT(0) #define RSI_DATA_DESC_NO_ACK_IND BIT(9) #define RSI_DATA_DESC_QOS_EN BIT(12) @@ -623,7 +625,8 @@ int rsi_send_vap_dynamic_update(struct rsi_common *common); int rsi_send_block_unblock_frame(struct rsi_common *common, bool event); void rsi_inform_bss_status(struct rsi_common *common, enum opmode opmode, u8 status, const u8 *addr, u8 qos_enable, u16 aid, - struct ieee80211_sta *sta, u16 sta_id); + struct ieee80211_sta *sta, u16 sta_id, + struct ieee80211_vif *vif); void rsi_indicate_pkt_to_os(struct rsi_common *common, struct sk_buff *skb); int rsi_mac80211_attach(struct rsi_common *common); void rsi_indicate_tx_status(struct rsi_hw *common, struct sk_buff *skb, -- cgit v1.2.3 From eac4eed3224b1bc769489ae2e146105cbba3a8ad Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:25 +0530 Subject: rsi: tx and rx path enhancements for p2p mode Data descriptor is updated to include vap_id. TX command frame key config also updated to include vap_id. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_core.c | 9 +++++++-- drivers/net/wireless/rsi/rsi_91x_hal.c | 26 ++++++++++++++++++-------- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 23 +++++++++++++++++++---- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 4 ++-- drivers/net/wireless/rsi/rsi_mgmt.h | 3 ++- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 432d6ebd14a3..bc18a191aef9 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -95,6 +95,8 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num) s16 txop = common->tx_qinfo[q_num].txop * 32; __le16 r_txop; struct ieee80211_rate rate; + struct ieee80211_hdr *wh; + struct ieee80211_vif *vif; rate.bitrate = RSI_RATE_MCS0 * 5 * 10; /* Convert to Kbps */ if (q_num == VI_Q) @@ -106,8 +108,10 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num) return 0; do { + wh = (struct ieee80211_hdr *)skb->data; + vif = rsi_get_vif(adapter, wh->addr2); r_txop = ieee80211_generic_frame_duration(adapter->hw, - adapter->vifs[0], + vif, common->band, skb->len, &rate); txop -= le16_to_cpu(r_txop); @@ -403,7 +407,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) q_num = skb->priority; tx_params->tid = tid; - if ((vif->type == NL80211_IFTYPE_AP) && + if (((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (!is_broadcast_ether_addr(wh->addr1)) && (!is_multicast_ether_addr(wh->addr1))) { rsta = rsi_find_sta(common, wh->addr1); diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index d0b119e3c6df..7e8e5d4f5f3d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -157,7 +157,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) u16 seq_num; info = IEEE80211_SKB_CB(skb); - bss = &info->control.vif->bss_conf; + vif = info->control.vif; + bss = &vif->bss_conf; tx_params = (struct skb_info *)info->driver_data; header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc); @@ -181,7 +182,6 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) xtend_desc = (struct xtended_desc *)&skb->data[FRAME_DESC_SZ]; wh = (struct ieee80211_hdr *)&skb->data[header_size]; seq_num = IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl)); - vif = adapter->vifs[0]; data_desc->xtend_desc_size = header_size - FRAME_DESC_SZ; @@ -190,7 +190,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) data_desc->mac_flags |= cpu_to_le16(RSI_QOS_ENABLE); } - if ((vif->type == NL80211_IFTYPE_STATION) && + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && (adapter->ps_state == PS_ENABLED)) wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE); @@ -246,17 +247,23 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) data_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT); data_desc->sta_id = vap_id; - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { if (common->band == NL80211_BAND_5GHZ) data_desc->rate_info = cpu_to_le16(RSI_RATE_6); else data_desc->rate_info = cpu_to_le16(RSI_RATE_1); } } - if ((vif->type == NL80211_IFTYPE_AP) && + if (((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) && (ieee80211_has_moredata(wh->frame_control))) data_desc->frame_info |= cpu_to_le16(MORE_DATA_PRESENT); + data_desc->rate_info |= + cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) & + RSI_DESC_VAP_ID_MASK); + return 0; } @@ -264,7 +271,7 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb) { struct rsi_hw *adapter = common->priv; - struct ieee80211_vif *vif = adapter->vifs[0]; + struct ieee80211_vif *vif; struct ieee80211_tx_info *info; struct ieee80211_bss_conf *bss; int status = -EINVAL; @@ -277,9 +284,12 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (!info->control.vif) goto err; - bss = &info->control.vif->bss_conf; + vif = info->control.vif; + bss = &vif->bss_conf; - if ((vif->type == NL80211_IFTYPE_STATION) && (!bss->assoc)) + if (((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) && + (!bss->assoc)) goto err; status = rsi_prepare_data_desc(common, skb); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index ce2f911eace1..b1550bc243dd 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -879,7 +879,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, RSI_PAIRWISE_KEY, key->keyidx, key->cipher, - sta_id); + sta_id, + vif); if (status) return status; } @@ -891,7 +892,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw, key_type, key->keyidx, key->cipher, - sta_id); + sta_id, + vif); } /** @@ -1165,7 +1167,9 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, struct rsi_common *common, struct ieee80211_rx_status *rxs) { - struct ieee80211_bss_conf *bss = &common->priv->vifs[0]->bss_conf; + struct rsi_hw *adapter = common->priv; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct skb_info *rx_params = (struct skb_info *)info->driver_data; struct ieee80211_hdr *hdr; @@ -1173,6 +1177,7 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, u8 hdrlen = 0; u8 channel = rx_params->channel; s32 freq; + int i; hdr = ((struct ieee80211_hdr *)(skb->data)); hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -1201,6 +1206,15 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, rxs->flag |= RX_FLAG_IV_STRIPPED; } + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + if (vif->type == NL80211_IFTYPE_STATION) + bss = &vif->bss_conf; + } + if (!bss) + return; /* CQM only for connected AP beacons, the RSSI is a weighted avg */ if (bss->assoc && !(memcmp(bss->bssid, hdr->addr2, ETH_ALEN))) { if (ieee80211_is_beacon(hdr->frame_control)) @@ -1363,7 +1377,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, RSI_PAIRWISE_KEY, key->keyidx, key->cipher, - sta_idx); + sta_idx, + vif); } common->num_stations++; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 428369bf02f0..4e50cfdc2f47 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -716,9 +716,9 @@ int rsi_hal_load_key(struct rsi_common *common, u8 key_type, u8 key_id, u32 cipher, - s16 sta_id) + s16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb = NULL; struct rsi_set_key *set_key; u16 key_descriptor = 0; diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index a82552cfb9d7..331d64b05649 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -618,7 +618,8 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common, u16 tid, u16 ssn, u8 buf_size, u8 event, u8 sta_id); int rsi_hal_load_key(struct rsi_common *common, u8 *data, u16 key_len, - u8 key_type, u8 key_id, u32 cipher, s16 sta_id); + u8 key_type, u8 key_id, u32 cipher, s16 sta_id, + struct ieee80211_vif *vif); int rsi_set_channel(struct rsi_common *common, struct ieee80211_channel *channel); int rsi_send_vap_dynamic_update(struct rsi_common *common); -- cgit v1.2.3 From efe877aa0f40dc1f2be450aba5baf8d90e7d9707 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:26 +0530 Subject: rsi: disallow power save config when AP vap running When AP or P2P GO VAP is running, power save configuration should be disallowed. To check interface type in power save configuration 'vif' parameters is passed. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 33 ++++++++++++++++++++--------- drivers/net/wireless/rsi/rsi_91x_mgmt.c | 5 +++-- drivers/net/wireless/rsi/rsi_91x_ps.c | 15 +++++++------ drivers/net/wireless/rsi/rsi_mgmt.h | 2 ++ drivers/net/wireless/rsi/rsi_ps.h | 7 +++--- 5 files changed, 39 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index b1550bc243dd..09ad909e27ed 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -592,7 +592,6 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, { struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; - struct ieee80211_vif *vif = adapter->vifs[0]; struct ieee80211_conf *conf = &hw->conf; int status = -EOPNOTSUPP; @@ -608,16 +607,30 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw, } /* Power save parameters */ - if ((changed & IEEE80211_CONF_CHANGE_PS) && - (vif->type == NL80211_IFTYPE_STATION)) { + if (changed & IEEE80211_CONF_CHANGE_PS) { + struct ieee80211_vif *vif; unsigned long flags; + int i, set_ps = 1; - spin_lock_irqsave(&adapter->ps_lock, flags); - if (conf->flags & IEEE80211_CONF_PS) - rsi_enable_ps(adapter); - else - rsi_disable_ps(adapter); - spin_unlock_irqrestore(&adapter->ps_lock, flags); + for (i = 0; i < RSI_MAX_VIFS; i++) { + vif = adapter->vifs[i]; + if (!vif) + continue; + /* Don't go to power save if AP vap exists */ + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { + set_ps = 0; + break; + } + } + if (set_ps) { + spin_lock_irqsave(&adapter->ps_lock, flags); + if (conf->flags & IEEE80211_CONF_PS) + rsi_enable_ps(adapter, vif); + else + rsi_disable_ps(adapter, vif); + spin_unlock_irqrestore(&adapter->ps_lock, flags); + } } /* RTS threshold */ @@ -726,7 +739,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, if (bss->assoc) { if (common->uapsd_bitmap) { rsi_dbg(INFO_ZONE, "Configuring UAPSD\n"); - rsi_conf_uapsd(adapter); + rsi_conf_uapsd(adapter, vif); } } else { common->uapsd_bitmap = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 4e50cfdc2f47..383cd432b237 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1474,10 +1474,11 @@ int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word) return rsi_send_internal_mgmt_frame(common, skb); } -int rsi_send_ps_request(struct rsi_hw *adapter, bool enable) +int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, + struct ieee80211_vif *vif) { struct rsi_common *common = adapter->priv; - struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf; + struct ieee80211_bss_conf *bss = &vif->bss_conf; struct rsi_request_ps *ps; struct rsi_ps_info *ps_info; struct sk_buff *skb; diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c index 48c79f035c59..523f5329d2b7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_ps.c +++ b/drivers/net/wireless/rsi/rsi_91x_ps.c @@ -67,7 +67,7 @@ void rsi_default_ps_params(struct rsi_hw *adapter) ps_info->deep_sleep_wakeup_period = RSI_DEF_DS_WAKEUP_PERIOD; } -void rsi_enable_ps(struct rsi_hw *adapter) +void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif) { if (adapter->ps_state != PS_NONE) { rsi_dbg(ERR_ZONE, @@ -76,7 +76,7 @@ void rsi_enable_ps(struct rsi_hw *adapter) return; } - if (rsi_send_ps_request(adapter, true)) { + if (rsi_send_ps_request(adapter, true, vif)) { rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", __func__); @@ -86,7 +86,8 @@ void rsi_enable_ps(struct rsi_hw *adapter) rsi_modify_ps_state(adapter, PS_ENABLE_REQ_SENT); } -void rsi_disable_ps(struct rsi_hw *adapter) +/* This function is used to disable power save */ +void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif) { if (adapter->ps_state != PS_ENABLED) { rsi_dbg(ERR_ZONE, @@ -95,7 +96,7 @@ void rsi_disable_ps(struct rsi_hw *adapter) return; } - if (rsi_send_ps_request(adapter, false)) { + if (rsi_send_ps_request(adapter, false, vif)) { rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", __func__); @@ -105,16 +106,16 @@ void rsi_disable_ps(struct rsi_hw *adapter) rsi_modify_ps_state(adapter, PS_DISABLE_REQ_SENT); } -void rsi_conf_uapsd(struct rsi_hw *adapter) +void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif) { int ret; if (adapter->ps_state != PS_ENABLED) return; - ret = rsi_send_ps_request(adapter, false); + ret = rsi_send_ps_request(adapter, false, vif); if (!ret) - ret = rsi_send_ps_request(adapter, true); + ret = rsi_send_ps_request(adapter, true, vif); if (ret) rsi_dbg(ERR_ZONE, "%s: Failed to send PS request to device\n", diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h index 331d64b05649..b9d0802c1b0f 100644 --- a/drivers/net/wireless/rsi/rsi_mgmt.h +++ b/drivers/net/wireless/rsi/rsi_mgmt.h @@ -641,4 +641,6 @@ int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan); int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word); int rsi_send_radio_params_update(struct rsi_common *common); int rsi_set_antenna(struct rsi_common *common, u8 antenna); +int rsi_send_ps_request(struct rsi_hw *adapter, bool enable, + struct ieee80211_vif *vif); #endif diff --git a/drivers/net/wireless/rsi/rsi_ps.h b/drivers/net/wireless/rsi/rsi_ps.h index d8475873df36..98ff6a4ced57 100644 --- a/drivers/net/wireless/rsi/rsi_ps.h +++ b/drivers/net/wireless/rsi/rsi_ps.h @@ -55,10 +55,9 @@ struct rsi_ps_info { } __packed; char *str_psstate(enum ps_state state); -void rsi_enable_ps(struct rsi_hw *adapter); -void rsi_disable_ps(struct rsi_hw *adapter); +void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif); +void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif); int rsi_handle_ps_confirm(struct rsi_hw *adapter, u8 *msg); void rsi_default_ps_params(struct rsi_hw *hw); -int rsi_send_ps_request(struct rsi_hw *adapter, bool enable); -void rsi_conf_uapsd(struct rsi_hw *adapter); +void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif); #endif -- cgit v1.2.3 From c7245c0975f134cb10f113b0626ebd11799c8931 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:27 +0530 Subject: rsi: aggregation changes for p2p mode P2P Go condition is added wherever AP mode is there in aggregation path. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 09ad909e27ed..79426a2defc4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -1005,7 +1005,8 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, if (ssn != NULL) seq_no = *ssn; - if (vif->type == NL80211_IFTYPE_AP) { + if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) { rsta = rsi_find_sta(common, sta->addr); if (!rsta) { rsi_dbg(ERR_ZONE, "No station mapped\n"); @@ -1039,9 +1040,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, break; case IEEE80211_AMPDU_TX_START: - if (vif->type == NL80211_IFTYPE_STATION) + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) common->vif_info[ii].seq_start = seq_no; - else if (vif->type == NL80211_IFTYPE_AP) + else if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) rsta->seq_start[tid] = seq_no; ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); status = 0; @@ -1061,9 +1064,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw, break; case IEEE80211_AMPDU_TX_OPERATIONAL: - if (vif->type == NL80211_IFTYPE_STATION) + if ((vif->type == NL80211_IFTYPE_STATION) || + (vif->type == NL80211_IFTYPE_P2P_CLIENT)) seq_start = common->vif_info[ii].seq_start; - else if (vif->type == NL80211_IFTYPE_AP) + else if ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO)) seq_start = rsta->seq_start[tid]; status = rsi_send_aggregation_params_frame(common, tid, -- cgit v1.2.3 From af75687286bfea63c428591f08e9df6a7e7a9ff2 Mon Sep 17 00:00:00 2001 From: Prameela Rani Garnepudi Date: Wed, 30 Aug 2017 15:08:28 +0530 Subject: rsi: miscellaneous changes for p2p mode Add P2P_GO condition as well when handling BEACON_ENABLE from mac80211. Also passing 'vif' for auto rate request. Signed-off-by: Prameela Rani Garnepudi Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_mac80211.c | 15 +++++++++------ drivers/net/wireless/rsi/rsi_91x_mgmt.c | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 79426a2defc4..b1f5dbbde3cb 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -756,7 +756,8 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw, } if ((changed & BSS_CHANGED_BEACON_ENABLED) && - (vif->type == NL80211_IFTYPE_AP)) { + ((vif->type == NL80211_IFTYPE_AP) || + (vif->type == NL80211_IFTYPE_P2P_GO))) { if (bss->enable_beacon) { rsi_dbg(INFO_ZONE, "===> BEACON ENABLED <===\n"); common->beacon_enabled = 1; @@ -1147,9 +1148,9 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw, */ static void rsi_perform_cqm(struct rsi_common *common, u8 *bssid, - s8 rssi) + s8 rssi, + struct ieee80211_vif *vif) { - struct rsi_hw *adapter = common->priv; s8 last_event = common->cqm_info.last_cqm_event_rssi; int thold = common->cqm_info.rssi_thold; u32 hyst = common->cqm_info.rssi_hyst; @@ -1165,7 +1166,7 @@ static void rsi_perform_cqm(struct rsi_common *common, common->cqm_info.last_cqm_event_rssi = rssi; rsi_dbg(INFO_ZONE, "CQM: Notifying event: %d\n", event); - ieee80211_cqm_rssi_notify(adapter->vifs[0], event, rssi, GFP_KERNEL); + ieee80211_cqm_rssi_notify(vif, event, rssi, GFP_KERNEL); return; } @@ -1228,15 +1229,17 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw, vif = adapter->vifs[i]; if (!vif) continue; - if (vif->type == NL80211_IFTYPE_STATION) + if (vif->type == NL80211_IFTYPE_STATION) { bss = &vif->bss_conf; + break; + } } if (!bss) return; /* CQM only for connected AP beacons, the RSSI is a weighted avg */ if (bss->assoc && !(memcmp(bss->bssid, hdr->addr2, ETH_ALEN))) { if (ieee80211_is_beacon(hdr->frame_control)) - rsi_perform_cqm(common, hdr->addr2, rxs->signal); + rsi_perform_cqm(common, hdr->addr2, rxs->signal, vif); } return; diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index 383cd432b237..4b94190c9797 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1160,9 +1160,9 @@ static bool rsi_map_rates(u16 rate, int *offset) */ static int rsi_send_auto_rate_request(struct rsi_common *common, struct ieee80211_sta *sta, - u16 sta_id) + u16 sta_id, + struct ieee80211_vif *vif) { - struct ieee80211_vif *vif = common->priv->vifs[0]; struct sk_buff *skb; struct rsi_auto_rate *auto_rate; int ii = 0, jj = 0, kk = 0; @@ -1332,7 +1332,7 @@ void rsi_inform_bss_status(struct rsi_common *common, aid, sta_id, vif); if (common->min_rate == 0xffff) - rsi_send_auto_rate_request(common, sta, sta_id); + rsi_send_auto_rate_request(common, sta, sta_id, vif); if (opmode == RSI_OPMODE_STA) { if (!rsi_send_block_unblock_frame(common, false)) common->hw_data_qs_blocked = false; -- cgit v1.2.3 From 6508497cbdc70b92130fbca57402af6a94e05d20 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 8 Sep 2017 16:24:52 +0100 Subject: rsi: fix a dereference on adapter before it has been null checked The assignment of dev is dereferencing adapter before adapter has been null checked, potentially leading to a null pointer dereference. Fix this by simply moving the assignment of dev to a later point after the sanity null check of adapter. Detected by CoverityScan CID#1398383 ("Dereference before null check") Fixes: dad0d04fa7ba ("rsi: Add RS9113 wireless driver") Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 81df09dd2636..08730227cd18 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -73,8 +73,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter, u8 *data, u32 count) { - struct rsi_91x_usbdev *dev = - (struct rsi_91x_usbdev *)adapter->rsi_dev; + struct rsi_91x_usbdev *dev; if (!adapter) return -ENODEV; @@ -82,6 +81,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter, if (endpoint == 0) return -EINVAL; + dev = (struct rsi_91x_usbdev *)adapter->rsi_dev; if (dev->write_fail) return -ENETDOWN; -- cgit v1.2.3 From e31fbe1034d9066cfff0d0a4f7ce440ddf75643f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Sep 2017 19:15:50 +0100 Subject: b43: fix unitialized reads of ret by initializing the array to zero The u8 char array ret is not being initialized and elements outside the range start to end contain just garbage values from the stack. This results in a later scan of the array to read potentially uninitialized values. Fix this by initializing the array to zero. This seems to have been an issue since the very first commit. Detected by CoverityScan CID#139652 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Reviewed-by: Michael Buesch Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/phy_g.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c index 822dcaa8ace6..f59c02166462 100644 --- a/drivers/net/wireless/broadcom/b43/phy_g.c +++ b/drivers/net/wireless/broadcom/b43/phy_g.c @@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_detect(struct b43_wldev *dev, u8 channel) static u8 b43_gphy_aci_scan(struct b43_wldev *dev) { struct b43_phy *phy = &dev->phy; - u8 ret[13]; + u8 ret[13] = { 0 }; unsigned int channel = phy->channel; unsigned int i, j, start, end; -- cgit v1.2.3 From e3ae1c7720462136aaa1119c656704bd96952f4d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 5 Sep 2017 19:16:58 +0100 Subject: b43legacy: fix unitialized reads of ret by initializing the array to zero The u8 char array ret is not being initialized and elements outside the range start to end contain just garbage values from the stack. This results in a later scan of the array to read potentially uninitialized values. Fix this by initializing the array to zero. This seems to have been an issue since the very first commit. Detected by CoverityScan CID#139653 ("Uninitialized scalar variable") Signed-off-by: Colin Ian King Reviewed-by: Michael Buesch Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43legacy/radio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c index 9501420340a9..eab1c9387846 100644 --- a/drivers/net/wireless/broadcom/b43legacy/radio.c +++ b/drivers/net/wireless/broadcom/b43legacy/radio.c @@ -280,7 +280,7 @@ u8 b43legacy_radio_aci_detect(struct b43legacy_wldev *dev, u8 channel) u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev) { struct b43legacy_phy *phy = &dev->phy; - u8 ret[13]; + u8 ret[13] = { 0 }; unsigned int channel = phy->channel; unsigned int i; unsigned int j; -- cgit v1.2.3 From 0f61953dd0f5fbed6ac833f58dc590293b7569b0 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 31 Aug 2017 09:48:59 -0500 Subject: rtlwifi: btcoexist: 23b 1ant: fix duplicated code for different branches A typo led to this issue, which was detected with the help of Coccinelle. In addition to fixing the error, the code is refactored to eliminate an if statement. Addresses-Coverity-ID: 1226788 Reported-by: Gustavo A. R. Silva Signed-off-by: Larry Finger Cc: Ping-Ke Shih Cc: Yan-Hsuan Chuang Cc: Birming Chiu Cc: Shaofu Cc: Steven Ting Signed-off-by: Kalle Valo --- .../wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c index c04425236ce4..5f726f6d3567 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c @@ -2260,14 +2260,11 @@ static void halbtc8723b1ant_run_coexist_mechanism(struct btc_coexist *btcoexist) if (iot_peer != BTC_IOT_PEER_CISCO && iot_peer != BTC_IOT_PEER_BROADCOM) { - if (bt_link_info->sco_exist) - halbtc8723b1ant_limited_rx(btcoexist, - NORMAL_EXEC, false, - false, 0x5); - else - halbtc8723b1ant_limited_rx(btcoexist, - NORMAL_EXEC, false, - false, 0x5); + bool sco_exist = bt_link_info->sco_exist; + + halbtc8723b1ant_limited_rx(btcoexist, + NORMAL_EXEC, sco_exist, + false, 0x5); } else { if (bt_link_info->sco_exist) { halbtc8723b1ant_limited_rx(btcoexist, -- cgit v1.2.3 From 519ce2f933fa14acf69d5c8cabcc18711943d629 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 14 Sep 2017 13:17:44 -0500 Subject: rtlwifi: rtl8192ee: Fix memory leak when loading firmware In routine rtl92ee_set_fw_rsvdpagepkt(), the driver allocates an skb, but never calls rtl_cmd_send_packet(), which will free the buffer. All other rtlwifi drivers perform this operation correctly. This problem has been in the driver since it was included in the kernel. Fortunately, each firmware load only leaks 4 buffers, which likely explains why it has not previously been detected. Cc: Stable # 3.18+ Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 7eae27f8e173..f9563ae301ad 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -682,7 +682,7 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct sk_buff *skb = NULL; - + bool rtstatus; u32 totalpacketlen; u8 u1rsvdpageloc[5] = { 0 }; bool b_dlok = false; @@ -768,7 +768,9 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) skb = dev_alloc_skb(totalpacketlen); skb_put_data(skb, &reserved_page_packet, totalpacketlen); - b_dlok = true; + rtstatus = rtl_cmd_send_packet(hw, skb); + if (rtstatus) + b_dlok = true; if (b_dlok) { RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD , -- cgit v1.2.3 From 31726ff20190bafcf41d5eb5a7615f052f15bdd3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Thu, 31 Aug 2017 01:08:35 +0530 Subject: mwifiex: notify cfg80211 about scan abort Driver sends a series of scan commands to firmware to serve a user scan request. If an intermediate scan command fails, driver aborts the scan but it is not being informed to cfg80211. This will cause issues in applications performing periodic scans. Fix this by informing scan abort. Signed-off-by: Cathy Luo Signed-off-by: Ganapathi Bhat Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index 0fba5b10ef2d..1bd4e13b8449 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -70,11 +70,7 @@ mwifiex_process_cmdresp_error(struct mwifiex_private *priv, break; case HostCmd_CMD_802_11_SCAN: case HostCmd_CMD_802_11_SCAN_EXT: - mwifiex_cancel_pending_scan_cmd(adapter); - - spin_lock_irqsave(&adapter->mwifiex_cmd_lock, flags); - adapter->scan_processing = false; - spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, flags); + mwifiex_cancel_scan(adapter); break; case HostCmd_CMD_MAC_CONTROL: -- cgit v1.2.3 From 26177d7f3969da1827bc2b5923edcfc2ff4c3a61 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Fri, 8 Sep 2017 02:02:43 +0530 Subject: mwifiex: check for mfg_mode in add_virtual_intf If driver is loaded with 'mfg_mode' enabled, then the sending commands are not allowed. So, skip sending commands, to firmware in mwifiex_add_virtual_intf if 'mfg_mode' is enabled. Fixes: 7311ea850079 ("mwifiex: fix AP start problem for newly added interface") Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 32c5074da84c..ad1ebd8844d0 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2959,18 +2959,21 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy, } mwifiex_init_priv_params(priv, dev); - mwifiex_set_mac_address(priv, dev); priv->netdev = dev; - ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, - HostCmd_ACT_GEN_SET, 0, NULL, true); - if (ret) - goto err_set_bss_mode; + if (!adapter->mfg_mode) { + mwifiex_set_mac_address(priv, dev); - ret = mwifiex_sta_init_cmd(priv, false, false); - if (ret) - goto err_sta_init; + ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE, + HostCmd_ACT_GEN_SET, 0, NULL, true); + if (ret) + goto err_set_bss_mode; + + ret = mwifiex_sta_init_cmd(priv, false, false); + if (ret) + goto err_sta_init; + } mwifiex_setup_ht_caps(&wiphy->bands[NL80211_BAND_2GHZ]->ht_cap, priv); if (adapter->is_hw_11ac_capable) -- cgit v1.2.3 From 85dafc12919659ec744e111b1714792cd3d0a9ca Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Mon, 11 Sep 2017 18:16:04 +0530 Subject: mwifiex: remove unnecessary call to memset call to memset to assign 0 value immediately after allocating memory with kzalloc is unnecesaary as kzalloc allocates the memory filled with 0 value. Semantic patch used to resolve this issue: @@ expression e,e2; constant c; statement S; @@ e = kzalloc(e2, c); if(e == NULL) S - memset(e, 0, e2); Signed-off-by: Himanshu Jha Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/scan.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index c9d41ed77fc7..8838b88cd998 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1936,8 +1936,6 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) if (!user_scan_cfg) return -ENOMEM; - memset(user_scan_cfg, 0, sizeof(*user_scan_cfg)); - for (id = 0; id < MWIFIEX_USER_SCAN_CHAN_MAX; id++) { if (!priv->hidden_chan[id].chan_number) break; -- cgit v1.2.3 From d157bcfaf8542612fd9ffc0cbbc1e52b85157640 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 16 Sep 2017 16:34:24 +0100 Subject: mwifiex: make const arrays static to shink object code size Don't populate const arrays on the stack, instead make them static Makes the object code smaller by nearly 300 bytes: Before: text data bss dec hex filename 69260 16149 576 85985 14fe1 cfg80211.o After: text data bss dec hex filename 68385 16725 576 85686 14eb6 cfg80211.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index ad1ebd8844d0..a3d142bdf946 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -142,7 +142,7 @@ mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; if (mwifiex_set_encode(priv, NULL, NULL, 0, key_index, peer_mac, 1)) { @@ -454,7 +454,7 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, { struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); struct mwifiex_wep_key *wep_key; - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const u8 *peer_mac = pairwise ? mac_addr : bc_mac; if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP && @@ -3253,8 +3253,8 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv, int i, filt_num = 0, ret = 0; bool first_pat = true; u8 byte_seq[MWIFIEX_MEF_MAX_BYTESEQ + 1]; - const u8 ipv4_mc_mac[] = {0x33, 0x33}; - const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; + static const u8 ipv4_mc_mac[] = {0x33, 0x33}; + static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; mef_entry->mode = MEF_MODE_HOST_SLEEP; mef_entry->action = MEF_ACTION_ALLOW_AND_WAKEUP_HOST; @@ -3547,9 +3547,9 @@ static int mwifiex_set_rekey_data(struct wiphy *wiphy, struct net_device *dev, static int mwifiex_get_coalesce_pkt_type(u8 *byte_seq) { - const u8 ipv4_mc_mac[] = {0x33, 0x33}; - const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; - const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff}; + static const u8 ipv4_mc_mac[] = {0x33, 0x33}; + static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e}; + static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff}; if ((byte_seq[0] & 0x01) && (byte_seq[MWIFIEX_COALESCE_MAX_BYTESEQ] == 1)) -- cgit v1.2.3 From e251a882c0bae39d3d31efe993e977104605a9b3 Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 18 Sep 2017 12:25:02 +0530 Subject: mwifiex: avoid storing random_mac in private Application will keep track of whether MAC address randomization is enabled or not during scan. But at present driver is storing 'random_mac' in mwifiex_private which implies even after scan is done driver has some reference to the earlier 'scan request'. To avoid this, make use of 'mac_addr' variable in 'scan_request' to store 'random_mac'. This structure will be freed by cfg80211 once scan is done. Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 11 ++++------- drivers/net/wireless/marvell/mwifiex/main.h | 1 - drivers/net/wireless/marvell/mwifiex/scan.c | 3 ++- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a3d142bdf946..f28040c45d86 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2529,15 +2529,12 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - ether_addr_copy(priv->random_mac, request->mac_addr); for (i = 0; i < ETH_ALEN; i++) { - priv->random_mac[i] &= request->mac_addr_mask[i]; - priv->random_mac[i] |= get_random_int() & - ~(request->mac_addr_mask[i]); + request->mac_addr[i] &= request->mac_addr_mask[i]; + request->mac_addr[i] |= + get_random_int() & ~(request->mac_addr_mask[i]); } - ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); - } else { - eth_zero_addr(priv->random_mac); + ether_addr_copy(user_scan_cfg->random_mac, request->mac_addr); } user_scan_cfg->num_ssids = request->n_ssids; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index a76bd797e454..a34de8582e91 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -680,7 +680,6 @@ struct mwifiex_private { struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; u8 assoc_resp_ht_param; bool ht_param_present; - u8 random_mac[ETH_ALEN]; }; diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 8838b88cd998..d7ce7f75ae38 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1946,7 +1946,8 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) adapter->active_scan_triggered = true; if (priv->scan_request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) - ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); + ether_addr_copy(user_scan_cfg->random_mac, + priv->scan_request->mac_addr); user_scan_cfg->num_ssids = priv->scan_request->n_ssids; user_scan_cfg->ssid_list = priv->scan_request->ssids; -- cgit v1.2.3 From e9a3846afaa4d2d44f33a6aa2dd919e481be599e Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Mon, 18 Sep 2017 13:12:17 +0530 Subject: mwifiex: use get_random_mask_addr() helper Avoid calculating random MAC address in driver. Instead make use of 'get_random_mask_addr()' function. Signed-off-by: Ganapathi Bhat Reviewed-by: Brian Norris Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index f28040c45d86..ac01af4d7bfb 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -2503,6 +2503,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, struct ieee80211_channel *chan; struct ieee_types_header *ie; struct mwifiex_user_scan_cfg *user_scan_cfg; + u8 mac_addr[ETH_ALEN]; mwifiex_dbg(priv->adapter, CMD, "info: received scan request on %s\n", dev->name); @@ -2529,12 +2530,10 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - for (i = 0; i < ETH_ALEN; i++) { - request->mac_addr[i] &= request->mac_addr_mask[i]; - request->mac_addr[i] |= - get_random_int() & ~(request->mac_addr_mask[i]); - } - ether_addr_copy(user_scan_cfg->random_mac, request->mac_addr); + get_random_mask_addr(mac_addr, request->mac_addr, + request->mac_addr_mask); + ether_addr_copy(request->mac_addr, mac_addr); + ether_addr_copy(user_scan_cfg->random_mac, mac_addr); } user_scan_cfg->num_ssids = request->n_ssids; -- cgit v1.2.3 From d4e1b299ec2853dd3d90b71ae86fa2626e60dc25 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 20 Sep 2017 12:18:17 -0400 Subject: ipv6: Use ipv6_authlen for len in ipv6_skip_exthdr In ipv6_skip_exthdr, the lengh of AH header is computed manually as (hp->hdrlen+2)<<2. However, in include/linux/ipv6.h, a macro named ipv6_authlen is already defined for exactly the same job. This commit replaces the manual computation code with the macro. Signed-off-by: Xiang Gao Signed-off-by: David S. Miller --- net/ipv6/exthdrs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 305e2ed730bf..115d60919f72 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; + hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); -- cgit v1.2.3 From 07850a4f74ea0433fe26dccb1ab9556c738a6584 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Wed, 20 Sep 2017 08:12:23 +0800 Subject: macvlan: code refine to check data before using This patch checks data first at one place, return if it's null. Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d2aea961e0f4..1ffe77e95d46 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1231,11 +1231,14 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } - if (data && data[IFLA_MACVLAN_FLAGS] && + if (!data) + return 0; + + if (data[IFLA_MACVLAN_FLAGS] && nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC) return -EINVAL; - if (data && data[IFLA_MACVLAN_MODE]) { + if (data[IFLA_MACVLAN_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) { case MACVLAN_MODE_PRIVATE: case MACVLAN_MODE_VEPA: @@ -1248,7 +1251,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], } } - if (data && data[IFLA_MACVLAN_MACADDR_MODE]) { + if (data[IFLA_MACVLAN_MACADDR_MODE]) { switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) { case MACVLAN_MACADDR_ADD: case MACVLAN_MACADDR_DEL: @@ -1260,7 +1263,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], } } - if (data && data[IFLA_MACVLAN_MACADDR]) { + if (data[IFLA_MACVLAN_MACADDR]) { if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN) return -EINVAL; @@ -1268,7 +1271,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[], return -EADDRNOTAVAIL; } - if (data && data[IFLA_MACVLAN_MACADDR_COUNT]) + if (data[IFLA_MACVLAN_MACADDR_COUNT]) return -EINVAL; return 0; -- cgit v1.2.3 From 00ba4cb36da682c68dc87d1703a8aaffe2b4e9c5 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sat, 16 Sep 2017 16:18:33 +0200 Subject: bridge: also trigger RTM_NEWLINK when interface is released from bridge Currently, when an interface is released from a bridge via ioctl(), we get a RTM_DELLINK event through netlink: Deleted 2: dummy0: mtu 1500 master bridge0 state UNKNOWN link/ether 6e:23:c2:54:3a:b3 Userspace has to interpret that as a removal from the bridge, not as a complete removal of the interface. When an bridged interface is completely removed, we get two events: Deleted 2: dummy0: mtu 1500 master bridge0 state DOWN link/ether 6e:23:c2:54:3a:b3 Deleted 2: dummy0: mtu 1500 qdisc noop state DOWN group default link/ether 6e:23:c2:54:3a:b3 brd ff:ff:ff:ff:ff:ff In constrast, when an interface is released from a bond, we get a RTM_NEWLINK with only the new characteristics (no master): 3: dummy1: mtu 1500 qdisc noqueue master bond0 state UNKNOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 4: bond0: mtu 1500 qdisc noqueue state UP group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state DOWN group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff 3: dummy1: mtu 1500 qdisc noqueue state DOWN group default link/ether ca:c8:7b:66:f8:25 brd ff:ff:ff:ff:ff:ff 4: bond0: mtu 1500 qdisc noqueue state UP group default link/ether ae:dc:7a:8c:9a:3c brd ff:ff:ff:ff:ff:ff Userland may be confused by the fact we say a link is deleted while its characteristics are only modified. A first solution would have been to turn the RTM_DELLINK event in del_nbp() into a RTM_NEWLINK event. However, maybe some piece of userland is relying on this RTM_DELLINK to detect when a bridged interface is released. Instead, we also emit a RTM_NEWLINK event once the interface is released (without master info). Deleted 2: dummy0: mtu 1500 master bridge0 state UNKNOWN link/ether 8a:bb:e7:94:b1:f8 2: dummy0: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 8a:bb:e7:94:b1:f8 brd ff:ff:ff:ff:ff:ff This is done only when using ioctl(). When using Netlink, such an event is already automatically emitted in do_setlink(). Signed-off-by: Vincent Bernat Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 7970f8540cbb..3148cb3a8e82 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -99,8 +99,10 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (isadd) ret = br_add_if(br, dev); - else + else { ret = br_del_if(br, dev); + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); + } return ret; } -- cgit v1.2.3 From 4d6a78b477dd9686e7034aa140057ce7a5da5fd3 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 19 Sep 2017 10:09:24 +0200 Subject: net: dsa: lan9303: Add adjust_link() method Make the driver react to device tree "fixed-link" declaration on CPU port. - turn off autonegotiation - force speed 10 or 100 mb/s - force duplex mode Signed-off-by: Egil Hjelmeland Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index b471413d3df9..07355db2ad81 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "lan9303.h" @@ -57,6 +58,7 @@ #define LAN9303_SWITCH_CSR_CMD_LANES (BIT(19) | BIT(18) | BIT(17) | BIT(16)) #define LAN9303_VIRT_PHY_BASE 0x70 #define LAN9303_VIRT_SPECIAL_CTRL 0x77 +#define LAN9303_VIRT_SPECIAL_TURBO BIT(10) /*Turbo MII Enable*/ /*13.4 Switch Fabric Control and Status Registers * Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA. @@ -760,6 +762,43 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum, return chip->ops->phy_write(chip, phy, regnum, val); } +static void lan9303_adjust_link(struct dsa_switch *ds, int port, + struct phy_device *phydev) +{ + struct lan9303 *chip = ds->priv; + int ctl, res; + + if (!phy_is_pseudo_fixed_link(phydev)) + return; + + ctl = lan9303_phy_read(ds, port, MII_BMCR); + + ctl &= ~BMCR_ANENABLE; + + if (phydev->speed == SPEED_100) + ctl |= BMCR_SPEED100; + else if (phydev->speed == SPEED_10) + ctl &= ~BMCR_SPEED100; + else + dev_err(ds->dev, "unsupported speed: %d\n", phydev->speed); + + if (phydev->duplex == DUPLEX_FULL) + ctl |= BMCR_FULLDPLX; + else + ctl &= ~BMCR_FULLDPLX; + + res = lan9303_phy_write(ds, port, MII_BMCR, ctl); + + if (port == chip->phy_addr_sel_strap) { + /* Virtual Phy: Remove Turbo 200Mbit mode */ + lan9303_read(chip->regmap, LAN9303_VIRT_SPECIAL_CTRL, &ctl); + + ctl &= ~LAN9303_VIRT_SPECIAL_TURBO; + res = regmap_write(chip->regmap, + LAN9303_VIRT_SPECIAL_CTRL, ctl); + } +} + static int lan9303_port_enable(struct dsa_switch *ds, int port, struct phy_device *phy) { @@ -803,6 +842,7 @@ static const struct dsa_switch_ops lan9303_switch_ops = { .get_strings = lan9303_get_strings, .phy_read = lan9303_phy_read, .phy_write = lan9303_phy_write, + .adjust_link = lan9303_adjust_link, .get_ethtool_stats = lan9303_get_ethtool_stats, .get_sset_count = lan9303_get_sset_count, .port_enable = lan9303_port_enable, -- cgit v1.2.3 From 9457642a405fd68d5ce90f5c432e462277fb666e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:41 +0800 Subject: virtio-net: remove unnecessary parameter of virtnet_xdp_xmit() CC: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 511f8339fa96..a0ef4b08f0e9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -373,7 +373,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, } static bool virtnet_xdp_xmit(struct virtnet_info *vi, - struct receive_queue *rq, struct xdp_buff *xdp) { struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -542,7 +541,7 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp))) + if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); rcu_read_unlock(); goto xdp_xmit; @@ -677,7 +676,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp))) + if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); if (unlikely(xdp_page != page)) -- cgit v1.2.3 From 312403453532b209879aa7faeff296bd3dea78af Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:42 +0800 Subject: virtio-net: add packet len average only when needed during XDP There's no need to add packet len average in the case of XDP_PASS since it will be done soon after skb is created. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a0ef4b08f0e9..db5924c085aa 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -656,6 +656,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, xdp.data_end = xdp.data + (len - vi->hdr_len); act = bpf_prog_run_xdp(xdp_prog, &xdp); + if (act != XDP_PASS) + ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); + switch (act) { case XDP_PASS: /* recalculate offset to account for any header @@ -671,14 +674,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); return head_skb; } break; case XDP_TX: if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); @@ -690,7 +691,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, case XDP_DROP: if (unlikely(xdp_page != page)) __free_pages(xdp_page, 0); - ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len); goto err_xdp; } } -- cgit v1.2.3 From 186b3c998c50fc241b51b905081c748455d16b4a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Sep 2017 17:42:43 +0800 Subject: virtio-net: support XDP_REDIRECT This patch tries to add XDP_REDIRECT for virtio-net. The changes are not complex as we could use exist XDP_TX helpers for most of the work. The rest is passing the XDP_TX to NAPI handler for implementing batching. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net. | 0 drivers/net/virtio_net.c | 77 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 15 deletions(-) create mode 100644 drivers/net/virtio_net. diff --git a/drivers/net/virtio_net. b/drivers/net/virtio_net. new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index db5924c085aa..f6c1f135a024 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -29,6 +29,7 @@ #include #include #include +#include #include static int napi_weight = NAPI_POLL_WEIGHT; @@ -372,8 +373,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, return skb; } -static bool virtnet_xdp_xmit(struct virtnet_info *vi, - struct xdp_buff *xdp) +static void virtnet_xdp_flush(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct send_queue *sq; + unsigned int qp; + + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); + sq = &vi->sq[qp]; + + virtqueue_kick(sq->vq); +} + +static bool __virtnet_xdp_xmit(struct virtnet_info *vi, + struct xdp_buff *xdp) { struct virtio_net_hdr_mrg_rxbuf *hdr; unsigned int len; @@ -407,10 +420,19 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi, return false; } - virtqueue_kick(sq->vq); return true; } +static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) +{ + struct virtnet_info *vi = netdev_priv(dev); + bool sent = __virtnet_xdp_xmit(vi, xdp); + + if (!sent) + return -ENOSPC; + return 0; +} + static unsigned int virtnet_get_headroom(struct virtnet_info *vi) { return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0; @@ -483,7 +505,8 @@ static struct sk_buff *receive_small(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, void *buf, void *ctx, - unsigned int len) + unsigned int len, + bool *xdp_xmit) { struct sk_buff *skb; struct bpf_prog *xdp_prog; @@ -493,7 +516,7 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); - unsigned int delta = 0; + unsigned int delta = 0, err; struct page *xdp_page; len -= vi->hdr_len; @@ -541,8 +564,16 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) + if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); + else + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (!err) + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -603,7 +634,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct receive_queue *rq, void *buf, void *ctx, - unsigned int len) + unsigned int len, + bool *xdp_xmit) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); @@ -613,6 +645,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); + int err; head_skb = NULL; @@ -678,12 +711,20 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!virtnet_xdp_xmit(vi, &xdp))) + if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) trace_xdp_exception(vi->dev, xdp_prog, act); + else + *xdp_xmit = true; if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); goto xdp_xmit; + case XDP_REDIRECT: + err = xdp_do_redirect(dev, &xdp, xdp_prog); + if (err) + *xdp_xmit = true; + rcu_read_unlock(); + goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: @@ -788,7 +829,7 @@ xdp_xmit: } static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len, void **ctx) + void *buf, unsigned int len, void **ctx, bool *xdp_xmit) { struct net_device *dev = vi->dev; struct sk_buff *skb; @@ -809,11 +850,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, } if (vi->mergeable_rx_bufs) - skb = receive_mergeable(dev, vi, rq, buf, ctx, len); + skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit); else if (vi->big_packets) skb = receive_big(dev, vi, rq, buf, len); else - skb = receive_small(dev, vi, rq, buf, ctx, len); + skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit); if (unlikely(!skb)) return 0; @@ -1071,7 +1112,7 @@ static void refill_work(struct work_struct *work) } } -static int virtnet_receive(struct receive_queue *rq, int budget) +static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int len, received = 0, bytes = 0; @@ -1083,13 +1124,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget) while (received < budget && (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) { - bytes += receive_buf(vi, rq, buf, len, ctx); + bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit); received++; } } else { while (received < budget && (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) { - bytes += receive_buf(vi, rq, buf, len, NULL); + bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit); received++; } } @@ -1161,15 +1202,19 @@ static int virtnet_poll(struct napi_struct *napi, int budget) struct receive_queue *rq = container_of(napi, struct receive_queue, napi); unsigned int received; + bool xdp_xmit = false; virtnet_poll_cleantx(rq); - received = virtnet_receive(rq, budget); + received = virtnet_receive(rq, budget, &xdp_xmit); /* Out of packets? */ if (received < budget) virtqueue_napi_complete(napi, rq->vq, received); + if (xdp_xmit) + xdp_do_flush_map(); + return received; } @@ -2069,6 +2114,8 @@ static const struct net_device_ops virtnet_netdev = { .ndo_poll_controller = virtnet_netpoll, #endif .ndo_xdp = virtnet_xdp, + .ndo_xdp_xmit = virtnet_xdp_xmit, + .ndo_xdp_flush = virtnet_xdp_flush, .ndo_features_check = passthru_features_check, }; -- cgit v1.2.3 From 0d4a6608f68c7532dcbfec2ea1150c9761767d03 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 19 Sep 2017 12:11:43 +0200 Subject: udp: do rmem bulk free even if the rx sk queue is empty The commit 6b229cf77d68 ("udp: add batching to udp_rmem_release()") reduced greatly the cacheline contention between the BH and the US reader batching the rmem updates in most scenarios. Such optimization is explicitly avoided if the US reader is faster then BH processing. My fault, I initially suggested this kind of behavior due to concerns of possible regressions with small sk_rcvbuf values. Tests showed such concerns are misplaced, so this commit relaxes the condition for rmem bulk updates, obtaining small but measurable performance gain in the scenario described above. Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ef29df8648e4..784ced0b9150 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1212,8 +1212,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2) && - !skb_queue_empty(&up->reader_queue)) + if (size < (sk->sk_rcvbuf >> 2)) return; } else { size += up->forward_deficit; -- cgit v1.2.3 From eccaa9e51b77e504fa8e8357e4979bde724e22a8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 20 Sep 2017 15:39:59 -0700 Subject: Revert "bridge: also trigger RTM_NEWLINK when interface is released from bridge" This reverts commit 00ba4cb36da682c68dc87d1703a8aaffe2b4e9c5. Discussion with David Ahern determined that this change is actually not needed. Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 3148cb3a8e82..7970f8540cbb 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -99,10 +99,8 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (isadd) ret = br_add_if(br, dev); - else { + else ret = br_del_if(br, dev); - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); - } return ret; } -- cgit v1.2.3 From 53bade8a3372eea0f1276bc96892735c517330fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:00:37 -0700 Subject: net: dsa: Utilize dsa_slave_dev_check() Instead of open coding the check. Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d51b10450e1b..6fc9eb094267 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1294,7 +1294,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (dev->netdev_ops != &dsa_slave_netdev_ops) + if (!dsa_slave_dev_check(dev)) return NOTIFY_DONE; if (event == NETDEV_CHANGEUPPER) -- cgit v1.2.3 From 5c1adf606d6ff519b5c1b9b22b8055f161dd98dd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:03:45 -0700 Subject: blackfin: tcm-bf518: Remove dsa.h inclusion Nothing in that file uses definitions from that header, so just get rid of it. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/blackfin/mach-bf518/boards/tcm-bf518.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/blackfin/mach-bf518/boards/tcm-bf518.c b/arch/blackfin/mach-bf518/boards/tcm-bf518.c index 240d5cb1f02c..37d868085f6a 100644 --- a/arch/blackfin/mach-bf518/boards/tcm-bf518.c +++ b/arch/blackfin/mach-bf518/boards/tcm-bf518.c @@ -25,7 +25,6 @@ #include #include #include -#include /* * Name the Board for the /proc/cpuinfo -- cgit v1.2.3 From 637ae0f44580040702b53e8f0593a5a5fb00473e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 19 Sep 2017 18:03:46 -0700 Subject: blackfin: ezbrd: Remove non-functional DSA/KSZ8893M code There is no in tree driver for the KSZ8893M switch driver, so just get rid of the code in that board file. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- arch/blackfin/mach-bf518/boards/ezbrd.c | 47 --------------------------------- 1 file changed, 47 deletions(-) diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c index d022112927c2..c51d1b810ac3 100644 --- a/arch/blackfin/mach-bf518/boards/ezbrd.c +++ b/arch/blackfin/mach-bf518/boards/ezbrd.c @@ -25,7 +25,6 @@ #include #include #include -#include /* * Name the Board for the /proc/cpuinfo @@ -105,11 +104,7 @@ static const unsigned short bfin_mac_peripherals[] = { static struct bfin_phydev_platform_data bfin_phydev_data[] = { { -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - .addr = 3, -#else .addr = 1, -#endif .irq = IRQ_MAC_PHYINT, }, }; @@ -119,9 +114,6 @@ static struct bfin_mii_bus_platform_data bfin_mii_bus_data = { .phydev_data = bfin_phydev_data, .phy_mode = PHY_INTERFACE_MODE_MII, .mac_peripherals = bfin_mac_peripherals, -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - .phy_mask = 0xfff7, /* Only probe the port phy connect to the on chip MAC */ -#endif .vlan1_mask = 1, .vlan2_mask = 2, }; @@ -140,29 +132,6 @@ static struct platform_device bfin_mac_device = { } }; -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) -static struct dsa_chip_data ksz8893m_switch_chip_data = { - .mii_bus = &bfin_mii_bus.dev, - .port_names = { - NULL, - "eth%d", - "eth%d", - "cpu", - }, -}; -static struct dsa_platform_data ksz8893m_switch_data = { - .nr_chips = 1, - .netdev = &bfin_mac_device.dev, - .chip = &ksz8893m_switch_chip_data, -}; - -static struct platform_device ksz8893m_switch_device = { - .name = "dsa", - .id = 0, - .num_resources = 0, - .dev.platform_data = &ksz8893m_switch_data, -}; -#endif #endif #if IS_ENABLED(CONFIG_MTD_M25P80) @@ -228,19 +197,6 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = { }, #endif -#if IS_ENABLED(CONFIG_BFIN_MAC) -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - { - .modalias = "ksz8893m", - .max_speed_hz = 5000000, - .bus_num = 0, - .chip_select = 1, - .platform_data = NULL, - .mode = SPI_MODE_3, - }, -#endif -#endif - #if IS_ENABLED(CONFIG_MMC_SPI) { .modalias = "mmc_spi", @@ -714,9 +670,6 @@ static struct platform_device *stamp_devices[] __initdata = { #if IS_ENABLED(CONFIG_BFIN_MAC) &bfin_mii_bus, &bfin_mac_device, -#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M) - &ksz8893m_switch_device, -#endif #endif #if IS_ENABLED(CONFIG_SPI_BFIN5XX) -- cgit v1.2.3 From 34929cb4d691f7f9e217ba0e3f536978cd56aa6c Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 20 Sep 2017 11:32:07 +0530 Subject: cxgb4: add new T5 pci device id's Add 0x50a5, 0x50a6, 0x50a7, 0x50a8 and 0x50a9 T5 device id's. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index aa28299aef5f..37d90d63e4a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -176,6 +176,11 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */ CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a5), /* Custom T522-BT */ + CH_PCI_ID_TABLE_FENTRY(0x50a6), /* Custom T522-BT-SO */ + CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */ + CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */ + CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */ /* T6 adapters: */ -- cgit v1.2.3 From dff37b58ca53b1978e02edf6f8c1dd681799342b Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:01 +0200 Subject: mlxsw: spectrum_switchdev: Change mc_router to mrouter Change the naming of mc_router to mrouter to keep consistency. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index d39ffbfcc436..22f8d7428d96 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -699,10 +699,10 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } -static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct switchdev_trans *trans, - struct net_device *orig_dev, - bool is_port_mc_router) +static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct switchdev_trans *trans, + struct net_device *orig_dev, + bool is_port_mrouter) { struct mlxsw_sp_bridge_port *bridge_port; int err; @@ -720,12 +720,12 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, MLXSW_SP_FLOOD_TYPE_MC, - is_port_mc_router); + is_port_mrouter); if (err) return err; out: - bridge_port->mrouter = is_port_mc_router; + bridge_port->mrouter = is_port_mrouter; return 0; } @@ -793,9 +793,9 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, attr->u.vlan_filtering); break; case SWITCHDEV_ATTR_ID_PORT_MROUTER: - err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans, - attr->orig_dev, - attr->u.mrouter); + err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans, + attr->orig_dev, + attr->u.mrouter); break; case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED: err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans, -- cgit v1.2.3 From 4cdc35e4ebf2e6b1cf4fe028eb9e711723f9199a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:02 +0200 Subject: mlxsw: spectrum_switchdev: Add a ports bitmap to the mid db Add a bitmap of ports to the mid struct to hold the ports that are registered to this mid. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 20 +++++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 7180d8f3de75..0424bee88407 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -95,6 +95,7 @@ struct mlxsw_sp_mid { u16 fid; u16 mid; unsigned int ref_count; + unsigned long *ports_in_mid; /* bits array */ }; enum mlxsw_sp_span_type { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 22f8d7428d96..0fde16a22b72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1239,6 +1239,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, u16 fid) { struct mlxsw_sp_mid *mid; + size_t alloc_size; u16 mid_idx; mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, @@ -1250,6 +1251,14 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!mid) return NULL; + alloc_size = sizeof(unsigned long) * + BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core)); + mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); + if (!mid->ports_in_mid) { + kfree(mid); + return NULL; + } + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); ether_addr_copy(mid->addr, addr); mid->fid = fid; @@ -1260,12 +1269,16 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, return mid; } -static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp, +static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_mid *mid) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); if (--mid->ref_count == 0) { list_del(&mid->list); clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + kfree(mid->ports_in_mid); kfree(mid); return 1; } @@ -1311,6 +1324,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, } } mid->ref_count++; + set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, mid->ref_count == 1); @@ -1331,7 +1345,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_out: - __mlxsw_sp_mc_dec_ref(mlxsw_sp, mid); + __mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid); return err; } @@ -1437,7 +1451,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) { + if (__mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid)) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid_idx, false); if (err) -- cgit v1.2.3 From 0161b9505ab59d4bfc0de66073c9240d1b05040d Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:03 +0200 Subject: mlxsw: spectrum_switchdev: Remove reference count from mid Since there is a bitmap for the ports registered to each mid, there is no need for a ref count, since it will always be the number of set bits in this bitmap. Any check of the ref count was replaced with checking if the bitmap is empty. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 - .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0424bee88407..6fd0afe4b7a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -94,7 +94,6 @@ struct mlxsw_sp_mid { unsigned char addr[ETH_ALEN]; u16 fid; u16 mid; - unsigned int ref_count; unsigned long *ports_in_mid; /* bits array */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 0fde16a22b72..cb2275ed42ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1263,19 +1263,19 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->mid = mid_idx; - mid->ref_count = 0; list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); return mid; } -static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_mid *mid) +static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_mid *mid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - if (--mid->ref_count == 0) { + if (bitmap_empty(mid->ports_in_mid, + mlxsw_core_max_ports(mlxsw_sp->core))) { list_del(&mid->list); clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); kfree(mid->ports_in_mid); @@ -1296,6 +1296,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; + bool is_new_mid = false; u16 fid_index; int err = 0; @@ -1322,18 +1323,17 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; } + is_new_mid = true; } - mid->ref_count++; set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, - mid->ref_count == 1); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, is_new_mid); if (err) { netdev_err(dev, "Unable to set SMID\n"); goto err_out; } - if (mid->ref_count == 1) { + if (is_new_mid) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid->mid, true); if (err) { @@ -1345,7 +1345,7 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return 0; err_out: - __mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid); + mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); return err; } @@ -1451,7 +1451,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (__mlxsw_sp_mc_dec_ref(mlxsw_sp_port, mid)) { + if (mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid)) { err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, mid_idx, false); if (err) -- cgit v1.2.3 From b80888a9194f3aecd5edf6a7ede5c23d77bade8b Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:04 +0200 Subject: mlxsw: spectrum_switchdev: Save mids list per bridge device Instead of saving all the mids in the same list, save them per vlan device. This change allows a more efficient mid find. Also, in the next patches, there will be added a lot of loops over all the mids in bridge device for multicast disable, mrouter change and ndb flush. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 49 +++++++++++----------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index cb2275ed42ab..2ba8a44e1933 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -67,7 +67,6 @@ struct mlxsw_sp_bridge { u32 ageing_time; bool vlan_enabled_exists; struct list_head bridges_list; - struct list_head mids_list; DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX); const struct mlxsw_sp_bridge_ops *bridge_8021q_ops; const struct mlxsw_sp_bridge_ops *bridge_8021d_ops; @@ -77,6 +76,7 @@ struct mlxsw_sp_bridge_device { struct net_device *dev; struct list_head list; struct list_head ports_list; + struct list_head mids_list; u8 vlan_enabled:1, multicast_enabled:1; const struct mlxsw_sp_bridge_ops *ops; @@ -161,6 +161,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge, } else { bridge_device->ops = bridge->bridge_8021d_ops; } + INIT_LIST_HEAD(&bridge_device->mids_list); list_add(&bridge_device->list, &bridge->bridges_list); return bridge_device; @@ -170,10 +171,17 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { + struct mlxsw_sp_mid *mid, *tmp; + list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; WARN_ON(!list_empty(&bridge_device->ports_list)); + list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { + list_del(&mid->list); + clear_bit(mid->mid, bridge->mids_bitmap); + kfree(mid); + } kfree(bridge_device); } @@ -1221,22 +1229,25 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, return err; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) { struct mlxsw_sp_mid *mid; - list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) { + list_for_each_entry(mid, &bridge_device->mids_list, list) { if (ether_addr_equal(mid->addr, addr) && mid->fid == fid) return mid; } return NULL; } -static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, - const unsigned char *addr, - u16 fid) +static struct +mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_device *bridge_device, + const unsigned char *addr, + u16 fid) { struct mlxsw_sp_mid *mid; size_t alloc_size; @@ -1263,7 +1274,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->mid = mid_idx; - list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list); + list_add_tail(&mid->list, &bridge_device->mids_list); return mid; } @@ -1316,9 +1327,10 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { - mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr, + fid_index); if (!mid) { netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; @@ -1440,7 +1452,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid); - mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index); + mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index); if (!mid) { netdev_err(dev, "Unable to remove port from MC DB\n"); return -EINVAL; @@ -1995,17 +2007,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp) } -static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp) -{ - struct mlxsw_sp_mid *mid, *tmp; - - list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) { - list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); - kfree(mid); - } -} - int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_bridge *bridge; @@ -2017,7 +2018,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) bridge->mlxsw_sp = mlxsw_sp; INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list); - INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list); bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops; bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops; @@ -2028,7 +2028,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp) void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_fdb_fini(mlxsw_sp); - mlxsw_sp_mids_fini(mlxsw_sp); WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list)); kfree(mlxsw_sp->bridge); } -- cgit v1.2.3 From 5f9abc597cdd7a63433b7ebfcf26ee2746a76638 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:05 +0200 Subject: mlxsw: spectrum_switchdev: Break smid write function Break the smid write function into two, one that cleans the ports that might be still written there and one that changes an exiting mid entry. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 42 +++++++++++++++------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 2ba8a44e1933..09ead97d9442 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1190,7 +1190,7 @@ mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, - u16 fid, u16 mid, bool adding) + u16 fid, u16 mid_idx, bool adding) { char *sfd_pl; int err; @@ -1201,16 +1201,16 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid, - MLXSW_REG_SFD_REC_ACTION_NOP, mid); + MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } -static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, - bool add, bool clear_all_ports) +/* clean the an entry from the HW and write there a full new entry */ +static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, + u16 mid_idx) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *smid_pl; int err, i; @@ -1218,12 +1218,29 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, if (!smid_pl) return -ENOMEM; - mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); - if (clear_all_ports) { - for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) - if (mlxsw_sp->ports[i]) - mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); + mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false); + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + if (mlxsw_sp->ports[i]) + mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); + kfree(smid_pl); + return err; +} + +static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 mid_idx, bool add) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char *smid_pl; + int err; + + smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL); + if (!smid_pl) + return -ENOMEM; + + mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1336,10 +1353,11 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOMEM; } is_new_mid = true; + mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid->mid); } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true, is_new_mid); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); goto err_out; @@ -1458,7 +1476,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false); + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); if (err) netdev_err(dev, "Unable to remove port from SMID\n"); -- cgit v1.2.3 From 73b433e803d2a3547ee38d1fb2a0bc6f3b03a6d9 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:06 +0200 Subject: mlxsw: spectrum_switchdev: Attach mid id allocation to HW write Attach mid getting and releasing mid id to the HW write / remove, and add a flag to indicate whether the mid is in the HW. It is done because mid id is also HW index to this mid. This change allows adding in the following patches the ability to have a mid in the mdb cache but not in the HW. It will be useful for being able to disable the multicast. It means that the mdb is being written / delete to the HW in the mid allocation / removing function, not after them. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 88 ++++++++++++++-------- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 6fd0afe4b7a3..e907ec446a73 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -94,6 +94,7 @@ struct mlxsw_sp_mid { unsigned char addr[ETH_ALEN]; u16 fid; u16 mid; + bool in_hw; unsigned long *ports_in_mid; /* bits array */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 09ead97d9442..9dd05d87b662 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1260,6 +1260,42 @@ mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, return NULL; } +static bool +mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + u16 mid_idx; + int err; + + mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, + MLXSW_SP_MID_MAX); + if (mid_idx == MLXSW_SP_MID_MAX) + return false; + + mid->mid = mid_idx; + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx); + if (err) + return false; + + err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx, + true); + if (err) + return false; + + set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = true; + return true; +} + +static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mid *mid) +{ + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); + mid->in_hw = false; + return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid, + false); +} + static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_device *bridge_device, @@ -1268,12 +1304,6 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_mid *mid; size_t alloc_size; - u16 mid_idx; - - mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap, - MLXSW_SP_MID_MAX); - if (mid_idx == MLXSW_SP_MID_MAX) - return NULL; mid = kzalloc(sizeof(*mid), GFP_KERNEL); if (!mid) @@ -1281,36 +1311,43 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, alloc_size = sizeof(unsigned long) * BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core)); + mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL); - if (!mid->ports_in_mid) { - kfree(mid); - return NULL; - } + if (!mid->ports_in_mid) + goto err_ports_in_mid_alloc; - set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap); ether_addr_copy(mid->addr, addr); mid->fid = fid; - mid->mid = mid_idx; + mid->in_hw = false; + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) + goto err_write_mdb_entry; + list_add_tail(&mid->list, &bridge_device->mids_list); return mid; + +err_write_mdb_entry: + kfree(mid->ports_in_mid); +err_ports_in_mid_alloc: + kfree(mid); + return NULL; } static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_mid *mid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + int err = 0; clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); if (bitmap_empty(mid->ports_in_mid, mlxsw_core_max_ports(mlxsw_sp->core))) { + err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); list_del(&mid->list); - clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); kfree(mid->ports_in_mid); kfree(mid); - return 1; } - return 0; + return err; } static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -1324,7 +1361,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; - bool is_new_mid = false; u16 fid_index; int err = 0; @@ -1352,8 +1388,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to allocate MC group\n"); return -ENOMEM; } - is_new_mid = true; - mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid->mid); } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); @@ -1363,15 +1397,6 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, goto err_out; } - if (is_new_mid) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid->mid, true); - if (err) { - netdev_err(dev, "Unable to set MC SFD\n"); - goto err_out; - } - } - return 0; err_out: @@ -1481,12 +1506,9 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, netdev_err(dev, "Unable to remove port from SMID\n"); mid_idx = mid->mid; - if (mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid)) { - err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index, - mid_idx, false); - if (err) - netdev_err(dev, "Unable to remove MC SFD\n"); - } + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); return err; } -- cgit v1.2.3 From 061e55bfb83e632afcd34130bb19fe7a32325b02 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:07 +0200 Subject: mlxsw: spectrum_switchdev: Break mid deletion into two function Break mid deletion into two function, so it will be possible in the future to delete a mid entry for other reasons then switchdev command (like port deletion). Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 32 ++++++++++++++-------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9dd05d87b662..7f622de6331c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1468,6 +1468,25 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int +__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + struct mlxsw_sp_mid *mid) +{ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); + if (err) + netdev_err(dev, "Unable to remove MC SFD\n"); + + return err; +} + static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_mdb *mdb) { @@ -1479,8 +1498,6 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_port *bridge_port; struct mlxsw_sp_mid *mid; u16 fid_index; - u16 mid_idx; - int err = 0; bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev); if (!bridge_port) @@ -1501,16 +1518,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); - - mid_idx = mid->mid; - err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); - if (err) - netdev_err(dev, "Unable to remove MC SFD\n"); - - return err; + return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); } static int mlxsw_sp_port_obj_del(struct net_device *dev, -- cgit v1.2.3 From 846fd8a0e7dcd9f455a86dc17ddf0a51c124f9c0 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:08 +0200 Subject: mlxsw: spectrum_switchdev: Don't write mids to the HW when mc is disabled Don't write multicast related data to the HW when mc is disabled. Also, don't allocate mid id to new mids (so the remove function could know that they weren't wrote to the HW) Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7f622de6331c..cea257a77d09 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1290,6 +1290,9 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_mid *mid) { + if (!mid->in_hw) + return 0; + clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap); mid->in_hw = false; return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid, @@ -1319,11 +1322,15 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, ether_addr_copy(mid->addr, addr); mid->fid = fid; mid->in_hw = false; + + if (!bridge_device->multicast_enabled) + goto out; + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) goto err_write_mdb_entry; +out: list_add_tail(&mid->list, &bridge_device->mids_list); - return mid; err_write_mdb_entry: @@ -1391,6 +1398,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, } set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid); + if (!bridge_device->multicast_enabled) + return 0; + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); @@ -1476,9 +1486,12 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *dev = mlxsw_sp_port->dev; int err; - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); + if (bridge_port->bridge_device->multicast_enabled) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); if (err) -- cgit v1.2.3 From 2e3496cd3488729200ff0f1c6381b7016ecd41bd Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:09 +0200 Subject: mlxsw: spectrum_switchdev: Disable mdb when mc is disabled Remove all the mdb entries from the HW when mc is being disabled and re-write them when it is being enabled. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 41 +++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index cea257a77d09..79806af87b93 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -121,6 +121,11 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, u16 fid_index); +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device); + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, const struct net_device *br_dev) @@ -757,6 +762,12 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device) return 0; + if (bridge_device->multicast_enabled != !mc_disabled) { + bridge_device->multicast_enabled = !mc_disabled; + mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port, + bridge_device); + } + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; bool member = mc_disabled ? true : bridge_port->mrouter; @@ -1207,9 +1218,8 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr, return err; } -/* clean the an entry from the HW and write there a full new entry */ -static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, - u16 mid_idx) +static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx, + long *ports_bitmap) { char *smid_pl; int err, i; @@ -1224,6 +1234,9 @@ static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } + for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core)) + mlxsw_reg_smid_port_set(smid_pl, i, 1); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl); kfree(smid_pl); return err; @@ -1273,7 +1286,8 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, return false; mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, + mid->ports_in_mid); if (err) return false; @@ -1414,6 +1428,25 @@ err_out: return err; } +static void +mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_device + *bridge_device) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_mid *mid; + bool mc_enabled; + + mc_enabled = bridge_device->multicast_enabled; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (mc_enabled) + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid); + else + mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); + } +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) -- cgit v1.2.3 From 218a8f8a6379bfd359e58f369b7b7660cd12e865 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:10 +0200 Subject: mlxsw: spectrum_switchdev: Use generic mc flood function Use the generic mc flood function to decide whether to flood mc to a port when mc is being enabled / disabled. Move this function in the file to avoid forward declaration. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 79806af87b93..19ac206879ff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -742,6 +742,14 @@ out: return 0; } +static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) +{ + const struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = bridge_port->bridge_device; + return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; +} + static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, struct net_device *orig_dev, @@ -770,7 +778,7 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC; - bool member = mc_disabled ? true : bridge_port->mrouter; + bool member = mlxsw_sp_mc_flood(bridge_port); err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, @@ -829,14 +837,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) -{ - const struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = bridge_port->bridge_device; - return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; -} - static int mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct mlxsw_sp_bridge_port *bridge_port) -- cgit v1.2.3 From 9dad51bdaa4b1846bd9d5307b54dca74efa555ea Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:11 +0200 Subject: mlxsw: spectrum_switchdev: Flood mc when mc is disabled by user flag When multicast is disabled, flood mc packets only to port that are marked BR_MCAST_FLOOD (instead to all). Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 19ac206879ff..50c4d7c735df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -262,7 +262,8 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device, bridge_port->dev = brport_dev; bridge_port->bridge_device = bridge_device; bridge_port->stp_state = BR_STATE_DISABLED; - bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC; + bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC | + BR_MCAST_FLOOD; INIT_LIST_HEAD(&bridge_port->vlans_list); list_add(&bridge_port->list, &bridge_device->ports_list); bridge_port->ref_count = 1; @@ -468,7 +469,8 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev, &attr->u.brport_flags); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: - attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD; + attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD | + BR_MCAST_FLOOD; break; default: return -EOPNOTSUPP; @@ -653,8 +655,18 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); + if (bridge_port->bridge_device->multicast_enabled) + goto out; + err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port, + MLXSW_SP_FLOOD_TYPE_MC, + brport_flags & + BR_MCAST_FLOOD); + if (err) + return err; + +out: + memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags)); return 0; } @@ -747,7 +759,8 @@ static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port) const struct mlxsw_sp_bridge_device *bridge_device; bridge_device = bridge_port->bridge_device; - return !bridge_device->multicast_enabled ? true : bridge_port->mrouter; + return bridge_device->multicast_enabled ? bridge_port->mrouter : + bridge_port->flags & BR_MCAST_FLOOD; } static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port, -- cgit v1.2.3 From bb5355b27c9da3786a2b5e1583c9d64f492ac7ad Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:12 +0200 Subject: mlxsw: spectrum_switchdev: Flush the mdb when a port is being removed When a port is being removed from a bridge, flush the bridge mdb to remove the mids of that port. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 39 ++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 50c4d7c735df..bc0787312a06 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -121,6 +121,10 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_bridge_port *bridge_port, u16 fid_index); +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port); + static void mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device @@ -176,17 +180,11 @@ static void mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge, struct mlxsw_sp_bridge_device *bridge_device) { - struct mlxsw_sp_mid *mid, *tmp; - list_del(&bridge_device->list); if (bridge_device->vlan_enabled) bridge->vlan_enabled_exists = false; WARN_ON(!list_empty(&bridge_device->ports_list)); - list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { - list_del(&mid->list); - clear_bit(mid->mid, bridge->mids_bitmap); - kfree(mid); - } + WARN_ON(!list_empty(&bridge_device->mids_list)); kfree(bridge_device); } @@ -987,24 +985,28 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) struct mlxsw_sp_bridge_vlan *bridge_vlan; struct mlxsw_sp_bridge_port *bridge_port; u16 vid = mlxsw_sp_port_vlan->vid; - bool last; + bool last_port, last_vlan; if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q && mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D)) return; bridge_port = mlxsw_sp_port_vlan->bridge_port; + last_vlan = list_is_singular(&bridge_port->vlans_list); bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid); - last = list_is_singular(&bridge_vlan->port_vlan_list); + last_port = list_is_singular(&bridge_vlan->port_vlan_list); list_del(&mlxsw_sp_port_vlan->bridge_vlan_node); mlxsw_sp_bridge_vlan_put(bridge_vlan); mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED); mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false); - if (last) + if (last_port) mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp, bridge_port, mlxsw_sp_fid_index(fid)); + if (last_vlan) + mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port); + mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan); mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port); @@ -1580,6 +1582,23 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); } +static void +mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid, *tmp; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) { + if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) { + __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, + mid); + } + } +} + static int mlxsw_sp_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { -- cgit v1.2.3 From 3fba877cb68cfbc1826dd4abc7b1a8fe862adb2a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:13 +0200 Subject: mlxsw: spectrum_switchdev: Flood all mc packets to mrouter ports When mc is enabled, whenever a mc packet doesn't hit any mdb entry it is being flood to the ports marked as mrouters. However, all mc packets should be flooded to them even if they match an entry in the mdb. This patch adds the mrouter ports to every mdb entry that is being written to the HW. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 65 ++++++++++++++++++++-- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index bc0787312a06..146beaa6b2da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1288,10 +1288,55 @@ mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device, return NULL; } +static void +mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_bridge_port *bridge_port, + unsigned long *ports_bitmap) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + u64 max_lag_members, i; + int lag_id; + + if (!bridge_port->lagged) { + set_bit(bridge_port->system_port, ports_bitmap); + } else { + max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_LAG_MEMBERS); + lag_id = bridge_port->lag_id; + for (i = 0; i < max_lag_members; i++) { + mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, + lag_id, i); + if (mlxsw_sp_port) + set_bit(mlxsw_sp_port->local_port, + ports_bitmap); + } + } +} + +static void +mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap, + struct mlxsw_sp_bridge_device *bridge_device, + struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_bridge_port *bridge_port; + + list_for_each_entry(bridge_port, &bridge_device->ports_list, list) { + if (bridge_port->mrouter) { + mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp, + bridge_port, + flood_bitmap); + } + } +} + static bool mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_mid *mid) + struct mlxsw_sp_mid *mid, + struct mlxsw_sp_bridge_device *bridge_device) { + long *flood_bitmap; + int num_of_ports; + int alloc_size; u16 mid_idx; int err; @@ -1300,9 +1345,18 @@ mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp, if (mid_idx == MLXSW_SP_MID_MAX) return false; + num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core); + alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports); + flood_bitmap = kzalloc(alloc_size, GFP_KERNEL); + if (!flood_bitmap) + return false; + + bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports); + mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp); + mid->mid = mid_idx; - err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, - mid->ports_in_mid); + err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap); + kfree(flood_bitmap); if (err) return false; @@ -1355,7 +1409,7 @@ mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp, if (!bridge_device->multicast_enabled) goto out; - if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid)) + if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device)) goto err_write_mdb_entry; out: @@ -1456,7 +1510,8 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(mid, &bridge_device->mids_list, list) { if (mc_enabled) - mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid); + mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, + bridge_device); else mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid); } -- cgit v1.2.3 From 3ddda1178e41bbe26bb5c6ebf66ae3d0a87ac410 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:14 +0200 Subject: mlxsw: spectrum_switchdev: Update the mdb of mrouter port change Whenever a port starts / stops being mrouter, update all the mdb entries in the HW to flood / stop flooding mc packets there. The change should happen only if the port is not in the mid. (If it is, the mid should flood mc packets to this port anyway) Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 146beaa6b2da..bf1a17557fb3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -130,6 +130,11 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_bridge_device *bridge_device); +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add); + static struct mlxsw_sp_bridge_device * mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge, const struct net_device *br_dev) @@ -747,6 +752,8 @@ static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; + mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port, + is_port_mrouter); out: bridge_port->mrouter = is_port_mrouter; return 0; @@ -1517,6 +1524,22 @@ mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port, } } +static void +mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_bridge_port *bridge_port, + bool add) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_mid *mid; + + bridge_device = bridge_port->bridge_device; + + list_for_each_entry(mid, &bridge_device->mids_list, list) { + if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add); + } +} + static int mlxsw_sp_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) -- cgit v1.2.3 From 0166277706e57779f06b741d25a9e86d99726e2a Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:15 +0200 Subject: mlxsw: spectrum_switchdev: Remove mrouter flood in mdb flush In mdb flush the port is being removed from all the mids it is registered to. But if the port is mrouter, all the mids floods to it. This patch remove mrouter ports from mids it is not registered to in the mdb flush. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index bf1a17557fb3..459cedc23c47 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1673,6 +1673,9 @@ mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) { __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid); + } else if (bridge_device->multicast_enabled && + bridge_port->mrouter) { + mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); } } } -- cgit v1.2.3 From ded711c87a0411a7f3f56f8c575d7b642ee0110e Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Wed, 20 Sep 2017 16:15:16 +0200 Subject: mlxsw: spectrum_switchdev: Consider mrouter status for mdb changes When a mrouter is registered or leaves a mid, don't update the HW. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 459cedc23c47..0f9eac5f4ebf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1491,6 +1491,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port, if (!bridge_device->multicast_enabled) return 0; + if (bridge_port->mrouter) + return 0; + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true); if (err) { netdev_err(dev, "Unable to set SMID\n"); @@ -1613,10 +1616,12 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, int err; if (bridge_port->bridge_device->multicast_enabled) { - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); - - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); + if (bridge_port->bridge_device->multicast_enabled) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, + false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); + } } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); -- cgit v1.2.3 From 79af1f866193de29e65a4dba7d0dab14b0c0ff93 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jun 2017 12:20:29 +0200 Subject: mac80211: avoid allocating TXQs that won't be used For AP_VLAN and monitor interfaces we'll never use the TXQs we allocated, so avoid doing so. Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f75029abf728..2619daa29961 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1772,7 +1772,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sizeof(void *)); int txq_size = 0; - if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue && + type != NL80211_IFTYPE_AP_VLAN && + type != NL80211_IFTYPE_MONITOR) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; -- cgit v1.2.3 From 2512b1b18d0748d867bb22387db7c86b903291ad Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sat, 5 Aug 2017 11:44:31 +0300 Subject: mac80211: extend ieee80211_ie_split to support EXTENSION Current ieee80211_ie_split() implementation doesn't account for elements that are sub-elements of the EXTENSION IE. To extend support to these IEs as well, treat the WLAN_EID_EXTENSION ids in the %ids array as indicating that the next id in the array is a sub-element of the EXTENSION IE. Signed-off-by: Liad Kaufman Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++-- net/wireless/util.c | 54 ++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f12fa5245a45..aa9d993e519a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5934,7 +5934,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @after_ric: array IE types that come after the RIC element * @n_after_ric: size of the @after_ric array @@ -5965,7 +5966,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, * @ies: the IE buffer * @ielen: the length of the IE buffer * @ids: an array with element IDs that are allowed before - * the split + * the split. A WLAN_EID_EXTENSION value means that the next + * EID in the list is a sub-element of the EXTENSION IE. * @n_ids: the size of the element ID array * @offset: offset where to start splitting in the buffer * diff --git a/net/wireless/util.c b/net/wireless/util.c index bcb1284c3415..4aab793c2f00 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1367,13 +1367,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); -static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext) { int i; - for (i = 0; i < n_ids; i++) - if (ids[i] == id) + /* Make sure array values are legal */ + if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION)) + return false; + + i = 0; + while (i < n_ids) { + if (ids[i] == WLAN_EID_EXTENSION) { + if (id_ext && (ids[i + 1] == id)) + return true; + + i += 2; + continue; + } + + if (ids[i] == id && !id_ext) return true; + + i++; + } return false; } @@ -1403,14 +1419,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, { size_t pos = offset; - while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { + while (pos < ielen) { + u8 ext = 0; + + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext], + ies[pos] == WLAN_EID_EXTENSION)) + break; + if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { pos = skip_ie(ies, ielen, pos); - while (pos < ielen && - !ieee80211_id_in_list(after_ric, n_after_ric, - ies[pos])) - pos = skip_ie(ies, ielen, pos); + while (pos < ielen) { + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + else + ext = 0; + + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(after_ric, + n_after_ric, + ies[pos + ext], + ext == 2)) + pos = skip_ie(ies, ielen, pos); + } } else { pos = skip_ie(ies, ielen, pos); } -- cgit v1.2.3 From a7f26d8050c4f172d2dc523aabf45c5cbd9558ac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 5 Aug 2017 11:44:32 +0300 Subject: mac80211: simplify and clarify IE splitting There's no need to split off IEs from the ones obtained from userspace, if they were already split off, so for example IEs that went before HT don't have to be listed again to go before VHT. Simplify the code here so it's clearer. While at it, also clarify the comments regarding the DMG (60 GHz) elements. Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 19 +++++++------------ net/mac80211/util.c | 21 +++++++++------------ 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b8e2709d8de..ee5ca1bc5a20 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -780,11 +780,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) WLAN_EID_SUPPORTED_REGULATORY_CLASSES, WLAN_EID_HT_CAPABILITY, WLAN_EID_BSS_COEX_2040, + /* luckily this is almost always there */ WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, - /* 60GHz doesn't happen right now */ + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ WLAN_EID_VHT_CAPABILITY, WLAN_EID_OPMODE_NOTIF, }; @@ -811,22 +812,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* if present, add any custom IEs that go before VHT */ if (assoc_data->ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_PWR_CAPABILITY, - WLAN_EID_SUPPORTED_CHANNELS, - WLAN_EID_RSN, - WLAN_EID_QOS_CAPA, - WLAN_EID_RRM_ENABLED_CAPABILITIES, - WLAN_EID_MOBILITY_DOMAIN, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off before HT + * or generated here + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; /* RIC already taken above, so no need to handle here anymore */ diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6aef6793d052..bfecc3e86318 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before HT */ if (ie && ie_len) { static const u8 before_ht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_DS_PARAMS, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, }; @@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before VHT */ if (ie && ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_DS_PARAMS, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_SSID_LIST, WLAN_EID_CHANNEL_USAGE, WLAN_EID_INTERWORKING, WLAN_EID_MESH_ID, - /* 60 GHz can't happen here right now */ + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; noffset = ieee80211_ie_split(ie, ie_len, before_vht, ARRAY_SIZE(before_vht), -- cgit v1.2.3 From ffa4629e0c2b8b015f5fa174149c6dd269b4142c Mon Sep 17 00:00:00 2001 From: Tova Mussai Date: Sat, 5 Aug 2017 11:44:38 +0300 Subject: nl80211: return error for invalid center_freq in 40 MHz When NL80211_ATTR_WIPHY_CHANNEL_TYPE is given, nl80211 would parse the channel definition the old way, discarding NL80211_ATTR_CENTER_FREQ1, NL80211_ATTR_CENTER_FREQ2 etc. However, it is possible that user space added both NL80211_ATTR_WIPHY_CHANNEL_TYPE and NL80211_ATTR_CENTER_FREQ1 or NL80211_ATTR_CENTER_FREQ2 assuming that all settings would be honored. In such a case, validate that NL80211_ATTR_CENTER_FREQ1 and NL80211_ATTR_CENTER_FREQ2 values match the channel configuration, as otherwise user space would assume that the desired configuration was applied. For example, when trying to start ap with NL80211_ATTR_WIPHY_CHANNEL_TYPE = NL80211_CHAN_HT40MINUS, NL80211_ATTR_WIPHY_FREQ = 5180 and NL80211_ATTR_CENTER_FREQ1 = 5250 without this fix, the ap will start on channel 36 (center_freq1 will be corrected to 5180). With this fix, we will throw an error instead. Signed-off-by: Tova Mussai Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0df8023f480b..66e97136ab44 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2122,6 +2122,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, case NL80211_CHAN_HT40MINUS: cfg80211_chandef_create(chandef, chandef->chan, chantype); + /* user input for center_freq is incorrect */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ1] && + chandef->center_freq1 != nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ1])) + return -EINVAL; + /* center_freq2 must be zero */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ2] && + nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2])) + return -EINVAL; break; default: return -EINVAL; -- cgit v1.2.3 From 2d23d0736e3a4a0fdb92b8e46ea476639f16aae8 Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Sun, 6 Aug 2017 11:38:22 +0300 Subject: nl80211: add OCE scan and capability flags Add Optimized Connectivity Experience (OCE) scan and capability flags. Some of them unique to OCE and some are stand alone. And add scan flags to enable/disable them. Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 37 ++++++++++-- net/wireless/nl80211.c | 137 ++++++++++++++++++++++++------------------- 2 files changed, 111 insertions(+), 63 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 51626b4175c0..76404d8a8863 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4914,6 +4914,15 @@ enum nl80211_feature_flags { * handshake with 802.1X in station mode (will pass EAP frames to the host * and accept the set_pmk/del_pmk commands), doing it in the host might not * be supported. + * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding + * the max channel attribute in the FILS request params IE with the + * actual dwell time. + * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe + * response + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending + * the first probe request in each channel at rate of at least 5.5Mbps. + * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports + * probe request tx deferral and suppression * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4936,6 +4945,10 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_FILS_SK_OFFLOAD, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -5012,12 +5025,28 @@ enum nl80211_timeout_reason { * locally administered 1, multicast 0) is assumed. * This flag must not be requested when the feature isn't supported, check * the nl80211 feature flags for the device. + * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS + * request parameters IE in the probe request + * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at + * rate of at least 5.5M. In case non OCE AP is dicovered in the channel, + * only the first probe req in the channel will be sent in high rate. + * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request + * tx deferral (dot11FILSProbeDelay shall be set to 15ms) + * and suppression (if it has received a broadcast Probe Response frame, + * Beacon frame or FILS Discovery frame from an AP that the STA considers + * a suitable candidate for (re-)association - suitable in terms of + * SSID and/or RSSI */ enum nl80211_scan_flags { - NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, - NL80211_SCAN_FLAG_FLUSH = 1<<1, - NL80211_SCAN_FLAG_AP = 1<<2, - NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, + NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3, + NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4, + NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6, + NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 66e97136ab44..2e6f5f4065f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6619,6 +6619,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) return regulatory_pre_cac_allowed(wdev->wiphy); } +static int +nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, + void *request, struct nlattr **attrs, + bool is_sched_scan) +{ + u8 *mac_addr, *mac_addr_mask; + u32 *flags; + enum nl80211_feature_flags randomness_flag; + + if (!attrs[NL80211_ATTR_SCAN_FLAGS]) + return 0; + + if (is_sched_scan) { + struct cfg80211_sched_scan_request *req = request; + + randomness_flag = wdev ? + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } else { + struct cfg80211_scan_request *req = request; + + randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } + + *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); + + if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) + return -EOPNOTSUPP; + + if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + int err; + + if (!(wiphy->features & randomness_flag) || + (wdev && wdev->current_bss)) + return -EOPNOTSUPP; + + err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); + if (err) + return err; + } + + if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE)) + return -EOPNOTSUPP; + + return 0; +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6824,34 +6895,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - if (!(wiphy->features & - NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(info->attrs, - request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - } - } + err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, + false); + if (err) + goto out_free; request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -7299,37 +7346,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ie_len); } - if (attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR; - - if (!wdev) /* must be net-detect */ - flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR; - - if (!(wiphy->features & flg)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev && wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(attrs, request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - } - } + err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true); + if (err) + goto out_free; if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) request->delay = -- cgit v1.2.3 From 40b0bd24973487272167a09db040a70c053bedbe Mon Sep 17 00:00:00 2001 From: Roee Zamir Date: Sun, 6 Aug 2017 11:38:23 +0300 Subject: mac80211: oce: enable receiving of bcast probe resp One of OCE's optimizations is acception of broadcast probe responses. Accept broadcast probe responses but don't set NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP. Because a device's firmware may filter out the broadcast probe resp - drivers should set this flag. Signed-off-by: Roee Zamir Signed-off-by: Luca Coelho [johannes: make accepting broadcast conditional on the nl80211 scan flag that was added for that specific purpose] Signed-off-by: Johannes Berg --- net/mac80211/scan.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 47d2ed570470..ef2becaade50 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007, Michael Wu * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright 2016 Intel Deutschland GmbH + * Copyright 2016-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local, return bss; } +static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + u32 scan_flags, const u8 *da) +{ + if (!sdata) + return false; + /* accept broadcast for OCE */ + if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && + is_broadcast_ether_addr(da)) + return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) + return true; + return ether_addr_equal(da, sdata->vif.addr); +} + void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); @@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; + u32 scan_req_flags = 0, sched_scan_req_flags = 0; scan_req = rcu_dereference(local->scan_req); sched_scan_req = rcu_dereference(local->sched_scan_req); - /* ignore ProbeResp to foreign address unless scanning - * with randomised address + if (scan_req) + scan_req_flags = scan_req->flags; + + if (sched_scan_req) + sched_scan_req_flags = sched_scan_req->flags; + + /* ignore ProbeResp to foreign address or non-bcast (OCE) + * unless scanning with randomised address */ - if (!(sdata1 && - (ether_addr_equal(mgmt->da, sdata1->vif.addr) || - scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) && - !(sdata2 && - (ether_addr_equal(mgmt->da, sdata2->vif.addr) || - sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR))) + if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + mgmt->da) && + !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + mgmt->da)) return; elements = mgmt->u.probe_resp.variable; -- cgit v1.2.3 From 1272c5d89b597995cb10db87dd4a1adc91d36006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 18 Aug 2017 15:33:56 +0300 Subject: mac80211: add documentation to ieee80211_rx_ba_offl() Add documentation to ieee80211_rx_ba_offl() function and, while at it, rename the bit argument to tid, for consistency. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/net/mac80211.h | 8 +++++++- net/mac80211/agg-rx.c | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 885690fa39c8..cc9073e45be9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -5441,8 +5441,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, */ void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn); +/** + * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work + * @vif: &struct ieee80211_vif pointer from the add_interface callback + * @addr: station mac address + * @tid: the rx tid + */ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, - unsigned int bit); + unsigned int tid); /** * ieee80211_start_rx_ba_session_offl - start a Rx BA session diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 2849a1fc41c5..88cc1ae935ea 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -459,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, - const u8 *addr, unsigned int bit) + const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; @@ -470,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, if (!sta) goto unlock; - set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); + set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); -- cgit v1.2.3 From 1281103770e909e064edbb22a1115a0c14eca081 Mon Sep 17 00:00:00 2001 From: Ilan peer Date: Wed, 6 Sep 2017 17:18:33 +0300 Subject: mac80211: Simplify locking in ieee80211_sta_tear_down_BA_sessions() Simplify the locking in ieee80211_sta_tear_down_BA_sessions() and lock sta->ampdu_mlme.mtx over the entire function instead of locking/unlocking it for each TID etc. Signed-off-by: Ilan Peer Signed-off-by: Johannes Berg --- net/mac80211/ht.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d6d0b4201e40..41f5e48f8021 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; + mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, reason); - __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, - reason != AGG_STOP_DESTROY_STA && - reason != AGG_STOP_PEER_REQUEST); + ___ieee80211_stop_tx_ba_session(sta, i, reason); + ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } + mutex_unlock(&sta->ampdu_mlme.mtx); /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); -- cgit v1.2.3 From 4c121fd690d9c465e4cb09b7859adfdd6a0aee1d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 8 Sep 2017 11:54:46 +0200 Subject: mac80211: use offsetofend() This was created using the following spatch: @find@ type S; expression M, M2; position p; @@ offsetof(S, M) + sizeof(M2)@p @script:python@ m << find.M; m2 << find.M2; @@ if not m2.endswith('-> ' + m): cocci.include_match(False) @change@ type find.S; expression find.M, find.M2; position find.p; @@ -offsetof(S, M) + sizeof(M2)@p +offsetofend(S, M) Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 3 +-- net/mac80211/mesh_hwmp.c | 8 ++++---- net/mac80211/mesh_plink.c | 3 +-- net/mac80211/util.c | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index a550c707cd8a..7a76c4a6df30 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) enum nl80211_band band; u8 *pos; struct ieee80211_sub_if_data *sdata; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); rcu_read_lock(); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d8bbd0d2225a..146ec6c0f12f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f69c6c38ca43..dc8e10f87207 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + - sizeof(mgmt->u.action.u.self_prot); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; skb = dev_alloc_skb(local->tx_headroom + diff --git a/net/mac80211/util.c b/net/mac80211/util.c index bfecc3e86318..d57e5f6bd8b6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2977,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct ieee80211_local *local = sdata->local; int freq; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + - sizeof(mgmt->u.action.u.chan_switch); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.chan_switch); u8 *pos; if (sdata->vif.type != NL80211_IFTYPE_ADHOC && -- cgit v1.2.3 From 62b093b375e3c10ff39896f2a172146cf5df529f Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 18 Sep 2017 15:56:51 +0200 Subject: mac80211_hwsim: use dyndbg for debug messages The mac80211_hwsim doesn't offer a way to disable the debugging output. Unfortunately, it's pretty chatty, dumping a lot of stuff into the message buffer. This patch changes it to use dyndbg for controlling the debug output. It's disabled by default, but can be enabled by a module parameter (1), at runtime (2) or persisted in modprobe.conf (3). (1) modprobe mac80211_hwsim dyndbg=+p (2) echo "module mac80211_hwsim +p" >/sys/kernel/debug/dynamic_debug/control (3) echo "options mac80211_hwsim dyndbg=+p" >>/etc/modprobe.d/my.conf Signed-off-by: Lubomir Rintel Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 192 +++++++++++++++++----------------- 1 file changed, 96 insertions(+), 96 deletions(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6467ffac9811..ec2f4c31425a 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -396,7 +396,7 @@ static int mac80211_hwsim_vendor_cmd_test(struct wiphy *wiphy, if (!tb[QCA_WLAN_VENDOR_ATTR_TEST]) return -EINVAL; val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]); - wiphy_debug(wiphy, "%s: test=%u\n", __func__, val); + wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val); /* Send a vendor event as a test. Note that this would not normally be * done within a command handler, but rather, based on some other @@ -643,9 +643,9 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) if (!vp->assoc) return; - wiphy_debug(data->hw->wiphy, - "%s: send PS-Poll to %pM for aid %d\n", - __func__, vp->bssid, vp->aid); + wiphy_dbg(data->hw->wiphy, + "%s: send PS-Poll to %pM for aid %d\n", + __func__, vp->bssid, vp->aid); skb = dev_alloc_skb(sizeof(*pspoll)); if (!skb) @@ -674,9 +674,9 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, if (!vp->assoc) return; - wiphy_debug(data->hw->wiphy, - "%s: send data::nullfunc to %pM ps=%d\n", - __func__, vp->bssid, ps); + wiphy_dbg(data->hw->wiphy, + "%s: send data::nullfunc to %pM ps=%d\n", + __func__, vp->bssid, ps); skb = dev_alloc_skb(sizeof(*hdr)); if (!skb) @@ -1034,7 +1034,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0, HWSIM_CMD_FRAME); if (msg_head == NULL) { - printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n"); + pr_debug("mac80211_hwsim: problem with msg_head\n"); goto nla_put_failure; } @@ -1093,7 +1093,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, nla_put_failure: nlmsg_free(skb); err_free_txskb: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); ieee80211_free_txskb(hw, my_skb); data->tx_failed++; } @@ -1347,7 +1347,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } if (data->idle && !data->tmp_chan) { - wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n"); + wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n"); ieee80211_free_txskb(hw, skb); return; } @@ -1408,7 +1408,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, static int mac80211_hwsim_start(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); data->started = true; return 0; } @@ -1419,16 +1419,16 @@ static void mac80211_hwsim_stop(struct ieee80211_hw *hw) struct mac80211_hwsim_data *data = hw->priv; data->started = false; tasklet_hrtimer_cancel(&data->beacon_timer); - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); } static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { - wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), - vif->addr); + wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), + vif->addr); hwsim_set_magic(vif); vif->cab_queue = 0; @@ -1447,9 +1447,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw, bool newp2p) { newtype = ieee80211_iftype_p2p(newtype, newp2p); - wiphy_debug(hw->wiphy, - "%s (old type=%d, new type=%d, mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), + wiphy_dbg(hw->wiphy, + "%s (old type=%d, new type=%d, mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), newtype, vif->addr); hwsim_check_magic(vif); @@ -1465,9 +1465,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw, static void mac80211_hwsim_remove_interface( struct ieee80211_hw *hw, struct ieee80211_vif *vif) { - wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, ieee80211_vif_type_p2p(vif), - vif->addr); + wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", + __func__, ieee80211_vif_type_p2p(vif), + vif->addr); hwsim_check_magic(vif); hwsim_clear_magic(vif); } @@ -1589,23 +1589,23 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) int idx; if (conf->chandef.chan) - wiphy_debug(hw->wiphy, - "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", - __func__, - conf->chandef.chan->center_freq, - conf->chandef.center_freq1, - conf->chandef.center_freq2, - hwsim_chanwidths[conf->chandef.width], - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + wiphy_dbg(hw->wiphy, + "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n", + __func__, + conf->chandef.chan->center_freq, + conf->chandef.center_freq1, + conf->chandef.center_freq2, + hwsim_chanwidths[conf->chandef.width], + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); else - wiphy_debug(hw->wiphy, - "%s (freq=0 idle=%d ps=%d smps=%s)\n", - __func__, - !!(conf->flags & IEEE80211_CONF_IDLE), - !!(conf->flags & IEEE80211_CONF_PS), - smps_modes[conf->smps_mode]); + wiphy_dbg(hw->wiphy, + "%s (freq=0 idle=%d ps=%d smps=%s)\n", + __func__, + !!(conf->flags & IEEE80211_CONF_IDLE), + !!(conf->flags & IEEE80211_CONF_PS), + smps_modes[conf->smps_mode]); data->idle = !!(conf->flags & IEEE80211_CONF_IDLE); @@ -1659,7 +1659,7 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, { struct mac80211_hwsim_data *data = hw->priv; - wiphy_debug(hw->wiphy, "%s\n", __func__); + wiphy_dbg(hw->wiphy, "%s\n", __func__); data->rx_filter = 0; if (*total_flags & FIF_ALLMULTI) @@ -1688,25 +1688,25 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, hwsim_check_magic(vif); - wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n", - __func__, changed, vif->addr); + wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n", + __func__, changed, vif->addr); if (changed & BSS_CHANGED_BSSID) { - wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n", - __func__, info->bssid); + wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n", + __func__, info->bssid); memcpy(vp->bssid, info->bssid, ETH_ALEN); } if (changed & BSS_CHANGED_ASSOC) { - wiphy_debug(hw->wiphy, " ASSOC: assoc=%d aid=%d\n", - info->assoc, info->aid); + wiphy_dbg(hw->wiphy, " ASSOC: assoc=%d aid=%d\n", + info->assoc, info->aid); vp->assoc = info->assoc; vp->aid = info->aid; } if (changed & BSS_CHANGED_BEACON_ENABLED) { - wiphy_debug(hw->wiphy, " BCN EN: %d (BI=%u)\n", - info->enable_beacon, info->beacon_int); + wiphy_dbg(hw->wiphy, " BCN EN: %d (BI=%u)\n", + info->enable_beacon, info->beacon_int); vp->bcn_en = info->enable_beacon; if (data->started && !hrtimer_is_queued(&data->beacon_timer.timer) && @@ -1725,8 +1725,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, ieee80211_iterate_active_interfaces_atomic( data->hw, IEEE80211_IFACE_ITER_NORMAL, mac80211_hwsim_bcn_en_iter, &count); - wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u", - count); + wiphy_dbg(hw->wiphy, " beaconing vifs remaining: %u", + count); if (count == 0) { tasklet_hrtimer_cancel(&data->beacon_timer); data->beacon_int = 0; @@ -1735,31 +1735,31 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_ERP_CTS_PROT) { - wiphy_debug(hw->wiphy, " ERP_CTS_PROT: %d\n", - info->use_cts_prot); + wiphy_dbg(hw->wiphy, " ERP_CTS_PROT: %d\n", + info->use_cts_prot); } if (changed & BSS_CHANGED_ERP_PREAMBLE) { - wiphy_debug(hw->wiphy, " ERP_PREAMBLE: %d\n", - info->use_short_preamble); + wiphy_dbg(hw->wiphy, " ERP_PREAMBLE: %d\n", + info->use_short_preamble); } if (changed & BSS_CHANGED_ERP_SLOT) { - wiphy_debug(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot); + wiphy_dbg(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot); } if (changed & BSS_CHANGED_HT) { - wiphy_debug(hw->wiphy, " HT: op_mode=0x%x\n", - info->ht_operation_mode); + wiphy_dbg(hw->wiphy, " HT: op_mode=0x%x\n", + info->ht_operation_mode); } if (changed & BSS_CHANGED_BASIC_RATES) { - wiphy_debug(hw->wiphy, " BASIC_RATES: 0x%llx\n", - (unsigned long long) info->basic_rates); + wiphy_dbg(hw->wiphy, " BASIC_RATES: 0x%llx\n", + (unsigned long long) info->basic_rates); } if (changed & BSS_CHANGED_TXPOWER) - wiphy_debug(hw->wiphy, " TX Power: %d dBm\n", info->txpower); + wiphy_dbg(hw->wiphy, " TX Power: %d dBm\n", info->txpower); } static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw, @@ -1813,11 +1813,11 @@ static int mac80211_hwsim_conf_tx( struct ieee80211_vif *vif, u16 queue, const struct ieee80211_tx_queue_params *params) { - wiphy_debug(hw->wiphy, - "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n", - __func__, queue, - params->txop, params->cw_min, - params->cw_max, params->aifs); + wiphy_dbg(hw->wiphy, + "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n", + __func__, queue, + params->txop, params->cw_min, + params->cw_max, params->aifs); return 0; } @@ -1981,7 +1981,7 @@ static void hw_scan_work(struct work_struct *work) .aborted = false, }; - wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n"); + wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n"); ieee80211_scan_completed(hwsim->hw, &info); hwsim->hw_scan_request = NULL; hwsim->hw_scan_vif = NULL; @@ -1990,8 +1990,8 @@ static void hw_scan_work(struct work_struct *work) return; } - wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n", - req->channels[hwsim->scan_chan_idx]->center_freq); + wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n", + req->channels[hwsim->scan_chan_idx]->center_freq); hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx]; if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR | @@ -2060,7 +2060,7 @@ static int mac80211_hwsim_hw_scan(struct ieee80211_hw *hw, memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data)); mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim hw_scan request\n"); + wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n"); ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0); @@ -2075,7 +2075,7 @@ static void mac80211_hwsim_cancel_hw_scan(struct ieee80211_hw *hw, .aborted = true, }; - wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n"); + wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n"); cancel_delayed_work_sync(&hwsim->hw_scan); @@ -2096,11 +2096,11 @@ static void mac80211_hwsim_sw_scan(struct ieee80211_hw *hw, mutex_lock(&hwsim->mutex); if (hwsim->scanning) { - printk(KERN_DEBUG "two hwsim sw_scans detected!\n"); + pr_debug("two hwsim sw_scans detected!\n"); goto out; } - printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n"); + pr_debug("hwsim sw_scan request, prepping stuff\n"); memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN); hwsim->scanning = true; @@ -2117,7 +2117,7 @@ static void mac80211_hwsim_sw_scan_complete(struct ieee80211_hw *hw, mutex_lock(&hwsim->mutex); - printk(KERN_DEBUG "hwsim sw_scan_complete\n"); + pr_debug("hwsim sw_scan_complete\n"); hwsim->scanning = false; eth_zero_addr(hwsim->scan_addr); @@ -2131,7 +2131,7 @@ static void hw_roc_start(struct work_struct *work) mutex_lock(&hwsim->mutex); - wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n"); + wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n"); hwsim->tmp_chan = hwsim->roc_chan; ieee80211_ready_on_channel(hwsim->hw); @@ -2151,7 +2151,7 @@ static void hw_roc_done(struct work_struct *work) hwsim->tmp_chan = NULL; mutex_unlock(&hwsim->mutex); - wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n"); + wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n"); } static int mac80211_hwsim_roc(struct ieee80211_hw *hw, @@ -2172,8 +2172,8 @@ static int mac80211_hwsim_roc(struct ieee80211_hw *hw, hwsim->roc_duration = duration; mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n", - chan->center_freq, duration); + wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n", + chan->center_freq, duration); ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50); return 0; @@ -2190,7 +2190,7 @@ static int mac80211_hwsim_croc(struct ieee80211_hw *hw) hwsim->tmp_chan = NULL; mutex_unlock(&hwsim->mutex); - wiphy_debug(hw->wiphy, "hwsim ROC canceled\n"); + wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n"); return 0; } @@ -2199,20 +2199,20 @@ static int mac80211_hwsim_add_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { hwsim_set_chanctx_magic(ctx); - wiphy_debug(hw->wiphy, - "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); return 0; } static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *ctx) { - wiphy_debug(hw->wiphy, - "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); hwsim_check_chanctx_magic(ctx); hwsim_clear_chanctx_magic(ctx); } @@ -2222,10 +2222,10 @@ static void mac80211_hwsim_change_chanctx(struct ieee80211_hw *hw, u32 changed) { hwsim_check_chanctx_magic(ctx); - wiphy_debug(hw->wiphy, - "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", - ctx->def.chan->center_freq, ctx->def.width, - ctx->def.center_freq1, ctx->def.center_freq2); + wiphy_dbg(hw->wiphy, + "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n", + ctx->def.chan->center_freq, ctx->def.width, + ctx->def.center_freq1, ctx->def.center_freq2); } static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw, @@ -2479,7 +2479,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, ops = &mac80211_hwsim_mchan_ops; hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname); if (!hw) { - printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n"); + pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n"); err = -ENOMEM; goto failed; } @@ -2507,7 +2507,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, data->dev->driver = &mac80211_hwsim_driver.driver; err = device_bind_driver(data->dev); if (err != 0) { - printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n", + pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n", err); goto failed_bind; } @@ -2698,12 +2698,12 @@ static int mac80211_hwsim_new_radio(struct genl_info *info, err = ieee80211_register_hw(hw); if (err < 0) { - printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n", + pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n", err); goto failed_hw; } - wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr); + wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr); if (param->reg_alpha2) { data->alpha2[0] = param->reg_alpha2[0]; @@ -3067,7 +3067,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2, return 0; err: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); out: dev_kfree_skb(skb); return -EINVAL; @@ -3098,7 +3098,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, hwsim_register_wmediumd(net, info->snd_portid); - printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, " + pr_debug("mac80211_hwsim: received a REGISTER, " "switching to wmediumd mode with pid %d\n", info->snd_portid); return 0; @@ -3387,7 +3387,7 @@ static int __init hwsim_init_netlink(void) return 0; failure: - printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); + pr_debug("mac80211_hwsim: error occurred in %s\n", __func__); return -EINVAL; } @@ -3578,7 +3578,7 @@ module_init(init_mac80211_hwsim); static void __exit exit_mac80211_hwsim(void) { - printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n"); + pr_debug("mac80211_hwsim: unregister radios\n"); hwsim_exit_netlink(); -- cgit v1.2.3 From a6bcda44843c6dfced0fb973e2607c2a98addfa9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 19 Sep 2017 11:52:43 +0200 Subject: cfg80211: remove unused function ieee80211_data_from_8023() This function hasn't been used since the removal of iwmc3200wifi in 2012. It also appears to have a bug when qos=True, since then it'll copy uninitialized stack memory to the SKB. Just remove the function entirely. Reported-by: Jouni Malinen Signed-off-by: Johannes Berg --- Documentation/driver-api/80211/cfg80211.rst | 3 - include/net/cfg80211.h | 13 ---- net/wireless/util.c | 115 ---------------------------- 3 files changed, 131 deletions(-) diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst index 8ffac57e1f5b..eeab91b59457 100644 --- a/Documentation/driver-api/80211/cfg80211.rst +++ b/Documentation/driver-api/80211/cfg80211.rst @@ -299,9 +299,6 @@ Data path helpers .. kernel-doc:: include/net/cfg80211.h :functions: ieee80211_data_to_8023 -.. kernel-doc:: include/net/cfg80211.h - :functions: ieee80211_data_from_8023 - .. kernel-doc:: include/net/cfg80211.h :functions: ieee80211_amsdu_to_8023s diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index aa9d993e519a..cc1996081463 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4346,19 +4346,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); } -/** - * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 - * @skb: the 802.3 frame - * @addr: the device MAC address - * @iftype: the virtual interface type - * @bssid: the network bssid (used only for iftype STATION and ADHOC) - * @qos: build 802.11 QoS data frame - * Return: 0 on success, or a negative error code. - */ -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, const u8 *bssid, - bool qos); - /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * diff --git a/net/wireless/util.c b/net/wireless/util.c index 4aab793c2f00..7dcdf67cba29 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -529,121 +529,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, } EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, - const u8 *bssid, bool qos) -{ - struct ieee80211_hdr hdr; - u16 hdrlen, ethertype; - __le16 fc; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos; - int head_need; - - if (unlikely(skb->len < ETH_HLEN)) - return -EINVAL; - - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); - - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - fc |= cpu_to_le16(IEEE80211_FCTL_TODS); - /* BSSID SA DA */ - memcpy(hdr.addr1, bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_OCB: - case NL80211_IFTYPE_ADHOC: - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, bssid, ETH_ALEN); - hdrlen = 24; - break; - default: - return -EOPNOTSUPP; - } - - if (qos) { - fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); - hdrlen += 2; - } - - hdr.frame_control = fc; - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= ETH_P_802_3_MIN) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; - } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - head_need = hdrlen + encaps_len - skb_headroom(skb); - - if (head_need > 0 || skb_cloned(skb)) { - head_need = max(head_need, 0); - if (head_need) - skb_orphan(skb); - - if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) - return -ENOMEM; - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ - skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - return 0; -} -EXPORT_SYMBOL(ieee80211_data_from_8023); - static void __frame_add_frag(struct sk_buff *skb, struct page *page, void *ptr, int len, int size) -- cgit v1.2.3 From 65026002d69de006e273749bb799d3b01b757eb0 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Fri, 18 Aug 2017 15:31:41 +0300 Subject: nl80211: add an option to allow MFP without requiring it The user space can now allow the kernel to associate to an AP that requires MFP or that doesn't have MFP enabled in the same NL80211_CMD_CONNECT command, by using a new NL80211_MFP_OPTIONAL flag. The driver / firmware will decide whether to use it or not. Include a feature bit to advertise support for NL80211_MFP_OPTIONAL. This allows new user space to run on old kernels and know that it cannot use the new attribute if it isn't supported. Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 13 +++++++++++-- net/wireless/nl80211.c | 8 +++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 76404d8a8863..59ba6ca66a0d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1407,8 +1407,12 @@ enum nl80211_commands { * * @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is * used for the association (&enum nl80211_mfp, represented as a u32); - * this attribute can be used - * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests + * this attribute can be used with %NL80211_CMD_ASSOCIATE and + * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for + * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it + * must have decided whether to use management frame protection or not. + * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will + * let the driver (or the firmware) decide whether to use MFP or not. * * @NL80211_ATTR_STA_FLAGS2: Attribute containing a * &struct nl80211_sta_flag_update. @@ -3947,10 +3951,12 @@ enum nl80211_key_type { * enum nl80211_mfp - Management frame protection state * @NL80211_MFP_NO: Management frame protection not used * @NL80211_MFP_REQUIRED: Management frame protection required + * @NL80211_MFP_OPTIONAL: Management frame protection is optional */ enum nl80211_mfp { NL80211_MFP_NO, NL80211_MFP_REQUIRED, + NL80211_MFP_OPTIONAL, }; enum nl80211_wpa_versions { @@ -4923,6 +4929,8 @@ enum nl80211_feature_flags { * the first probe request in each channel at rate of at least 5.5Mbps. * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports * probe request tx deferral and suppression + * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL + * value in %NL80211_ATTR_USE_MFP. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4949,6 +4957,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP, NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE, NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, + NL80211_EXT_FEATURE_MFP_OPTIONAL, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2e6f5f4065f9..1e39ba3cfd06 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -8952,8 +8952,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_USE_MFP]) { connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp == NL80211_MFP_OPTIONAL && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_MFP_OPTIONAL)) + return -EOPNOTSUPP; + if (connect.mfp != NL80211_MFP_REQUIRED && - connect.mfp != NL80211_MFP_NO) + connect.mfp != NL80211_MFP_NO && + connect.mfp != NL80211_MFP_OPTIONAL) return -EINVAL; } else { connect.mfp = NL80211_MFP_NO; -- cgit v1.2.3 From d405fd8cc807c045b23bc2df4a5ab6b85df614f3 Mon Sep 17 00:00:00 2001 From: Gregory Greenman Date: Sat, 5 Aug 2017 11:44:36 +0300 Subject: mac80211: recalculate some sta parameters after insertion Sometimes a station is added already in ASSOC state. For example, in AP mode, when a client station didn't get assoc resp and sends an assoc req again. If a station is inserted when its state is ASSOC or higher, the min chandef and allow_p2p_go_ps should be recalculated again after the insertion. Before this patch the recalculation happened only in sta_info_move_state which occurs before the insertion of the sta and thus even though it calls ieee80211_recalc_min_chandef/_p2p_go_ps_allowed functions, since sdata->local->sta_list is still empty at this point, it doesn't do anything. Signed-off-by: Gregory Greenman Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 57 +++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 69615016d5bf..ffcd25c4908c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -515,6 +515,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local, return err; } +static void +ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool allow_p2p_go_ps = sdata->vif.p2p; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + !test_sta_flag(sta, WLAN_STA_ASSOC)) + continue; + if (!sta->sta.support_p2p_ps) { + allow_p2p_go_ps = false; + break; + } + } + rcu_read_unlock(); + + if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { + sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); + } +} + /* * should be called with sta_mtx locked * this function replaces the mutex lock @@ -561,6 +586,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); + + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -1788,31 +1820,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); -static void -ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - bool allow_p2p_go_ps = sdata->vif.p2p; - struct sta_info *sta; - - rcu_read_lock(); - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sdata != sta->sdata || - !test_sta_flag(sta, WLAN_STA_ASSOC)) - continue; - if (!sta->sta.support_p2p_ps) { - allow_p2p_go_ps = false; - break; - } - } - rcu_read_unlock(); - - if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { - sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); - } -} - int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { -- cgit v1.2.3 From 1bd773c077deeeb2d9ced1fdb6d846169b8e7e4a Mon Sep 17 00:00:00 2001 From: Richard Schütz Date: Thu, 7 Sep 2017 17:47:43 +0200 Subject: wireless: set correct mandatory rate flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to IEEE Std 802.11-2016 (16.2.3.4 Long PHY SIGNAL field) all of the following rates are mandatory for a HR/DSSS PHY: 1 Mb/s, 2 Mb/s, 5.5 Mb/s and 11 Mb/s. Set IEEE80211_RATE_MANDATORY_B flag for all of these instead of just 1 Mb/s to correctly reflect this. Signed-off-by: Richard Schütz [johannes: use switch statement] Signed-off-by: Johannes Berg --- net/wireless/util.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 7dcdf67cba29..7a1fcc6ee060 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -157,32 +157,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) case NL80211_BAND_2GHZ: want = 7; for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) { + switch (sband->bitrates[i].bitrate) { + case 10: + case 20: + case 55: + case 110: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate == 20 || - sband->bitrates[i].bitrate == 55 || - sband->bitrates[i].bitrate == 110 || - sband->bitrates[i].bitrate == 60 || - sband->bitrates[i].bitrate == 120 || - sband->bitrates[i].bitrate == 240) { + break; + case 60: + case 120: + case 240: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate != 10 && - sband->bitrates[i].bitrate != 20 && - sband->bitrates[i].bitrate != 55 && - sband->bitrates[i].bitrate != 110) + /* fall through */ + default: sband->bitrates[i].flags |= IEEE80211_RATE_ERP_G; + break; + } } - WARN_ON(want != 0 && want != 3 && want != 6); + WARN_ON(want != 0 && want != 3); break; case NL80211_BAND_60GHZ: /* check for mandatory HT MCS 1..4 */ -- cgit v1.2.3 From 4f88836d4f806d212361eb426bc8a6ce897dbef9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:22 +0530 Subject: drivers: net: de4x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/de4x5.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 0affee9c8aa2..299812e92db7 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1147,9 +1147,8 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) lp->timeout = -1; lp->gendev = gendev; spin_lock_init(&lp->lock); - init_timer(&lp->timer); - lp->timer.function = (void (*)(unsigned long))de4x5_ast; - lp->timer.data = (unsigned long)dev; + setup_timer(&lp->timer, (void (*)(unsigned long))de4x5_ast, + (unsigned long)dev); de4x5_parse_params(dev); /* -- cgit v1.2.3 From cdc91b31b81abaa2cf491fd5e9007f4bcd45bc68 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:23 +0530 Subject: drivers: net: b44: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/b44.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a1125d10c825..42e44fc03a18 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1474,10 +1474,8 @@ static int b44_open(struct net_device *dev) goto out; } - init_timer(&bp->timer); + setup_timer(&bp->timer, b44_timer, (unsigned long)bp); bp->timer.expires = jiffies + HZ; - bp->timer.data = (unsigned long) bp; - bp->timer.function = b44_timer; add_timer(&bp->timer); b44_enable_ints(bp); -- cgit v1.2.3 From 334e4a7d5505f59a741b0549f41082e29a914439 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:24 +0530 Subject: drivers: net: pcnet32: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/pcnet32.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 7f60d17819ce..e46153654016 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1970,9 +1970,8 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) lp->options |= PCNET32_PORT_MII; } - init_timer(&lp->watchdog_timer); - lp->watchdog_timer.data = (unsigned long)dev; - lp->watchdog_timer.function = (void *)&pcnet32_watchdog; + setup_timer(&lp->watchdog_timer, (void *)&pcnet32_watchdog, + (unsigned long)dev); /* The PCNET32-specific entries in the device structure. */ dev->netdev_ops = &pcnet32_netdev_ops; -- cgit v1.2.3 From 27dd0852644966c43079d3f812ec37e3e3fc864e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:25 +0530 Subject: drivers: net: brcm80211: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index aaed4ab503ad..ab3f22353154 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -3260,9 +3260,8 @@ static void brcmf_init_escan(struct brcmf_cfg80211_info *cfg) brcmf_cfg80211_escan_handler); cfg->escan_info.escan_state = WL_ESCAN_STATE_IDLE; /* Init scan_timeout timer */ - init_timer(&cfg->escan_timeout); - cfg->escan_timeout.data = (unsigned long) cfg; - cfg->escan_timeout.function = brcmf_escan_timeout; + setup_timer(&cfg->escan_timeout, brcmf_escan_timeout, + (unsigned long)cfg); INIT_WORK(&cfg->escan_timeout_work, brcmf_cfg80211_escan_timeout_worker); } -- cgit v1.2.3 From ba4cc08793a58a97cffc2769acaa53fff4433332 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:26 +0530 Subject: drivers : net: niu: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6a4e8e1bbd90..bde19b307d0d 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6123,10 +6123,8 @@ static int niu_open(struct net_device *dev) err = niu_init_hw(np); if (!err) { - init_timer(&np->timer); + setup_timer(&np->timer, niu_timer, (unsigned long)np); np->timer.expires = jiffies + HZ; - np->timer.data = (unsigned long) np; - np->timer.function = niu_timer; err = niu_enable_interrupts(np, 1); if (err) -- cgit v1.2.3 From c3bd81cccbaa89da70047f1dcc73443f889735a8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:27 +0530 Subject: drivers: net: bcm63xx: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 4f3845a58126..f8bbbbfca06e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1857,9 +1857,8 @@ static int bcm_enet_probe(struct platform_device *pdev) spin_lock_init(&priv->rx_lock); /* init rx timeout (used for oom) */ - init_timer(&priv->rx_timeout); - priv->rx_timeout.function = bcm_enet_refill_rx_timer; - priv->rx_timeout.data = (unsigned long)dev; + setup_timer(&priv->rx_timeout, bcm_enet_refill_rx_timer, + (unsigned long)dev); /* init the mib update lock&work */ mutex_init(&priv->mib_update_lock); -- cgit v1.2.3 From b0b404bd9ba28edb795b999e3c35596e236cc5dd Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:28 +0530 Subject: drivers: net: declance: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/declance.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 82cc81385033..9bdf81c2cd00 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1246,9 +1246,9 @@ static int dec_lance_probe(struct device *bdev, const int type) * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + setup_timer(&lp->multicast_timer, lance_set_multicast_retry, + (unsigned long)dev); + ret = register_netdev(dev); if (ret) { -- cgit v1.2.3 From aa0c72859972f209d7d07654037bd974c11f93ed Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:29 +0530 Subject: drivers: net: am79c961: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/am79c961a.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index b11e910850f7..0612dbee00d2 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -728,9 +728,7 @@ static int am79c961_probe(struct platform_device *pdev) am79c961_banner(); spin_lock_init(&priv->chip_lock); - init_timer(&priv->timer); - priv->timer.data = (unsigned long)dev; - priv->timer.function = am79c961_timer; + setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev); if (am79c961_hw_init(dev)) goto release; -- cgit v1.2.3 From 07b6901f61813aa547c5a25e118f977022fec9eb Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:30 +0530 Subject: drivers: net: et131x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/agere/et131x.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index 54eff90e2f02..658e92f79d36 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -3624,11 +3624,10 @@ static int et131x_open(struct net_device *netdev) int result; /* Start the timer to track NIC errors */ - init_timer(&adapter->error_timer); + setup_timer(&adapter->error_timer, et131x_error_timer_handler, + (unsigned long)adapter); adapter->error_timer.expires = jiffies + msecs_to_jiffies(TX_ERROR_PERIOD); - adapter->error_timer.function = et131x_error_timer_handler; - adapter->error_timer.data = (unsigned long)adapter; add_timer(&adapter->error_timer); result = request_irq(irq, et131x_isr, -- cgit v1.2.3 From 1e153e554fc8a3744d1fa8a36d4cb3b4cfbdc3da Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:31 +0530 Subject: drivers: net: appletalk: cops: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/appletalk/cops.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c index 486e1e6997fc..caf04284711a 100644 --- a/drivers/net/appletalk/cops.c +++ b/drivers/net/appletalk/cops.c @@ -424,9 +424,7 @@ static int cops_open(struct net_device *dev) */ if(lp->board==TANGENT) /* Poll 20 times per second */ { - init_timer(&cops_timer); - cops_timer.function = cops_poll; - cops_timer.data = (unsigned long)dev; + setup_timer(&cops_timer, cops_poll, (unsigned long)dev); cops_timer.expires = jiffies + HZ/20; add_timer(&cops_timer); } -- cgit v1.2.3 From 531f3ce953d4c74989eb1ece7ce37a3717532a13 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:32 +0530 Subject: drivers: net: rsi_91x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/rsi/rsi_91x_hal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 070dfd68bb83..7ad286d6e9a7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -411,9 +411,8 @@ static void bl_cmd_timeout(unsigned long priv) static int bl_start_cmd_timer(struct rsi_hw *adapter, u32 timeout) { - init_timer(&adapter->bl_cmd_timer); - adapter->bl_cmd_timer.data = (unsigned long)adapter; - adapter->bl_cmd_timer.function = (void *)&bl_cmd_timeout; + setup_timer(&adapter->bl_cmd_timer, (void *)&bl_cmd_timeout, + (unsigned long)adapter); adapter->bl_cmd_timer.expires = (msecs_to_jiffies(timeout) + jiffies); adapter->blcmd_timer_expired = false; -- cgit v1.2.3 From e7bbad4487ae4005904d00a0a04622f07fadbc5b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:33 +0530 Subject: drivers: net: atp: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/atp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index bed34684994f..bdc3833fab7e 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -438,10 +438,8 @@ static int net_open(struct net_device *dev) hardware_init(dev); - init_timer(&lp->timer); + setup_timer(&lp->timer, atp_timed_checker, (unsigned long)dev); lp->timer.expires = jiffies + TIMED_CHECKER; - lp->timer.data = (unsigned long)dev; - lp->timer.function = atp_timed_checker; /* timer handler */ add_timer(&lp->timer); netif_start_queue(dev); -- cgit v1.2.3 From f40c9d5aea0fdf5454464bc477898cc981ff9715 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:34 +0530 Subject: drivers: net: ns83820: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/ns83820.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 729095db3e08..99d3c7884a4a 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1652,9 +1652,7 @@ static int ns83820_open(struct net_device *ndev) writel(0, dev->base + TXDP_HI); writel(desc, dev->base + TXDP); - init_timer(&dev->tx_watchdog); - dev->tx_watchdog.data = (unsigned long)ndev; - dev->tx_watchdog.function = ns83820_tx_watch; + setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev); mod_timer(&dev->tx_watchdog, jiffies + 2*HZ); netif_start_queue(ndev); /* FIXME: wait for phy to come up */ -- cgit v1.2.3 From 82a8c6745169ec932473658c28679069a7ded95a Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:35 +0530 Subject: drivers: net: ixgb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 5a713199653c..1e6ec2277d54 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -508,9 +508,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw); - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = ixgb_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, ixgb_watchdog, + (unsigned long)adapter); INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task); -- cgit v1.2.3 From 88e8aa172596d2eda971d3553d52a8e877805e90 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:36 +0530 Subject: drivers: net: sundance: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dlink/sundance.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 2704bcf023be..6ca9e981ad57 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -913,10 +913,8 @@ static int netdev_open(struct net_device *dev) ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = jiffies + 3*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); /* Enable interrupts by setting the interrupt mask. */ -- cgit v1.2.3 From 4896ad68ec3803d01a0d9fead64451377ea1ec5f Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:37 +0530 Subject: drivers: net: tg3: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 656e6af70f0a..d8d5f207c759 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11087,9 +11087,7 @@ static void tg3_timer_init(struct tg3 *tp) tp->asf_multiplier = (HZ / tp->timer_offset) * TG3_FW_UPDATE_FREQ_SEC; - init_timer(&tp->timer); - tp->timer.data = (unsigned long) tp; - tp->timer.function = tg3_timer; + setup_timer(&tp->timer, tg3_timer, (unsigned long)tp); } static void tg3_timer_start(struct tg3 *tp) -- cgit v1.2.3 From f347bd6b5f6599fe67d040758947dbf9bdd89195 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:38 +0530 Subject: drivers: net: sdla: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/sdla.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 236c62538036..0cc48902dbb9 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -1617,10 +1617,8 @@ static void setup_sdla(struct net_device *dev) flp->deassoc = sdla_deassoc; flp->dlci_conf = sdla_dlci_conf; - init_timer(&flp->timer); + setup_timer(&flp->timer, sdla_poll, (unsigned long)dev); flp->timer.expires = 1; - flp->timer.data = (unsigned long) dev; - flp->timer.function = sdla_poll; } static struct net_device *sdla; -- cgit v1.2.3 From dffec39fb185837468486b6bbd00b18f35e38c82 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:39 +0530 Subject: drivers: net: cisco_hdlc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_cisco.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index a408abc25512..c696d42f4502 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -293,10 +293,8 @@ static void cisco_start(struct net_device *dev) st->up = st->txseq = st->rxseq = 0; spin_unlock_irqrestore(&st->lock, flags); - init_timer(&st->timer); + setup_timer(&st->timer, cisco_timer, (unsigned long)dev); st->timer.expires = jiffies + HZ; /* First poll after 1 s */ - st->timer.function = cisco_timer; - st->timer.data = (unsigned long)dev; add_timer(&st->timer); } -- cgit v1.2.3 From 18df06c2cafea69b19ab5f274865b89c3ab0f715 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:40 +0530 Subject: drivers: net: slip: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/slip/slip.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 436dd78c396a..eb8a18991d8c 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -763,12 +763,8 @@ static struct slip *sl_alloc(dev_t line) sl->mode = SL_MODE_DEFAULT; #ifdef CONFIG_SLIP_SMART /* initialize timer_list struct */ - init_timer(&sl->keepalive_timer); - sl->keepalive_timer.data = (unsigned long)sl; - sl->keepalive_timer.function = sl_keepalive; - init_timer(&sl->outfill_timer); - sl->outfill_timer.data = (unsigned long)sl; - sl->outfill_timer.function = sl_outfill; + setup_timer(&sl->keepalive_timer, sl_keepalive, (unsigned long)sl); + setup_timer(&sl->outfill_timer, sl_outfill, (unsigned long)sl); #endif slip_devs[i] = dev; return sl; -- cgit v1.2.3 From e998092f7b7c2ce3199db91177aa43397a3f76d7 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:41 +0530 Subject: drivers: net: spider_net: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/spider_net.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index cec9e70ab995..a913538d3213 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -2256,16 +2256,14 @@ spider_net_setup_netdev(struct spider_net_card *card) pci_set_drvdata(card->pdev, netdev); - init_timer(&card->tx_timer); - card->tx_timer.function = - (void (*)(unsigned long)) spider_net_cleanup_tx_ring; - card->tx_timer.data = (unsigned long) card; + setup_timer(&card->tx_timer, + (void(*)(unsigned long))spider_net_cleanup_tx_ring, + (unsigned long)card); netdev->irq = card->pdev->irq; card->aneg_count = 0; - init_timer(&card->aneg_timer); - card->aneg_timer.function = spider_net_link_phy; - card->aneg_timer.data = (unsigned long) card; + setup_timer(&card->aneg_timer, spider_net_link_phy, + (unsigned long)card); netif_napi_add(netdev, &card->napi, spider_net_poll, SPIDER_NET_NAPI_WEIGHT); -- cgit v1.2.3 From f891f36603dff38a15390e5c950e8ac66f73352b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:42 +0530 Subject: drivers: net: sun: cassini: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 382993c1561c..a74d78f64af9 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -5039,10 +5039,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) spin_lock_init(&cp->stat_lock[N_TX_RINGS]); mutex_init(&cp->pm_mutex); - init_timer(&cp->link_timer); - cp->link_timer.function = cas_link_timer; - cp->link_timer.data = (unsigned long) cp; - + setup_timer(&cp->link_timer, cas_link_timer, (unsigned long)cp); #if 1 /* Just in case the implementation of atomic operations * change so that an explicit initialization is necessary. -- cgit v1.2.3 From ba98e9e2eb32ba8f604f947fa656ef2071d22fa0 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:43 +0530 Subject: drivers: net: natsemi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 18af2a23a933..dedeacd0bbca 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -1571,10 +1571,8 @@ static int netdev_open(struct net_device *dev) dev->name, (int)readl(ioaddr + ChipCmd)); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; -- cgit v1.2.3 From 3e436a25fcca449cbb044c7483116772873f3e28 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:44 +0530 Subject: drivers: net: winbond-840: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/winbond-840.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 32d7229544fa..6f88d687b6d2 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -655,10 +655,8 @@ static int netdev_open(struct net_device *dev) netdev_dbg(dev, "Done netdev_open()\n"); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = jiffies + 1*HZ; - np->timer.data = (unsigned long)dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); return 0; out_err: -- cgit v1.2.3 From 7afd516ff75e967873d7bdcb8f9b1180c2400b57 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:45 +0530 Subject: drivers: net: enic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index d24ee1ad3be1..4a11baffe02d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2846,9 +2846,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup notification timer, HW reset task, and wq locks */ - init_timer(&enic->notify_timer); - enic->notify_timer.function = enic_notify_timer; - enic->notify_timer.data = (unsigned long)enic; + setup_timer(&enic->notify_timer, enic_notify_timer, + (unsigned long)enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); -- cgit v1.2.3 From c41326fbb3a7d64c329267d20c58dd9cc8f22a47 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:46 +0530 Subject: drivers: net: bnx2: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index e3af1f3cb61f..b3055a76dfbf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8462,10 +8462,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bnx2_set_default_link(bp); bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2_timer, (unsigned long)bp); bp->timer.expires = RUN_AT(BNX2_TIMER_INTERVAL); - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2_timer; #ifdef BCM_CNIC if (bnx2_shmem_rd(bp, BNX2_ISCSI_INITIATOR) & BNX2_ISCSI_INITIATOR_EN) -- cgit v1.2.3 From f7c11175bdece128c0b4c4a43e6781b9216db4d9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:47 +0530 Subject: drivers: net: xen-netback: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ee8ed9da00ad..dcfcb153918c 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -520,8 +520,7 @@ int xenvif_init_queue(struct xenvif_queue *queue) queue->credit_bytes = queue->remaining_credit = ~0UL; queue->credit_usec = 0UL; - init_timer(&queue->credit_timeout); - queue->credit_timeout.function = xenvif_tx_credit_callback; + setup_timer(&queue->credit_timeout, xenvif_tx_credit_callback, 0UL); queue->credit_window_start = get_jiffies_64(); queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES; -- cgit v1.2.3 From 55d3cef4ee68d7d54c457c7deeb0e393dd386fdc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:48 +0530 Subject: drivers: net: atmel: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/atmel/atmel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c index b68436b23a63..e816d53c2c05 100644 --- a/drivers/net/wireless/atmel/atmel.c +++ b/drivers/net/wireless/atmel/atmel.c @@ -1579,11 +1579,10 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port, priv->default_beacon_period = priv->beacon_period = 100; priv->listen_interval = 1; - init_timer(&priv->management_timer); + setup_timer(&priv->management_timer, atmel_management_timer, + (unsigned long)dev); spin_lock_init(&priv->irqlock); spin_lock_init(&priv->timerlock); - priv->management_timer.function = atmel_management_timer; - priv->management_timer.data = (unsigned long) dev; dev->netdev_ops = &atmel_netdev_ops; dev->wireless_handlers = &atmel_handler_def; -- cgit v1.2.3 From 0625d739cc61d186b65b8619ee8c41da2fd24894 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:49 +0530 Subject: drivers: net: hippi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/hippi/rrunner.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c index 71ddadbf2368..76cc140774a2 100644 --- a/drivers/net/hippi/rrunner.c +++ b/drivers/net/hippi/rrunner.c @@ -1229,10 +1229,8 @@ static int rr_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&rrpriv->timer); + setup_timer(&rrpriv->timer, rr_timer, (unsigned long)dev); rrpriv->timer.expires = RUN_AT(5*HZ); /* 5 sec. watchdog */ - rrpriv->timer.data = (unsigned long)dev; - rrpriv->timer.function = rr_timer; /* timer handler */ add_timer(&rrpriv->timer); netif_start_queue(dev); -- cgit v1.2.3 From 32db034501d37c60c433d24e9faa41c1fa3136e5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:50 +0530 Subject: drivers: net: smsc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/epic100.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index 6a0e1d4b597c..2a9724898fcf 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -739,10 +739,8 @@ static int epic_open(struct net_device *dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&ep->timer); + setup_timer(&ep->timer, epic_timer, (unsigned long)dev); ep->timer.expires = jiffies + 3*HZ; - ep->timer.data = (unsigned long)dev; - ep->timer.function = epic_timer; /* timer handler */ add_timer(&ep->timer); return rc; -- cgit v1.2.3 From d4d8db71db1bf602623e859e6c3e700b604c2072 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:51 +0530 Subject: drivers: net: qlogic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2991179c2fd0..05479d435469 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3891,10 +3891,8 @@ static int ql3xxx_probe(struct pci_dev *pdev, INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work); INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work); - init_timer(&qdev->adapter_timer); - qdev->adapter_timer.function = ql3xxx_timer; + setup_timer(&qdev->adapter_timer, ql3xxx_timer, (unsigned long)qdev); qdev->adapter_timer.expires = jiffies + HZ * 2; /* two second delay */ - qdev->adapter_timer.data = (unsigned long)qdev; if (!cards_found) { pr_alert("%s\n", DRV_STRING); -- cgit v1.2.3 From 4a9c07ed71c2b8d755ee585264f80dd2d82a8066 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:52 +0530 Subject: drivers: net: e1000e: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 327dfe5bedc0..8436c5f2c3e8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -7252,13 +7252,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = e1000_watchdog; - adapter->watchdog_timer.data = (unsigned long)adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = e1000_update_phy_info; - adapter->phy_info_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, e1000_watchdog, + (unsigned long)adapter); + setup_timer(&adapter->phy_info_timer, e1000_update_phy_info, + (unsigned long)adapter); INIT_WORK(&adapter->reset_task, e1000_reset_task); INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); -- cgit v1.2.3 From af25c31d4bf4bc7d4dea4c093af8b7704ef24f81 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:53 +0530 Subject: drivers: net: amd: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/sunlance.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 291ca5187f12..0183ffb9d3ba 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1459,9 +1459,8 @@ no_link_test: * can occur from interrupts (ex. IPv6). So we * use a timer to try again later when necessary. -DaveM */ - init_timer(&lp->multicast_timer); - lp->multicast_timer.data = (unsigned long) dev; - lp->multicast_timer.function = lance_set_multicast_retry; + setup_timer(&lp->multicast_timer, lance_set_multicast_retry, + (unsigned long)dev); if (register_netdev(dev)) { printk(KERN_ERR "SunLance: Cannot register device.\n"); -- cgit v1.2.3 From cec55a92a98fb3205cc4f127d9b70b5a965f3bd7 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:54 +0530 Subject: drivers: net: amd8111e: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/amd8111e.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 7b5df562f30f..7f22af6e37e0 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1883,9 +1883,8 @@ static int amd8111e_probe_one(struct pci_dev *pdev, /* Initialize software ipg timer */ if(lp->options & OPTION_DYN_IPG_ENABLE){ - init_timer(&lp->ipg_data.ipg_timer); - lp->ipg_data.ipg_timer.data = (unsigned long) dev; - lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg; + setup_timer(&lp->ipg_data.ipg_timer, + (void *)&amd8111e_config_ipg, (unsigned long)dev); lp->ipg_data.ipg_timer.expires = jiffies + IPG_CONVERGE_JIFFIES; lp->ipg_data.ipg = DEFAULT_IPG; -- cgit v1.2.3 From fb4de582a22282fa0f9665fd363dfd3e073c40f6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:55 +0530 Subject: drivers: net: eql: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/eql.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/eql.c b/drivers/net/eql.c index fe13bfea30ac..fccce4b47778 100644 --- a/drivers/net/eql.c +++ b/drivers/net/eql.c @@ -178,10 +178,8 @@ static void __init eql_setup(struct net_device *dev) { equalizer_t *eql = netdev_priv(dev); - init_timer(&eql->timer); - eql->timer.data = (unsigned long) eql; + setup_timer(&eql->timer, eql_timer, (unsigned long)eql); eql->timer.expires = jiffies + EQL_DEFAULT_RESCHED_IVAL; - eql->timer.function = eql_timer; spin_lock_init(&eql->queue.lock); INIT_LIST_HEAD(&eql->queue.all_slaves); -- cgit v1.2.3 From 82f5d72da8755cf73387a007ad323a86ea0b5663 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:56 +0530 Subject: drivers: net: can: usb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/can/usb/peak_usb/pcan_usb.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 838545ce468d..7e10dbdded28 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -798,9 +798,8 @@ static int pcan_usb_init(struct peak_usb_device *dev) int err; /* initialize a timer needed to wait for hardware restart */ - init_timer(&pdev->restart_timer); - pdev->restart_timer.function = pcan_usb_restart; - pdev->restart_timer.data = (unsigned long)dev; + setup_timer(&pdev->restart_timer, pcan_usb_restart, + (unsigned long)dev); /* * explicit use of dev_xxx() instead of netdev_xxx() here: -- cgit v1.2.3 From 9f5ca8816b5ccc1fd13baf46e012fb5228e57763 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:57 +0530 Subject: drivers: net: can: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/can/grcan.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c index a7be12d9a139..8570cfdaea75 100644 --- a/drivers/net/can/grcan.c +++ b/drivers/net/can/grcan.c @@ -1626,13 +1626,11 @@ static int grcan_setup_netdev(struct platform_device *ofdev, spin_lock_init(&priv->lock); if (priv->need_txbug_workaround) { - init_timer(&priv->rr_timer); - priv->rr_timer.function = grcan_running_reset; - priv->rr_timer.data = (unsigned long)dev; + setup_timer(&priv->rr_timer, grcan_running_reset, + (unsigned long)dev); - init_timer(&priv->hang_timer); - priv->hang_timer.function = grcan_initiate_running_reset; - priv->hang_timer.data = (unsigned long)dev; + setup_timer(&priv->hang_timer, grcan_initiate_running_reset, + (unsigned long)dev); } netif_napi_add(dev, &priv->napi, grcan_poll, GRCAN_NAPI_WEIGHT); -- cgit v1.2.3 From fe9bfe207e200f901361681bf1497386068f1df2 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:58 +0530 Subject: drivers: net: arcnet: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/arcnet/arcnet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index fcfccbb3d9a2..13236b2cdf13 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -450,9 +450,7 @@ struct net_device *alloc_arcdev(const char *name) lp->dev = dev; spin_lock_init(&lp->lock); - init_timer(&lp->timer); - lp->timer.data = (unsigned long) dev; - lp->timer.function = arcnet_timer; + setup_timer(&lp->timer, arcnet_timer, (unsigned long)dev); } return dev; -- cgit v1.2.3 From b9496b6b9100d824033390312dd64244f959a156 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:34:59 +0530 Subject: drivers: net: ath6kl: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wireless/ath/ath6kl/txrx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c index e6b2517e6334..4e5cc2b7045a 100644 --- a/drivers/net/wireless/ath/ath6kl/txrx.c +++ b/drivers/net/wireless/ath/ath6kl/txrx.c @@ -1753,9 +1753,7 @@ void aggr_conn_init(struct ath6kl_vif *vif, struct aggr_info *aggr_info, aggr_conn->aggr_sz = AGGR_SZ_DEFAULT; aggr_conn->dev = vif->ndev; - init_timer(&aggr_conn->timer); - aggr_conn->timer.function = aggr_timeout; - aggr_conn->timer.data = (unsigned long) aggr_conn; + setup_timer(&aggr_conn->timer, aggr_timeout, (unsigned long)aggr_conn); aggr_conn->aggr_info = aggr_info; aggr_conn->timer_scheduled = false; -- cgit v1.2.3 From 6d2bcc14f5731e9357f15d41f7c5677a72e354f9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:00 +0530 Subject: drivers: net: sun: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index fa607d062cb3..b75ab8f44968 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2910,9 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) gp->msg_enable = DEFAULT_MSG; - init_timer(&gp->link_timer); - gp->link_timer.function = gem_link_timer; - gp->link_timer.data = (unsigned long) gp; + setup_timer(&gp->link_timer, gem_link_timer, (unsigned long)gp); INIT_WORK(&gp->reset_task, gem_reset_task); -- cgit v1.2.3 From ac803d1c5f62937dc142a35dafd180f09b9f9c83 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:01 +0530 Subject: drivers: net: sis900: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 40bd88362e3d..cb61247b0526 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1065,10 +1065,8 @@ sis900_open(struct net_device *net_dev) /* Set the timer to switch to check for link beat and perhaps switch to an alternate media type. */ - init_timer(&sis_priv->timer); + setup_timer(&sis_priv->timer, sis900_timer, (unsigned long)net_dev); sis_priv->timer.expires = jiffies + HZ; - sis_priv->timer.data = (unsigned long)net_dev; - sis_priv->timer.function = sis900_timer; add_timer(&sis_priv->timer); return 0; -- cgit v1.2.3 From f1ce56ce5d2a18f5d61ec335aaf5aad978fa9cd6 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:02 +0530 Subject: drivers: net: packetengines: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/packetengines/yellowfin.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c index fa7770da6ef8..33c241f52a71 100644 --- a/drivers/net/ethernet/packetengines/yellowfin.c +++ b/drivers/net/ethernet/packetengines/yellowfin.c @@ -632,10 +632,8 @@ static int yellowfin_open(struct net_device *dev) } /* Set the timer to check for link beat. */ - init_timer(&yp->timer); + setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev); yp->timer.expires = jiffies + 3*HZ; - yp->timer.data = (unsigned long)dev; - yp->timer.function = yellowfin_timer; /* timer handler */ add_timer(&yp->timer); out: return rc; -- cgit v1.2.3 From 590deff6e7a898fadf3f1fd6425937d481913fb1 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:03 +0530 Subject: drivers: net: mlx5: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 8aea0a065e56..a89a68ce53ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -320,15 +320,13 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - init_timer(&health->timer); + setup_timer(&health->timer, poll_health, (unsigned long)dev); health->sick = 0; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; - health->timer.data = (unsigned long)dev; - health->timer.function = poll_health; health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL); add_timer(&health->timer); } -- cgit v1.2.3 From d2a0012e7632a588683ad6320529659c4cd27131 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:04 +0530 Subject: drivers: net: mlx4: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 53daa6ca5d83..de0f9e5e42ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) phys_addr_t addr; INIT_LIST_HEAD(&priv->catas_err.list); - init_timer(&priv->catas_err.timer); + setup_timer(&priv->catas_err.timer, poll_catas, (unsigned long)dev); priv->catas_err.map = NULL; if (!mlx4_is_slave(dev)) { @@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) } } - priv->catas_err.timer.data = (unsigned long) dev; - priv->catas_err.timer.function = poll_catas; priv->catas_err.timer.expires = round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); add_timer(&priv->catas_err.timer); -- cgit v1.2.3 From 636873890c63c892fca5ccab8af3a9f3607eb1fc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:05 +0530 Subject: drivers: net: pxa168: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/pxa168_eth.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 993724959a7c..91b1c154fd29 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1496,9 +1496,8 @@ static int pxa168_eth_probe(struct platform_device *pdev) netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size); memset(&pep->timeout, 0, sizeof(struct timer_list)); - init_timer(&pep->timeout); - pep->timeout.function = rxq_refill_timer_wrapper; - pep->timeout.data = (unsigned long)pep; + setup_timer(&pep->timeout, rxq_refill_timer_wrapper, + (unsigned long)pep); pep->smi_bus = mdiobus_alloc(); if (!pep->smi_bus) { -- cgit v1.2.3 From 34b0cf069d174b2615769102d75b4ce687addeb9 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:06 +0530 Subject: drivers: net: fealnx: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/fealnx.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c index e92859dab7ae..c8982313d850 100644 --- a/drivers/net/ethernet/fealnx.c +++ b/drivers/net/ethernet/fealnx.c @@ -909,17 +909,13 @@ static int netdev_open(struct net_device *dev) printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name); /* Set the timer to check for link beat. */ - init_timer(&np->timer); + setup_timer(&np->timer, netdev_timer, (unsigned long)dev); np->timer.expires = RUN_AT(3 * HZ); - np->timer.data = (unsigned long) dev; - np->timer.function = netdev_timer; /* timer handler */ add_timer(&np->timer); - init_timer(&np->reset_timer); - np->reset_timer.data = (unsigned long) dev; - np->reset_timer.function = reset_timer; + setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev); np->reset_timer_armed = 0; return rc; } -- cgit v1.2.3 From a76aec2ac51f8a74659cff9b19f712e8fb984393 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:07 +0530 Subject: drivers: net: dmfe: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/dmfe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 07e10a45beaa..6585f737d08b 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -596,10 +596,8 @@ static int dmfe_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + setup_timer(&db->timer, dmfe_timer, (unsigned long)dev); db->timer.expires = DMFE_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = dmfe_timer; add_timer(&db->timer); return 0; -- cgit v1.2.3 From 6c43824477c2ac722325ba460c2ce683c48fb76b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:08 +0530 Subject: drivers: net: bnxt: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index aacec8bc19d5..c25f5b555adf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7190,9 +7190,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; - init_timer(&bp->timer); - bp->timer.data = (unsigned long)bp; - bp->timer.function = bnxt_timer; + setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp); bp->current_interval = BNXT_TIMER_INTERVAL; clear_bit(BNXT_STATE_OPEN, &bp->state); -- cgit v1.2.3 From cac40a458ae68b41955cc55a80fc9e0166e429b1 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:09 +0530 Subject: drivers: net: amd: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/a2065.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c index e22f976a0d18..998d30e050a6 100644 --- a/drivers/net/ethernet/amd/a2065.c +++ b/drivers/net/ethernet/amd/a2065.c @@ -733,10 +733,9 @@ static int a2065_init_one(struct zorro_dev *z, dev->watchdog_timeo = 5*HZ; dev->dma = 0; - init_timer(&priv->multicast_timer); - priv->multicast_timer.data = (unsigned long) dev; - priv->multicast_timer.function = - (void (*)(unsigned long))lance_set_multicast; + setup_timer(&priv->multicast_timer, + (void(*)(unsigned long))lance_set_multicast, + (unsigned long)dev); err = register_netdev(dev); if (err) { -- cgit v1.2.3 From 7c214194de36217c06b0dc4ed64d9cf251b261df Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:10 +0530 Subject: drivers: net: adi: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/adi/bfin_mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index a251de8d9a91..0658cde1586a 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -1650,9 +1650,8 @@ static int bfin_mac_probe(struct platform_device *pdev) ndev->netdev_ops = &bfin_mac_netdev_ops; ndev->ethtool_ops = &bfin_mac_ethtool_ops; - init_timer(&lp->tx_reclaim_timer); - lp->tx_reclaim_timer.data = (unsigned long)lp; - lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout; + setup_timer(&lp->tx_reclaim_timer, tx_reclaim_skb_timeout, + (unsigned long)lp); lp->flags = 0; netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM); -- cgit v1.2.3 From 13e96b93ff5f1093592419e1f84962d4a266bd3e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:11 +0530 Subject: drivers: net: can: sja1000: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/can/sja1000/peak_pcmcia.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c index dd56133cc461..4b8758e10bd4 100644 --- a/drivers/net/can/sja1000/peak_pcmcia.c +++ b/drivers/net/can/sja1000/peak_pcmcia.c @@ -692,9 +692,7 @@ static int pcan_probe(struct pcmcia_device *pdev) } /* init the timer which controls the leds */ - init_timer(&card->led_timer); - card->led_timer.function = pcan_led_timer; - card->led_timer.data = (unsigned long)card; + setup_timer(&card->led_timer, pcan_led_timer, (unsigned long)card); /* request the given irq */ err = request_irq(pdev->irq, &pcan_isr, IRQF_SHARED, PCC_NAME, card); -- cgit v1.2.3 From 7890d5341999968b94dbdf9e30ad32f37f82ea10 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:12 +0530 Subject: drivers: net: caif: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/caif/caif_hsi.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c index 438966bf51c2..fed75e75207a 100644 --- a/drivers/net/caif/caif_hsi.c +++ b/drivers/net/caif/caif_hsi.c @@ -1211,17 +1211,14 @@ static int cfhsi_open(struct net_device *ndev) init_waitqueue_head(&cfhsi->flush_fifo_wait); /* Setup the inactivity timer. */ - init_timer(&cfhsi->inactivity_timer); - cfhsi->inactivity_timer.data = (unsigned long)cfhsi; - cfhsi->inactivity_timer.function = cfhsi_inactivity_tout; + setup_timer(&cfhsi->inactivity_timer, cfhsi_inactivity_tout, + (unsigned long)cfhsi); /* Setup the slowpath RX timer. */ - init_timer(&cfhsi->rx_slowpath_timer); - cfhsi->rx_slowpath_timer.data = (unsigned long)cfhsi; - cfhsi->rx_slowpath_timer.function = cfhsi_rx_slowpath; + setup_timer(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath, + (unsigned long)cfhsi); /* Setup the aggregation timer. */ - init_timer(&cfhsi->aggregation_timer); - cfhsi->aggregation_timer.data = (unsigned long)cfhsi; - cfhsi->aggregation_timer.function = cfhsi_aggregation_tout; + setup_timer(&cfhsi->aggregation_timer, cfhsi_aggregation_tout, + (unsigned long)cfhsi); /* Activate HSI interface. */ res = cfhsi->ops->cfhsi_up(cfhsi->ops); -- cgit v1.2.3 From ba7400ed88adcd07af0dc004be1cf5ab2443cb44 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:13 +0530 Subject: drivers: net: appletalk: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/appletalk/ltpc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index ac755d2950a6..e4aa374caa4d 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1165,9 +1165,7 @@ struct net_device * __init ltpc_probe(void) dev->irq = 0; /* polled mode -- 20 times per second */ /* this is really, really slow... should it poll more often? */ - init_timer(<pc_timer); - ltpc_timer.function=ltpc_poll; - ltpc_timer.data = (unsigned long) dev; + setup_timer(<pc_timer, ltpc_poll, (unsigned long)dev); ltpc_timer.expires = jiffies + HZ/20; add_timer(<pc_timer); -- cgit v1.2.3 From f2803332f2ef86ad29e247350f697bad4eb1aa93 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:14 +0530 Subject: drivers: net: dscc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/dscc4.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c index a043fb1367bd..64f176496da4 100644 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@ -1127,10 +1127,8 @@ static int dscc4_open(struct net_device *dev) done: netif_start_queue(dev); - init_timer(&dpriv->timer); + setup_timer(&dpriv->timer, dscc4_timer, (unsigned long)dev); dpriv->timer.expires = jiffies + 10*HZ; - dpriv->timer.data = (unsigned long)dev; - dpriv->timer.function = dscc4_timer; add_timer(&dpriv->timer); netif_carrier_on(dev); -- cgit v1.2.3 From 8d81fe753c1572573be6d529a44b1506e3b0425d Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:15 +0530 Subject: drivers: net: hdlc_ppp: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_ppp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 0d2e00ece804..c7721c729541 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -610,9 +610,7 @@ static void ppp_start(struct net_device *dev) for (i = 0; i < IDX_COUNT; i++) { struct proto *proto = &ppp->protos[i]; proto->dev = dev; - init_timer(&proto->timer); - proto->timer.function = ppp_timer; - proto->timer.data = (unsigned long)proto; + setup_timer(&proto->timer, ppp_timer, (unsigned long)proto); proto->state = CLOSED; } ppp->protos[IDX_LCP].pid = PID_LCP; -- cgit v1.2.3 From 7e47fc264e2837d971720ccf1f8b35648c2626ea Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:16 +0530 Subject: drivers: net: hamradio: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 021a8ec411ab..97fe8dfb602d 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -623,9 +623,7 @@ static int sixpack_open(struct tty_struct *tty) netif_start_queue(dev); - init_timer(&sp->tx_t); - sp->tx_t.function = sp_xmit_on_air; - sp->tx_t.data = (unsigned long) sp; + setup_timer(&sp->tx_t, sp_xmit_on_air, (unsigned long)sp); init_timer(&sp->resync_t); -- cgit v1.2.3 From 9d90725f33ebd0f30790c26eb5e9e0a098567895 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:17 +0530 Subject: drivers: net: cpsw_ale: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_ale.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c index ddd43e09111e..cd1185e66133 100644 --- a/drivers/net/ethernet/ti/cpsw_ale.c +++ b/drivers/net/ethernet/ti/cpsw_ale.c @@ -859,9 +859,7 @@ void cpsw_ale_start(struct cpsw_ale *ale) cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1); cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1); - init_timer(&ale->timer); - ale->timer.data = (unsigned long)ale; - ale->timer.function = cpsw_ale_timer; + setup_timer(&ale->timer, cpsw_ale_timer, (unsigned long)ale); if (ale->ageout) { ale->timer.expires = jiffies + ale->ageout; add_timer(&ale->timer); -- cgit v1.2.3 From 997decfb6aeaa9be41ff557741845bb9fb4bf5bc Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:18 +0530 Subject: drivers: net: stmmac: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 1763e48c84e2..f41661a04f23 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2217,10 +2217,8 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) { priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; - init_timer(&priv->txtimer); + setup_timer(&priv->txtimer, stmmac_tx_timer, (unsigned long)priv); priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); - priv->txtimer.data = (unsigned long)priv; - priv->txtimer.function = stmmac_tx_timer; add_timer(&priv->txtimer); } -- cgit v1.2.3 From 9be5813a29e5b3379db30d00319682fe965febe5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:19 +0530 Subject: drivers: net: packetengines: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/packetengines/hamachi.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c index 482b85e4d665..77bc7cca8980 100644 --- a/drivers/net/ethernet/packetengines/hamachi.c +++ b/drivers/net/ethernet/packetengines/hamachi.c @@ -979,10 +979,8 @@ static int hamachi_open(struct net_device *dev) dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus)); } /* Set the timer to check for link beat. */ - init_timer(&hmp->timer); + setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev); hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */ - hmp->timer.data = (unsigned long)dev; - hmp->timer.function = hamachi_timer; /* timer handler */ add_timer(&hmp->timer); return 0; -- cgit v1.2.3 From 7d8fb3a7742513bb5434be704e0b0bf785032f45 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:20 +0530 Subject: drivers: net: i40evf: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 1825d956bb00..c243f9da95ae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -2686,9 +2686,8 @@ static void i40evf_init_task(struct work_struct *work) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = &i40evf_watchdog_timer; - adapter->watchdog_timer.data = (unsigned long)adapter; + setup_timer(&adapter->watchdog_timer, &i40evf_watchdog_timer, + (unsigned long)adapter); mod_timer(&adapter->watchdog_timer, jiffies + 1); adapter->tx_desc_count = I40EVF_DEFAULT_TXD; -- cgit v1.2.3 From 99e3aa1ea47d0804a69ea7948ddd1251bcd1a635 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:21 +0530 Subject: drivers: net: uli526x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/uli526x.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 7fc248efc4ba..5fbbc0caba99 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -491,10 +491,8 @@ static int uli526x_open(struct net_device *dev) netif_wake_queue(dev); /* set and active a timer process */ - init_timer(&db->timer); + setup_timer(&db->timer, uli526x_timer, (unsigned long)dev); db->timer.expires = ULI526X_TIMER_WUT + HZ * 2; - db->timer.data = (unsigned long)dev; - db->timer.function = uli526x_timer; add_timer(&db->timer); return 0; -- cgit v1.2.3 From 570ba3e82befbba7649e459fedc4aab27510ef44 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:22 +0530 Subject: drivers: net: enic: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_clsf.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h index 6aa9f89d073b..4bfbf25f9ddc 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.h +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h @@ -19,9 +19,8 @@ void enic_flow_may_expire(unsigned long data); static inline void enic_rfs_timer_start(struct enic *enic) { - init_timer(&enic->rfs_h.rfs_may_expire); - enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire; - enic->rfs_h.rfs_may_expire.data = (unsigned long)enic; + setup_timer(&enic->rfs_h.rfs_may_expire, enic_flow_may_expire, + (unsigned long)enic); mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4); } -- cgit v1.2.3 From 66f06890305eb2c8200cefbc3d6405ff6baef47e Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:23 +0530 Subject: drivers: net: cxgb: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb/sge.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 0f13a7f7c1d3..75e439918700 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -2075,9 +2075,8 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p) goto nomem_port; } - init_timer(&sge->tx_reclaim_timer); - sge->tx_reclaim_timer.data = (unsigned long)sge; - sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; + setup_timer(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, + (unsigned long)sge); if (is_T2(sge->adapter)) { init_timer(&sge->espibug_timer); -- cgit v1.2.3 From 804dea920b66fa5813278fc55eaa5b2ae39ab110 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:24 +0530 Subject: drivers: net: bnx2x: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index c12b4d3e946e..54d1571384a0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12414,10 +12414,8 @@ static int bnx2x_init_bp(struct bnx2x *bp) bp->current_interval = CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ; - init_timer(&bp->timer); + setup_timer(&bp->timer, bnx2x_timer, (unsigned long)bp); bp->timer.expires = jiffies + bp->current_interval; - bp->timer.data = (unsigned long) bp; - bp->timer.function = bnx2x_timer; if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) && SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) && -- cgit v1.2.3 From 19569c88b938145e90cc7d6ab45d7fa0edd07beb Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 22:35:25 +0530 Subject: drivers: net: lmc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/lmc/lmc_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c index 4698450c77d1..ae69d65158e6 100644 --- a/drivers/net/wan/lmc/lmc_main.c +++ b/drivers/net/wan/lmc/lmc_main.c @@ -1084,10 +1084,8 @@ static int lmc_open(struct net_device *dev) * Setup a timer for the watchdog on probe, and start it running. * Since lmc_ok == 0, it will be a NOP for now. */ - init_timer (&sc->timer); + setup_timer(&sc->timer, lmc_watchdog, (unsigned long)dev); sc->timer.expires = jiffies + HZ; - sc->timer.data = (unsigned long) dev; - sc->timer.function = lmc_watchdog; add_timer (&sc->timer); lmc_trace(dev, "lmc_open out"); -- cgit v1.2.3 From 95ec66968571bf0af0a22effdc1b9d9e62ea6630 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:56 -0700 Subject: samples/bpf: Use getppid instead of getpgrp for array map stress When cross-compiling the bpf sample map_perf_test for aarch64, I find that __NR_getpgrp is undefined. This causes build errors. This syscall is deprecated and requires defining __ARCH_WANT_SYSCALL_DEPRECATED. To avoid having to define that, just use a different syscall (getppid) for the array map stress test. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/map_perf_test_kern.c | 2 +- samples/bpf/map_perf_test_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index 098c857f1eda..2b2ffb97018b 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -266,7 +266,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpgrp") +SEC("kprobe/sys_getppid") int stress_array_map_lookup(struct pt_regs *ctx) { u32 key = 1, i; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index f388254896f6..a0310fc70057 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -282,7 +282,7 @@ static void test_array_lookup(int cpu) start_time = time_get_ns(); for (i = 0; i < max_cnt; i++) - syscall(__NR_getpgrp, 0); + syscall(__NR_getppid, 0); printf("%d:array_lookup %lld lookups per sec\n", cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); } -- cgit v1.2.3 From 876e88e3273e300895e308bd660c6cfaabb03cd5 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:57 -0700 Subject: samples/bpf: Enable cross compiler support When cross compiling, bpf samples use HOSTCC for compiling the non-BPF part of the sample, however what we really want is to use the cross compiler to build for the cross target since that is what will load and run the BPF sample. Detect this and compile samples correctly. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index cf17c7932a6e..13f74b67ca44 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -177,6 +177,11 @@ HOSTLOADLIBES_syscall_tp += -lelf LLC ?= llc CLANG ?= clang +# Detect that we're cross compiling and use the cross compiler +ifdef CROSS_COMPILE +HOSTCC = $(CROSS_COMPILE)gcc +endif + # Trick to allow make to be run from this directory all: $(MAKE) -C ../../ $(CURDIR)/ -- cgit v1.2.3 From b655fc1c2ee1c2b2d07c0b6f432798b91202718c Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:58 -0700 Subject: samples/bpf: Fix pt_regs issues when cross-compiling BPF samples fail to build when cross-compiling for ARM64 because of incorrect pt_regs param selection. This is because clang defines __x86_64__ and bpf_headers thinks we're building for x86. Since clang is building for the BPF target, it shouldn't make assumptions about what target the BPF program is going to run on. To fix this, lets pass ARCH so the header knows which target the BPF program is being compiled for and can use the correct pt_regs code. Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/Makefile | 2 +- tools/testing/selftests/bpf/bpf_helpers.h | 56 +++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 13f74b67ca44..ebc2ad69b62c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -230,7 +230,7 @@ $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -Wno-compare-distinct-pointer-types \ + -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ -Wno-unknown-warning-option \ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 36fb9161b34a..4875395b0b52 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -109,7 +109,47 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = static int (*bpf_skb_change_head)(void *, int len, int flags) = (void *) BPF_FUNC_skb_change_head; +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s930x) + #define bpf_target_s930x + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined #if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390x__) + #define bpf_target_s930x +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) #define PT_REGS_PARM1(x) ((x)->di) #define PT_REGS_PARM2(x) ((x)->si) @@ -122,7 +162,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->ip) -#elif defined(__s390x__) +#elif defined(bpf_target_s390x) #define PT_REGS_PARM1(x) ((x)->gprs[2]) #define PT_REGS_PARM2(x) ((x)->gprs[3]) @@ -135,7 +175,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->gprs[15]) #define PT_REGS_IP(x) ((x)->psw.addr) -#elif defined(__aarch64__) +#elif defined(bpf_target_arm64) #define PT_REGS_PARM1(x) ((x)->regs[0]) #define PT_REGS_PARM2(x) ((x)->regs[1]) @@ -148,7 +188,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->pc) -#elif defined(__mips__) +#elif defined(bpf_target_mips) #define PT_REGS_PARM1(x) ((x)->regs[4]) #define PT_REGS_PARM2(x) ((x)->regs[5]) @@ -161,7 +201,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->regs[29]) #define PT_REGS_IP(x) ((x)->cp0_epc) -#elif defined(__powerpc__) +#elif defined(bpf_target_powerpc) #define PT_REGS_PARM1(x) ((x)->gpr[3]) #define PT_REGS_PARM2(x) ((x)->gpr[4]) @@ -172,7 +212,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) -#elif defined(__sparc__) +#elif defined(bpf_target_sparc) #define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) @@ -182,6 +222,8 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ #if defined(__arch64__) #define PT_REGS_IP(x) ((x)->tpc) #else @@ -190,10 +232,10 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #endif -#ifdef __powerpc__ +#ifdef bpf_target_powerpc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(__sparc__) +#elif bpf_target_sparc #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else -- cgit v1.2.3 From 8bf2ac25a96c69985a1a9fbbad7da22ae4343a38 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 20 Sep 2017 09:11:59 -0700 Subject: samples/bpf: Add documentation on cross compilation Acked-by: Alexei Starovoitov Signed-off-by: Joel Fernandes Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/README.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst index 79f9a58f1872..5f27e4faca50 100644 --- a/samples/bpf/README.rst +++ b/samples/bpf/README.rst @@ -64,3 +64,13 @@ It is also possible to point make to the newly compiled 'llc' or 'clang' command via redefining LLC or CLANG on the make command line:: make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang + +Cross compiling samples +----------------------- +In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH +environment variables before calling make. This will direct make to build +samples for the cross target. + +export ARCH=arm64 +export CROSS_COMPILE="aarch64-linux-gnu-" +make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang -- cgit v1.2.3 From 6e617de84e87d626d1e976fc30e1322239fd4d2d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Sep 2017 18:26:53 +0200 Subject: net: avoid a full fib lookup when rp_filter is disabled. Since commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") a full fib lookup is needed even if the rp_filter is disabled, if accept_local is false - which is the default. What we really need in the above scenario is just checking that the source IP address is not local, and in most case we can do that is a cheaper way looking up the ifaddr hash table. This commit adds a helper for such lookup, and uses it to validate the src address when rp_filter is disabled and no 'local' routes are created by the user space in the relevant namespace. A new ipv4 netns flag is added to account for such routes. We need that to preserve the same behavior we had before this patch. It also drops the checks to bail early from __fib_validate_source, added by the commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") they do not give any measurable performance improvement: if we do the lookup with are on a slower path. This improves UDP performances for unconnected sockets when rp_filter is disabled by 5% and also gives small but measurable performance improvement for TCP flood scenarios. v1 -> v2: - use the ifaddr lookup helper in __ip_dev_find(), as suggested by Eric - fall-back to full lookup if custom local routes are present Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/devinet.c | 30 ++++++++++++++++++------------ net/ipv4/fib_frontend.c | 22 +++++++++++++++++----- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fb3f809e34e4..751d051f0bc7 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -179,6 +179,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20d061c805e3..20720721da4b 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; + bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d7adc0616599..7ce22a2c07ce 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { - if (ifa->ifa_local == addr) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; - result = dev; - break; - } - } - if (!result) { + ifa = inet_lookup_ifaddr_rcu(net, addr); + if (!ifa) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res = { 0 }; struct fib_table *local; @@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && res.type == RTN_LOCAL) result = FIB_RES_DEV(res); + } else { + result = ifa->ifa_dev->dev; } if (result && devref) dev_hold(result); @@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) } EXPORT_SYMBOL(__ip_dev_find); +/* called under RCU lock */ +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) +{ + u32 hash = inet_addr_hash(net, addr); + struct in_ifaddr *ifa; + + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) + if (ifa->ifa_local == addr && + net_eq(dev_net(ifa->ifa_dev->dev), net)) + return ifa; + + return NULL; +} + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 37819ab4cc74..f02819134ba2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -345,9 +345,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; - if (!rpf && !fib_num_tclassid_users(net) && - (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) - goto last_resort; fib_combine_itag(itag, &res); dev_match = false; @@ -402,13 +399,26 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); + struct net *net = dev_net(dev); - if (!r && !fib_num_tclassid_users(dev_net(dev)) && - IN_DEV_ACCEPT_LOCAL(idev) && + if (!r && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { + if (IN_DEV_ACCEPT_LOCAL(idev)) + goto ok; + /* if no local routes are added from user space we can check + * for local addresses looking-up the ifaddr table + */ + if (net->ipv4.fib_has_custom_local_routes) + goto full_check; + if (inet_lookup_ifaddr_rcu(net, src)) + return -EINVAL; + +ok: *itag = 0; return 0; } + +full_check: return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); } @@ -759,6 +769,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, } err = fib_table_insert(net, tb, &cfg, extack); + if (!err && cfg.fc_type == RTN_LOCAL) + net->ipv4.fib_has_custom_local_routes = true; errout: return err; } -- cgit v1.2.3 From 0abfd494deefdbab66ac03c1181a614285e7d90c Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 12:28:05 -0400 Subject: net: dsa: use dedicated CPU port Each port in DSA has its own dedicated CPU port currently available in its parent switch's ds->ports[port].cpu_dp. Use it instead of getting the unique tree CPU port, which will be deprecated soon. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/bcm_sf2.c | 6 +++--- drivers/net/dsa/mt7530.c | 4 ++-- drivers/net/dsa/mv88e6060.c | 2 +- drivers/net/dsa/qca8k.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index a9f2a5b55a5e..d4ce092def83 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1336,7 +1336,7 @@ EXPORT_SYMBOL(b53_fdb_dump); int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br) { struct b53_device *dev = ds->priv; - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; u16 pvlan, reg; unsigned int i; @@ -1382,7 +1382,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) { struct b53_device *dev = ds->priv; struct b53_vlan *vl = &dev->vlans[0]; - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; unsigned int i; u16 pvlan, reg, pvid; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0072a959db5b..898d5642b516 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -661,7 +661,7 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds) static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->dst->cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->netdev; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol; @@ -684,9 +684,9 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { - struct net_device *p = ds->dst->cpu_dp->netdev; + struct net_device *p = ds->ports[port].cpu_dp->netdev; struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); - s8 cpu_port = ds->dst->cpu_dp->index; + s8 cpu_port = ds->ports[port].cpu_dp->index; struct ethtool_wolinfo pwol; p->ethtool_ops->get_wol(p, &pwol); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index c142b97add2c..faa3b88d2206 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -928,11 +928,11 @@ mt7530_setup(struct dsa_switch *ds) struct device_node *dn; struct mt7530_dummy_poll p; - /* The parent node of cpu_dp->netdev which holds the common system + /* The parent node of master netdev which holds the common system * controller also is the container for two GMACs nodes representing * as two netdev instances. */ - dn = ds->dst->cpu_dp->netdev->dev.of_node->parent; + dn = ds->ports[MT7530_CPU_PORT].netdev->dev.of_node->parent; priv->ethernet = syscon_node_to_regmap(dn); if (IS_ERR(priv->ethernet)) return PTR_ERR(priv->ethernet); diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index dce7fa57eb55..621cdc46ad81 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -176,7 +176,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) ((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) | (dsa_is_cpu_port(ds, p) ? ds->enabled_port_mask : - BIT(ds->dst->cpu_dp->index))); + BIT(ds->ports[p].cpu_dp->index))); /* Port Association Vector: when learning source addresses * of packets, add the address to the address database using diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 5ada7a41449c..82f09711ac1a 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -506,7 +506,7 @@ qca8k_setup(struct dsa_switch *ds) pr_warn("regmap initialization failed"); /* Initialize CPU port pad mode (xMII type, delays...) */ - phy_mode = of_get_phy_mode(ds->dst->cpu_dp->dn); + phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn); if (phy_mode < 0) { pr_err("Can't find phy-mode for master device\n"); return phy_mode; -- cgit v1.2.3 From 4fa7b718881a5358286903b796968cea80993820 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 19:31:57 -0400 Subject: net: dsa: better scoping of slave functions A few DSA slave functions take a dsa_slave_priv pointer as first argument, whereas the scope of the slave.c functions is the slave net_device structure. Fix this and rename dsa_netpoll_send_skb to dsa_slave_netpoll_send_skb. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6fc9eb094267..4a98942a6135 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -385,10 +385,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev, return 0; } -static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, - struct sk_buff *skb) +static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER + struct dsa_slave_priv *p = netdev_priv(dev); + if (p->netpoll) netpoll_send_skb(p->netpoll, skb); #else @@ -422,7 +424,7 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) * tag to be successfully transmitted */ if (unlikely(netpoll_tx_running(dev))) - return dsa_netpoll_send_skb(p, nskb); + return dsa_slave_netpoll_send_skb(dev, nskb); /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType @@ -736,9 +738,9 @@ static int dsa_slave_get_phys_port_name(struct net_device *dev, } static struct dsa_mall_tc_entry * -dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, - unsigned long cookie) +dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) { + struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) @@ -820,7 +822,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, if (!ds->ops->port_mirror_del) return; - mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); + mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); if (!mall_tc_entry) return; @@ -1027,10 +1029,9 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, } /* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct dsa_slave_priv *p, - struct net_device *slave_dev, - int addr) +static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) { + struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); @@ -1046,9 +1047,9 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, p->phy_interface); } -static int dsa_slave_phy_setup(struct dsa_slave_priv *p, - struct net_device *slave_dev) +static int dsa_slave_phy_setup(struct net_device *slave_dev) { + struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; struct device_node *phy_dn, *port_dn; bool phy_is_fixed = false; @@ -1088,7 +1089,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, */ if (!phy_is_fixed && phy_id >= 0 && (ds->phys_mii_mask & (1 << phy_id))) { - ret = dsa_slave_phy_connect(p, slave_dev, phy_id); + ret = dsa_slave_phy_connect(slave_dev, phy_id); if (ret) { netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); of_node_put(phy_dn); @@ -1111,7 +1112,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, * MDIO bus instead */ if (!p->phy) { - ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index); + ret = dsa_slave_phy_connect(slave_dev, p->dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->dp->index, ret); @@ -1233,7 +1234,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) netif_carrier_off(slave_dev); - ret = dsa_slave_phy_setup(p, slave_dev); + ret = dsa_slave_phy_setup(slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); unregister_netdev(slave_dev); -- cgit v1.2.3 From de40fc5d210f2c31f2c25a9b920861276a71b70d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Wed, 20 Sep 2017 19:32:14 -0400 Subject: net: dsa: add port fdb dump Dumping a DSA port's FDB entries is not specific to a DSA slave, so add a dsa_port_fdb_dump function, similarly to dsa_port_fdb_add and dsa_port_fdb_del. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 + net/dsa/port.c | 11 +++++++++++ net/dsa/slave.c | 9 ++------- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index f616b3444418..9803952a5b40 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -128,6 +128,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid); int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid); +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); diff --git a/net/dsa/port.c b/net/dsa/port.c index 659676ba3f8b..76d43a82d397 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -173,6 +173,17 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + if (!ds->ops->port_fdb_dump) + return -EOPNOTSUPP; + + return ds->ops->port_fdb_dump(ds, port, cb, data); +} + int dsa_port_mdb_add(struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4a98942a6135..02ace7d462c4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -263,16 +263,11 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, }; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; int err; - if (!ds->ops->port_fdb_dump) - return -EOPNOTSUPP; - - err = ds->ops->port_fdb_dump(ds, dp->index, - dsa_slave_port_fdb_do_dump, - &dump); + err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); *idx = dump.idx; + return err; } -- cgit v1.2.3 From b6cd4b5895848968e8fee93fc5e3dc8babc40b9e Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 21 Sep 2017 08:15:26 +0200 Subject: e100: Cocci spatch "pool_zalloc-simple" Use *_pool_zalloc rather than *_pool_alloc followed by memset with 0. Found by coccinelle spatch "api/alloc/pool_zalloc-simple.cocci" Signed-off-by: Thomas Meyer Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/e100.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 4d10270ddf8f..184f11242f56 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1910,11 +1910,10 @@ static int e100_alloc_cbs(struct nic *nic) nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL; nic->cbs_avail = 0; - nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL, - &nic->cbs_dma_addr); + nic->cbs = pci_pool_zalloc(nic->cbs_pool, GFP_KERNEL, + &nic->cbs_dma_addr); if (!nic->cbs) return -ENOMEM; - memset(nic->cbs, 0, count * sizeof(struct cb)); for (cb = nic->cbs, i = 0; i < count; cb++, i++) { cb->next = (i + 1 < count) ? cb + 1 : nic->cbs; -- cgit v1.2.3 From 51957bc53aa7ca60d63f1ba0d9e3d887562b1723 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:17:34 +0200 Subject: net/smc: parameter cleanup in smc_cdc_get_free_slot() Use the smc_connection as first parameter with smc_cdc_get_free_slot(). This is just a small code cleanup, no functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 7 ++++--- net/smc/smc_cdc.h | 3 ++- net/smc/smc_tx.c | 6 ++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7294edbc221..5ef97e5a5f78 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -62,10 +62,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, bh_unlock_sock(&smc->sk); } -int smc_cdc_get_free_slot(struct smc_link *link, +int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend) { + struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, (struct smc_wr_tx_pend_priv **)pend); } @@ -118,8 +120,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc) return rc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 8e1d76f26007..56f883d1159c 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -206,7 +206,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_cdc_tx_pend; -int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf, +int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 3c656beb8820..e2228f6d1c25 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -394,8 +394,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) int rc; spin_lock_bh(&conn->send_lock); - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -463,8 +462,7 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], - &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (!rc) rc = smc_cdc_msg_send(conn, wr_buf, pend); if (rc < 0) { -- cgit v1.2.3 From e1f6198e221f472c03b88e352432a01076ec8647 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 21 Sep 2017 12:50:47 +0530 Subject: cxgb4: avoid stall while shutting down the adapter do not wait for completion while deleting the filters when the adapter is shutting down because we may not get the response as interrupts will be disabled. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 7 ++++++- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ea72d2d2e1b4..c4e997fdff64 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -549,6 +549,7 @@ enum { /* adapter flags */ MASTER_PF = (1 << 7), FW_OFLD_CONN = (1 << 9), ROOT_NO_RELAXED_ORDERING = (1 << 10), + SHUTTING_DOWN = (1 << 11), }; enum { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 45b5853ca2f1..97ead2c66751 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -191,7 +191,8 @@ static int del_filter_wr(struct adapter *adapter, int fidx) return -ENOMEM; fwr = __skb_put(skb, len); - t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); + t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1 + : adapter->sge.fw_evtq.abs_id); /* Mark the filter as "pending" and ship off the Filter Work Request. * When we get the Work Request Reply we'll clear the pending status. @@ -636,6 +637,10 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id) struct filter_ctx ctx; int ret; + /* If we are shutting down the adapter do not wait for completion */ + if (netdev2adap(dev)->flags & SHUTTING_DOWN) + return __cxgb4_del_filter(dev, filter_id, NULL); + init_completion(&ctx.completion); ret = __cxgb4_del_filter(dev, filter_id, &ctx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 92d9d795d874..5fe81a4e26a6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5254,6 +5254,8 @@ static void remove_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; @@ -5339,6 +5341,8 @@ static void shutdown_one(struct pci_dev *pdev) return; } + adapter->flags |= SHUTTING_DOWN; + if (adapter->pf == 4) { int i; -- cgit v1.2.3 From 8701352b22a2f82c814283131587e970a56b69a8 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 21 Sep 2017 12:05:25 +0200 Subject: bridge: trigger RTM_NEWLINK when interface is modified by bridge ioctl Currently, there is a difference in netlink events received when an interface is modified through bridge ioctl() or through netlink. This patch generates additional events when an interface is added to or removed from a bridge via ioctl(). When adding then removing an interface from a bridge with netlink, we get: 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 Deleted 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff When using ioctl(): 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 qdisc noqueue master bridge0 state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 Deleted 5: dummy1: mtu 1500 master bridge0 state UNKNOWN link/ether 9e:da:60:ee:cf:c8 5: dummy1: mtu 1500 qdisc noqueue state UNKNOWN group default link/ether 9e:da:60:ee:cf:c8 brd ff:ff:ff:ff:ff:ff Without this patch, the last netlink notification is not sent. Signed-off-by: Vincent Bernat Reviewed-by: Stephen Hemminger Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/bridge/br_ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 7970f8540cbb..66cd98772051 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -102,6 +102,9 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) else ret = br_del_if(br, dev); + if (!ret) + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL); + return ret; } -- cgit v1.2.3 From 2ed343f98178ff232b504bbd006b34db07835082 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 16:29:33 +0530 Subject: net:nfc: use setup_timer Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/nfc/core.c b/net/nfc/core.c index 5cf33df888c3..e5e23c2cbe74 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - init_timer(&dev->check_pres_timer); - dev->check_pres_timer.data = (unsigned long)dev; - dev->check_pres_timer.function = nfc_check_pres_timeout; + setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout, + (unsigned long)dev); INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); } -- cgit v1.2.3 From 802be571348681a8ee052850e47b9652de7e05d4 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:17:55 +0530 Subject: net: wan : hdlc: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_fr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 78596e42a3f3..425a47ffed25 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1003,11 +1003,10 @@ static void fr_start(struct net_device *dev) state(hdlc)->n391cnt = 0; state(hdlc)->txseq = state(hdlc)->rxseq = 0; - init_timer(&state(hdlc)->timer); + setup_timer(&state(hdlc)->timer, fr_timer, + (unsigned long)dev); /* First poll after 1 s */ state(hdlc)->timer.expires = jiffies + HZ; - state(hdlc)->timer.function = fr_timer; - state(hdlc)->timer.data = (unsigned long)dev; add_timer(&state(hdlc)->timer); } else fr_set_link_state(1, dev); -- cgit v1.2.3 From e53a84b2b47119cbb895a4a83e6675ec50cf9fe5 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:24:15 +0530 Subject: net: usb: catc: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/usb/catc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index dbc90313f472..aeb62e17d19d 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -805,9 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id spin_lock_init(&catc->tx_lock); spin_lock_init(&catc->ctrl_lock); - init_timer(&catc->timer); - catc->timer.data = (long) catc; - catc->timer.function = catc_stats_timer; + setup_timer(&catc->timer, catc_stats_timer, (long)catc); catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL); catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL); -- cgit v1.2.3 From 8447779637172809060e5064afdf52f16a09aa13 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:32:58 +0530 Subject: net: ti: netcp: use setup_timer Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 28cb38af1a34..4ad821655e51 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, } spin_unlock_bh(&gbe_dev->hw_stats_lock); - init_timer(&gbe_dev->timer); - gbe_dev->timer.data = (unsigned long)gbe_dev; - gbe_dev->timer.function = netcp_ethss_timer; + setup_timer(&gbe_dev->timer, netcp_ethss_timer, + (unsigned long)gbe_dev); gbe_dev->timer.expires = jiffies + GBE_TIMER_INTERVAL; add_timer(&gbe_dev->timer); *inst_priv = gbe_dev; -- cgit v1.2.3 From a1f3316dd7b5ce740c774697c664e2e60d095794 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 21 Sep 2017 18:18:23 -0700 Subject: ipv4: Move fib_has_custom_local_routes outside of IP_MULTIPLE_TABLES. > net/ipv4/fib_frontend.c: In function 'fib_validate_source': > net/ipv4/fib_frontend.c:411:16: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > if (net->ipv4.fib_has_custom_local_routes) > ^ > net/ipv4/fib_frontend.c: In function 'inet_rtm_newroute': > net/ipv4/fib_frontend.c:773:12: error: 'struct netns_ipv4' has no member named 'fib_has_custom_local_routes' > net->ipv4.fib_has_custom_local_routes = true; > ^ Fixes: 6e617de84e87 ("net: avoid a full fib lookup when rp_filter is disabled.") Reported-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 20720721da4b..8387f099115e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,10 +49,10 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - bool fib_has_custom_local_routes; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif + bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID int fib_num_tclassid_users; #endif -- cgit v1.2.3 From a424120765009df13957af20ba4d18e531cfe643 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2017 07:50:28 -0700 Subject: net: vrf: remove skb_dst_force() after skb_dst_set() skb_dst_set(skb, dst) installs a normal (refcounted) dst, there is no point using skb_dst_force(skb) Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9b243e6f3008..cc18b7b11612 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -132,7 +132,6 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb_orphan(skb); skb_dst_set(skb, dst); - skb_dst_force(skb); /* set pkt_type to avoid skb hitting packet taps twice - * once on Tx and again in Rx processing -- cgit v1.2.3 From 411d788a23f7e20b8fc51b548c7204fdecc9d22e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Sep 2017 17:36:08 +0200 Subject: test_rhashtable: remove initdata annotation kbuild test robot reported a section mismatch warning w. gcc 4.x: WARNING: lib/test_rhashtable.o(.text+0x139e): Section mismatch in reference from the function rhltable_insert.clone.3() to the variable .init.data:rhlt so remove this annotation. Fixes: cdd4de372ea06 ("test_rhashtable: add test case for rhl_table interface") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index de4d0584631a..8e83cbdc049c 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -251,7 +251,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array, } static struct rhashtable ht; -static struct rhltable rhlt __initdata; +static struct rhltable rhlt; static int __init test_rhltable(unsigned int entries) { -- cgit v1.2.3 From dd5437974964c759570d68e50ce13c313808f79a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 22 Sep 2017 14:38:58 +0800 Subject: virtio-net: correctly set xdp_xmit for mergeable buffer We should set xdp_xmit only when xdp_do_redirect() succeed. Cc: John Fastabend Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f6c1f135a024..dd14a4547932 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -721,7 +721,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (err) + if (!err) *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; -- cgit v1.2.3 From 242c1a28eb61cb34974e8aa05235d84355940a8a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 22 Sep 2017 10:25:22 +0800 Subject: net: Remove useless function skb_header_release There is no one which would invokes the function skb_header_release. So just remove it now. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 2 +- include/linux/skbuff.h | 19 ------------------- net/batman-adv/soft-interface.c | 2 +- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 522d2900cd1d..f4d7362eb325 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -245,7 +245,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, * - We are allowed to put 4 bytes at tail if skb_cloned() * is false (and if we have 4 bytes of tailroom) * - * TCP packets for example are cloned, but skb_header_release() + * TCP packets for example are cloned, but __skb_header_release() * was called in tcp stack, allowing us to use headroom for our needs. */ if (!skb_header_cloned(skb) && diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 492828801acb..f9db5539a6fb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1456,28 +1456,9 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/** - * skb_header_release - release reference to header - * @skb: buffer to operate on - * - * Drop a reference to the header part of the buffer. This is done - * by acquiring a payload reference. You must not read from the header - * part of skb->data after this. - * Note : Check if you can use __skb_header_release() instead. - */ -static inline void skb_header_release(struct sk_buff *skb) -{ - BUG_ON(skb->nohdr); - skb->nohdr = 1; - atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); -} - /** * __skb_header_release - release reference to header * @skb: buffer to operate on - * - * Variant of skb_header_release() assuming skb is private to caller. - * We can avoid one atomic operation. */ static inline void __skb_header_release(struct sk_buff *skb) { diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 10f7edfb176e..c2c986746d0b 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -69,7 +69,7 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) int result; /* TODO: We must check if we can release all references to non-payload - * data using skb_header_release in our skbs to allow skb_cow_header to + * data using __skb_header_release in our skbs to allow skb_cow_header to * work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer -- cgit v1.2.3 From 52a59bd509e3dc458be99dcf333b778e6e3b3749 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:33:29 +0300 Subject: net: use 32-bit arithmetic while allocating net device Private part of allocation is never big enough to warrant size_t. Space savings: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-10 (-10) function old new delta alloc_netdev_mqs 1120 1110 -10 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index fb766d906148..37d6a3c59e69 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7989,7 +7989,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - size_t alloc_size; + unsigned int alloc_size; struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); -- cgit v1.2.3 From 6a345b3dbd1ed83a7877993c6e23c977a84bb483 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:13 +0530 Subject: cxgb4: add tc flower offload skeleton Add basic skeleton to prepare for offloading tc-flower flows. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/Makefile | 4 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 22 +++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 57 ++++++++++++++++++++++ .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 46 +++++++++++++++++ 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c create mode 100644 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 817212702f0a..fecd7aab673b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4_ptp.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ + cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5fe81a4e26a6..5079246aaf2c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -79,6 +79,7 @@ #include "l2t.h" #include "sched.h" #include "cxgb4_tc_u32.h" +#include "cxgb4_tc_flower.h" #include "cxgb4_ptp.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -2873,6 +2874,25 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } +static int cxgb_setup_tc_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) +{ + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return cxgb4_tc_flower_replace(dev, cls_flower); + case TC_CLSFLOWER_DESTROY: + return cxgb4_tc_flower_destroy(dev, cls_flower); + case TC_CLSFLOWER_STATS: + return cxgb4_tc_flower_stats(dev, cls_flower); + default: + return -EOPNOTSUPP; + } +} + static int cxgb_setup_tc_cls_u32(struct net_device *dev, struct tc_cls_u32_offload *cls_u32) { @@ -2907,6 +2927,8 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_CLSU32: return cxgb_setup_tc_cls_u32(dev, type_data); + case TC_SETUP_CLSFLOWER: + return cxgb_setup_tc_flower(dev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c new file mode 100644 index 000000000000..16dff71e4d02 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cxgb4.h" +#include "cxgb4_tc_flower.h" + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} + +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} + +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h new file mode 100644 index 000000000000..b321fc205b5a --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -0,0 +1,46 @@ +/* + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_FLOWER_H +#define __CXGB4_TC_FLOWER_H + +#include + +int cxgb4_tc_flower_replace(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_destroy(struct net_device *dev, + struct tc_cls_flower_offload *cls); +int cxgb4_tc_flower_stats(struct net_device *dev, + struct tc_cls_flower_offload *cls); +#endif /* __CXGB4_TC_FLOWER_H */ -- cgit v1.2.3 From 62488e4b53ae02d82ac000f91ec82b5cfb41d6f2 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:14 +0530 Subject: cxgb4: add basic tc flower offload support Add support to add/remove flows for offload. Following match and action are supported for offloading a flow: Match: ether-protocol, IPv4/IPv6 addresses, L4 ports (TCP/UDP) Action: drop, redirect to another port on the device. The qualifying flows can have accompanying mask information. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 24 ++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 + .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 280 ++++++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 17 ++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 1 + 6 files changed, 325 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c4e997fdff64..d05721b06178 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -905,6 +905,9 @@ struct adapter { /* TC u32 offload */ struct cxgb4_tc_u32_table *tc_u32; struct chcr_stats_debug chcr_stats; + + /* TC flower offload */ + DECLARE_HASHTABLE(flower_anymatch_tbl, 9); }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 97ead2c66751..f3de9cdd4181 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -148,6 +148,30 @@ static int get_filter_steerq(struct net_device *dev, return iq; } +int cxgb4_get_free_ftid(struct net_device *dev, int family) +{ + struct adapter *adap = netdev2adap(dev); + struct tid_info *t = &adap->tids; + int ftid; + + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) { + ftid = find_first_zero_bit(t->ftid_bmap, t->nftids); + if (ftid >= t->nftids) + ftid = -1; + } else { + ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2); + if (ftid < 0) + goto out_unlock; + + /* this is only a lookup, keep the found region unallocated */ + bitmap_release_region(t->ftid_bmap, ftid, 2); + } +out_unlock: + spin_unlock_bh(&t->ftid_lock); + return ftid; +} + static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) { spin_lock_bh(&t->ftid_lock); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5079246aaf2c..ce33c3addc2b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5105,6 +5105,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (!adapter->tc_u32) dev_warn(&pdev->dev, "could not offload tc u32, continuing\n"); + + cxgb4_init_tc_flower(adapter); } if (is_offload(adapter)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index 16dff71e4d02..dda34d5a52fb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -38,16 +38,287 @@ #include "cxgb4.h" #include "cxgb4_tc_flower.h" +static struct ch_tc_flower_entry *allocate_flower_entry(void) +{ + struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); + return new; +} + +/* Must be called with either RTNL or rcu_read_lock */ +static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, + unsigned long flower_cookie) +{ + struct ch_tc_flower_entry *flower_entry; + + hash_for_each_possible_rcu(adap->flower_anymatch_tbl, flower_entry, + link, flower_cookie) + if (flower_entry->tc_flower_cookie == flower_cookie) + return flower_entry; + return NULL; +} + +static void cxgb4_process_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + u16 addr_type = 0; + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + cls->key); + + addr_type = key->addr_type; + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->key); + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_BASIC, + cls->mask); + u16 ethtype_key = ntohs(key->n_proto); + u16 ethtype_mask = ntohs(mask->n_proto); + + if (ethtype_key == ETH_P_ALL) { + ethtype_key = 0; + ethtype_mask = 0; + } + + fs->val.ethtype = ethtype_key; + fs->mask.ethtype = ethtype_mask; + fs->val.proto = key->ip_proto; + fs->mask.proto = mask->ip_proto; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + cls->mask); + fs->type = 0; + memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst)); + memcpy(&fs->val.fip[0], &key->src, sizeof(key->src)); + memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src)); + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + cls->mask); + + fs->type = 1; + memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst)); + memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src)); + memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst)); + memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src)); + } + + if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key, *mask; + + key = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->key); + mask = skb_flow_dissector_target(cls->dissector, + FLOW_DISSECTOR_KEY_PORTS, + cls->mask); + fs->val.lport = cpu_to_be16(key->dst); + fs->mask.lport = cpu_to_be16(mask->dst); + fs->val.fport = cpu_to_be16(key->src); + fs->mask.fport = cpu_to_be16(mask->src); + } + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs->val.iport = netdev2pinfo(dev)->port_id; + fs->mask.iport = ~0; +} + +static int cxgb4_validate_flow_match(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + if (cls->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + netdev_warn(dev, "Unsupported key used: 0x%x\n", + cls->dissector->used_keys); + return -EOPNOTSUPP; + } + return 0; +} + +static void cxgb4_process_flow_actions(struct net_device *in, + struct tc_cls_flower_offload *cls, + struct ch_filter_specification *fs) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + } else if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); + struct net_device *out = __dev_get_by_index(dev_net(in), + ifindex); + struct port_info *pi = netdev_priv(out); + + fs->action = FILTER_SWITCH; + fs->eport = pi->port_id; + } + } +} + +static int cxgb4_validate_flow_actions(struct net_device *dev, + struct tc_cls_flower_offload *cls) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) { + /* Do nothing */ + } else if (is_tcf_mirred_egress_redirect(a)) { + struct adapter *adap = netdev2adap(dev); + struct net_device *n_dev; + unsigned int i, ifindex; + bool found = false; + + ifindex = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (ifindex == n_dev->ifindex) { + found = true; + break; + } + } + + /* If interface doesn't belong to our hw, then + * the provided output port is not valid + */ + if (!found) { + netdev_err(dev, "%s: Out port invalid\n", + __func__); + return -EINVAL; + } + } else { + netdev_err(dev, "%s: Unsupported action\n", __func__); + return -EOPNOTSUPP; + } + } + return 0; +} + int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + struct ch_filter_specification *fs; + struct filter_ctx ctx; + int fidx; + int ret; + + if (cxgb4_validate_flow_actions(dev, cls)) + return -EOPNOTSUPP; + + if (cxgb4_validate_flow_match(dev, cls)) + return -EOPNOTSUPP; + + ch_flower = allocate_flower_entry(); + if (!ch_flower) { + netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__); + return -ENOMEM; + } + + fs = &ch_flower->fs; + fs->hitcnts = 1; + cxgb4_process_flow_actions(dev, cls, fs); + cxgb4_process_flow_match(dev, cls, fs); + + fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET); + if (fidx < 0) { + netdev_err(dev, "%s: No fidx for offload.\n", __func__); + ret = -ENOMEM; + goto free_entry; + } + + init_completion(&ctx.completion); + ret = __cxgb4_set_filter(dev, fidx, fs, &ctx); + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) { + ret = -ETIMEDOUT; + goto free_entry; + } + + ret = ctx.result; + /* Check if hw returned error for filter creation */ + if (ret) { + netdev_err(dev, "%s: filter creation err %d\n", + __func__, ret); + goto free_entry; + } + + INIT_HLIST_NODE(&ch_flower->link); + ch_flower->tc_flower_cookie = cls->cookie; + ch_flower->filter_id = ctx.tid; + hash_add_rcu(adap->flower_anymatch_tbl, &ch_flower->link, cls->cookie); + + return ret; + +free_entry: + kfree(ch_flower); + return ret; } int cxgb4_tc_flower_destroy(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_entry *ch_flower; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) + return -ENOENT; + + ret = cxgb4_del_filter(dev, ch_flower->filter_id); + if (ret) + goto err; + + hash_del_rcu(&ch_flower->link); + kfree_rcu(ch_flower, rcu); + +err: + return ret; } int cxgb4_tc_flower_stats(struct net_device *dev, @@ -55,3 +326,8 @@ int cxgb4_tc_flower_stats(struct net_device *dev, { return -EOPNOTSUPP; } + +void cxgb4_init_tc_flower(struct adapter *adap) +{ + hash_init(adap->flower_anymatch_tbl); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index b321fc205b5a..6145a9e056eb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -37,10 +37,27 @@ #include +struct ch_tc_flower_stats { + u64 packet_count; + u64 byte_count; + u64 last_used; +}; + +struct ch_tc_flower_entry { + struct ch_filter_specification fs; + struct ch_tc_flower_stats stats; + unsigned long tc_flower_cookie; + struct hlist_node link; + struct rcu_head rcu; + u32 filter_id; +}; + int cxgb4_tc_flower_replace(struct net_device *dev, struct tc_cls_flower_offload *cls); int cxgb4_tc_flower_destroy(struct net_device *dev, struct tc_cls_flower_offload *cls); int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls); + +void cxgb4_init_tc_flower(struct adapter *adap); #endif /* __CXGB4_TC_FLOWER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 84541fce94c5..88487095d14f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -212,6 +212,7 @@ struct filter_ctx { struct ch_filter_specification; +int cxgb4_get_free_ftid(struct net_device *dev, int family); int __cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs, struct filter_ctx *ctx); -- cgit v1.2.3 From cf2885a70fc71d5f6b434b86eedfc18ad66ba6f6 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:15 +0530 Subject: cxgb4: add support to offload action vlan Add support for offloading tc-flower flows having vlan actions: pop, push and modify. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index dda34d5a52fb..e42d2efc9ea2 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -34,6 +34,7 @@ #include #include +#include #include "cxgb4.h" #include "cxgb4_tc_flower.h" @@ -185,6 +186,27 @@ static void cxgb4_process_flow_actions(struct net_device *in, fs->action = FILTER_SWITCH; fs->eport = pi->port_id; + } else if (is_tcf_vlan(a)) { + u32 vlan_action = tcf_vlan_action(a); + u8 prio = tcf_vlan_push_prio(a); + u16 vid = tcf_vlan_push_vid(a); + u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid; + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + fs->newvlan |= VLAN_REMOVE; + break; + case TCA_VLAN_ACT_PUSH: + fs->newvlan |= VLAN_INSERT; + fs->vlan = vlan_tci; + break; + case TCA_VLAN_ACT_MODIFY: + fs->newvlan |= VLAN_REWRITE; + fs->vlan = vlan_tci; + break; + default: + break; + } } } } @@ -222,6 +244,26 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, __func__); return -EINVAL; } + } else if (is_tcf_vlan(a)) { + u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); + u32 vlan_action = tcf_vlan_action(a); + + switch (vlan_action) { + case TCA_VLAN_ACT_POP: + break; + case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: + if (proto != ETH_P_8021Q) { + netdev_err(dev, "%s: Unsupported vlan proto\n", + __func__); + return -EOPNOTSUPP; + } + break; + default: + netdev_err(dev, "%s: Unsupported vlan action\n", + __func__); + return -EOPNOTSUPP; + } } else { netdev_err(dev, "%s: Unsupported action\n", __func__); return -EOPNOTSUPP; -- cgit v1.2.3 From e0f911c81e93fc23fe1a4fb0318ff1c3b1c9027f Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Thu, 21 Sep 2017 23:41:16 +0530 Subject: cxgb4: fetch stats for offloaded tc flower flows Add support to retrieve stats from hardware for offloaded tc flower flows. Also, poll for the stats of offloaded flows via timer callback. Signed-off-by: Kumar Sanghvi Signed-off-by: Rahul Lakkireddy Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 76 +++++++++++++++++++++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 79 +++++++++++++++++++++- .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h | 3 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 2 + 6 files changed, 161 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d05721b06178..0db3ab6ad094 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -908,6 +908,7 @@ struct adapter { /* TC flower offload */ DECLARE_HASHTABLE(flower_anymatch_tbl, 9); + struct timer_list flower_stats_timer; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index f3de9cdd4181..15361ca2857c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -148,6 +148,82 @@ static int get_filter_steerq(struct net_device *dev, return iq; } +static int get_filter_count(struct adapter *adapter, unsigned int fidx, + u64 *pkts, u64 *bytes) +{ + unsigned int tcb_base, tcbaddr; + unsigned int word_offset; + struct filter_entry *f; + __be64 be64_byte_count; + int ret; + + tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A); + if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids - 1)) && + fidx >= adapter->tids.nftids) + return -E2BIG; + + f = &adapter->tids.ftid_tab[fidx]; + if (!f->valid) + return -EINVAL; + + tcbaddr = tcb_base + f->tid * TCB_SIZE; + + spin_lock(&adapter->win0_lock); + if (is_t4(adapter->params.chip)) { + __be64 be64_count; + + /* T4 doesn't maintain byte counts in hw */ + *bytes = 0; + + /* Get pkts */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_count), + (__be32 *)&be64_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = be64_to_cpu(be64_count); + } else { + __be32 be32_count; + + /* Get bytes */ + word_offset = 4; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be64_byte_count), + &be64_byte_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *bytes = be64_to_cpu(be64_byte_count); + + /* Get pkts */ + word_offset = 6; + ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0, + tcbaddr + (word_offset * sizeof(__be32)), + sizeof(be32_count), + &be32_count, + T4_MEMORY_READ); + if (ret < 0) + goto out; + *pkts = (u64)be32_to_cpu(be32_count); + } + +out: + spin_unlock(&adapter->win0_lock); + return ret; +} + +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt) +{ + struct adapter *adapter = netdev2adap(dev); + + return get_filter_count(adapter, fidx, hitcnt, bytecnt); +} + int cxgb4_get_free_ftid(struct net_device *dev, int family) { struct adapter *adap = netdev2adap(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index ce33c3addc2b..aa93ae95d3b9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4637,6 +4637,7 @@ static void free_some_resources(struct adapter *adapter) kvfree(adapter->l2t); t4_cleanup_sched(adapter); kvfree(adapter->tids.tid_tab); + cxgb4_cleanup_tc_flower(adapter); cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index e42d2efc9ea2..a36bd66d2834 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -39,9 +39,12 @@ #include "cxgb4.h" #include "cxgb4_tc_flower.h" +#define STATS_CHECK_PERIOD (HZ / 2) + static struct ch_tc_flower_entry *allocate_flower_entry(void) { struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL); + spin_lock_init(&new->lock); return new; } @@ -363,13 +366,87 @@ err: return ret; } +void ch_flower_stats_cb(unsigned long data) +{ + struct adapter *adap = (struct adapter *)data; + struct ch_tc_flower_entry *flower_entry; + struct ch_tc_flower_stats *ofld_stats; + unsigned int i; + u64 packets; + u64 bytes; + int ret; + + rcu_read_lock(); + hash_for_each_rcu(adap->flower_anymatch_tbl, i, flower_entry, link) { + ret = cxgb4_get_filter_counters(adap->port[0], + flower_entry->filter_id, + &packets, &bytes); + if (!ret) { + spin_lock(&flower_entry->lock); + ofld_stats = &flower_entry->stats; + + if (ofld_stats->prev_packet_count != packets) { + ofld_stats->prev_packet_count = packets; + ofld_stats->last_used = jiffies; + } + spin_unlock(&flower_entry->lock); + } + } + rcu_read_unlock(); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); +} + int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls) { - return -EOPNOTSUPP; + struct adapter *adap = netdev2adap(dev); + struct ch_tc_flower_stats *ofld_stats; + struct ch_tc_flower_entry *ch_flower; + u64 packets; + u64 bytes; + int ret; + + ch_flower = ch_flower_lookup(adap, cls->cookie); + if (!ch_flower) { + ret = -ENOENT; + goto err; + } + + ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id, + &packets, &bytes); + if (ret < 0) + goto err; + + spin_lock_bh(&ch_flower->lock); + ofld_stats = &ch_flower->stats; + if (ofld_stats->packet_count != packets) { + if (ofld_stats->prev_packet_count != packets) + ofld_stats->last_used = jiffies; + tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count, + packets - ofld_stats->packet_count, + ofld_stats->last_used); + + ofld_stats->packet_count = packets; + ofld_stats->byte_count = bytes; + ofld_stats->prev_packet_count = packets; + } + spin_unlock_bh(&ch_flower->lock); + return 0; + +err: + return ret; } void cxgb4_init_tc_flower(struct adapter *adap) { hash_init(adap->flower_anymatch_tbl); + setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb, + (unsigned long)adap); + mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD); +} + +void cxgb4_cleanup_tc_flower(struct adapter *adap) +{ + if (adap->flower_stats_timer.function) + del_timer_sync(&adap->flower_stats_timer); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 6145a9e056eb..604feffc752e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -38,6 +38,7 @@ #include struct ch_tc_flower_stats { + u64 prev_packet_count; u64 packet_count; u64 byte_count; u64 last_used; @@ -49,6 +50,7 @@ struct ch_tc_flower_entry { unsigned long tc_flower_cookie; struct hlist_node link; struct rcu_head rcu; + spinlock_t lock; /* lock for stats */ u32 filter_id; }; @@ -60,4 +62,5 @@ int cxgb4_tc_flower_stats(struct net_device *dev, struct tc_cls_flower_offload *cls); void cxgb4_init_tc_flower(struct adapter *adap); +void cxgb4_cleanup_tc_flower(struct adapter *adap); #endif /* __CXGB4_TC_FLOWER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 88487095d14f..52324c77a4fe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -221,6 +221,8 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id, int cxgb4_set_filter(struct net_device *dev, int filter_id, struct ch_filter_specification *fs); int cxgb4_del_filter(struct net_device *dev, int filter_id); +int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx, + u64 *hitcnt, u64 *bytecnt); static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { -- cgit v1.2.3 From 39e50d9637f9a31967ac9e956b829ee8b50a750f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 22 Sep 2017 10:20:21 -0400 Subject: forcedeth: optimize the xmit/rx with unlikely In the xmit/rx fastpath, the function dma_map_single rarely fails. Therefore, add an unlikely() optimization to this error check conditional. Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- drivers/net/ethernet/nvidia/forcedeth.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index b605b94f4567..a235e8881af9 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -1817,8 +1817,8 @@ static int nv_alloc_rx(struct net_device *dev) skb->data, skb_tailroom(skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_rx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -1858,8 +1858,8 @@ static int nv_alloc_rx_optimized(struct net_device *dev) skb->data, skb_tailroom(skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_rx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_rx_ctx->dma))) { kfree_skb(skb); goto packet_dropped; } @@ -2227,8 +2227,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2268,7 +2268,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) frag, offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -2377,8 +2378,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev, skb->data + offset, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* on DMA mapping error - drop the packet */ dev_kfree_skb_any(skb); u64_stats_update_begin(&np->swstats_tx_syncp); @@ -2419,7 +2420,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, bcnt, DMA_TO_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + np->put_tx_ctx->dma))) { /* Unwind the mapped fragments */ do { @@ -5075,8 +5077,8 @@ static int nv_loopback_test(struct net_device *dev) test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data, skb_tailroom(tx_skb), DMA_FROM_DEVICE); - if (dma_mapping_error(&np->pci_dev->dev, - test_dma_addr)) { + if (unlikely(dma_mapping_error(&np->pci_dev->dev, + test_dma_addr))) { dev_kfree_skb_any(tx_skb); goto out; } -- cgit v1.2.3 From 373b8eeb0c15d4ce58f62afb12f213b1b5bbc3d3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:45:43 +0300 Subject: xfrm: make aead_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f002a2c5e33c..0be4c547e383 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1764,7 +1764,7 @@ static inline int xfrm_acquire_is_on(struct net *net) } #endif -static inline int aead_len(struct xfrm_algo_aead *alg) +static inline unsigned int aead_len(struct xfrm_algo_aead *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2bfbd9121e3b..32c67b80c3ce 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < aead_len(algp)) + if (nla_len(rt) < (int)aead_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; -- cgit v1.2.3 From 06cd22f830f28023b82455c82c7db65fc6cf9c16 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:46:30 +0300 Subject: xfrm: make xfrm_alg_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0be4c547e383..2abc0e117f11 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1769,7 +1769,7 @@ static inline unsigned int aead_len(struct xfrm_algo_aead *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_len(const struct xfrm_algo *alg) +static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 32c67b80c3ce..09512d90e6a5 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_len(algp)) return -EINVAL; switch (type) { -- cgit v1.2.3 From 1bd963a72e859d194d87a5a2a8839efee7e23102 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:09 +0300 Subject: xfrm: make xfrm_alg_auth_len() return unsigned int Key lengths can't be negative. Comparison with nla_len() is left signed just in case negative value can sneak in there. Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2abc0e117f11..5d5e11b653eb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1774,7 +1774,7 @@ static inline unsigned int xfrm_alg_len(const struct xfrm_algo *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) +static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 09512d90e6a5..465c23d4ea78 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_auth_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; -- cgit v1.2.3 From 5e708e47c44366453c33373940455a75fd33f635 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:47:50 +0300 Subject: xfrm: make xfrm_replay_state_esn_len() return unsigned int Replay detection bitmaps can't have negative length. Comparisons with nla_len() are left signed just in case negative value can sneak in there. Propagate unsignedness for code size savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-38 (-38) function old new delta xfrm_state_construct 1802 1800 -2 xfrm_update_ae_params 295 289 -6 xfrm_state_migrate 1345 1339 -6 xfrm_replay_notify_esn 349 337 -12 xfrm_replay_notify_bmp 345 333 -12 Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5d5e11b653eb..3cb618bbcfa5 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1779,7 +1779,7 @@ static inline unsigned int xfrm_alg_auth_len(const struct xfrm_algo_auth *alg) return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); } -static inline int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) +static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_esn *replay_esn) { return sizeof(*replay_esn) + replay_esn->bmp_len * sizeof(__u32); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 465c23d4ea78..83718db5ec9c 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) return -EINVAL; - if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && nla_len(rt) != sizeof(*rs)) return -EINVAL; } @@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; - int ulen; + unsigned int ulen; if (!replay_esn || !rp) return 0; @@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es /* Check the overall length and the internal bitmap length to avoid * potential overflow. */ - if (nla_len(rp) < ulen || + if (nla_len(rp) < (int)ulen || xfrm_replay_state_esn_len(replay_esn) != ulen || replay_esn->bmp_len != up->bmp_len) return -EINVAL; @@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; - int klen, ulen; + unsigned int klen, ulen; if (!rta) return 0; up = nla_data(rta); klen = xfrm_replay_state_esn_len(up); - ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up); p = kzalloc(klen, GFP_KERNEL); if (!p) -- cgit v1.2.3 From a1b831f23a2b3306ecf275872a54abcf97b0b977 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 21 Sep 2017 23:48:54 +0300 Subject: xfrm: eradicate size_t All netlink message sizes are a) unsigned, b) can't be >= 4GB in size because netlink doesn't support >= 64KB messages in the first place. All those size_t across the code are a scam especially across networking which likes to work with small numbers like 1500 or 65536. Propagate unsignedness and flip some "int" to "unsigned int" as well. This is preparation to switching nlmsg_new() to "unsigned int". Signed-off-by: Alexey Dobriyan Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_user.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 83718db5ec9c..f7a12aa15086 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn return 0; } -static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) +static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { - int len = 0; + unsigned int len = 0; if (xfrm_ctx) { len += sizeof(struct xfrm_user_sec_ctx); @@ -1031,7 +1031,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return -1; } -static inline size_t xfrm_spdinfo_msgsize(void) +static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_spdinfo)) @@ -1157,7 +1157,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } -static inline size_t xfrm_sadinfo_msgsize(void) +static inline unsigned int xfrm_sadinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_sadhinfo)) @@ -1633,7 +1633,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return copy_sec_ctx(xp->security, skb); return 0; } -static inline size_t userpolicy_type_attrsize(void) +static inline unsigned int userpolicy_type_attrsize(void) { #ifdef CONFIG_XFRM_SUB_POLICY return nla_total_size(sizeof(struct xfrm_userpolicy_type)); @@ -1850,9 +1850,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } -static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) +static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) { - size_t replay_size = x->replay_esn ? + unsigned int replay_size = x->replay_esn ? xfrm_replay_state_esn_len(x->replay_esn) : sizeof(struct xfrm_replay_state); @@ -2321,8 +2321,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma, - int with_encp) +static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma, + int with_encp) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) @@ -2566,7 +2566,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } -static inline size_t xfrm_expire_msgsize(void) +static inline unsigned int xfrm_expire_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + nla_total_size(sizeof(struct xfrm_mark)); @@ -2654,9 +2654,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c) return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } -static inline size_t xfrm_sa_len(struct xfrm_state *x) +static inline unsigned int xfrm_sa_len(struct xfrm_state *x) { - size_t l = 0; + unsigned int l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); if (x->aalg) { @@ -2701,8 +2701,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = xfrm_sa_len(x); - int headlen, err; + unsigned int len = xfrm_sa_len(x); + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { @@ -2776,8 +2777,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c } -static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, - struct xfrm_policy *xp) +static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2900,7 +2901,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return xp; } -static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2957,13 +2958,14 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int headlen, err; + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -3070,7 +3072,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct } -static inline size_t xfrm_report_msgsize(void) +static inline unsigned int xfrm_report_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); } @@ -3115,7 +3117,7 @@ static int xfrm_send_report(struct net *net, u8 proto, return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } -static inline size_t xfrm_mapping_msgsize(void) +static inline unsigned int xfrm_mapping_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); } -- cgit v1.2.3 From 50c8cd44ed5fcd2cbbfe19e5b1eb680aa4440186 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Fri, 1 Sep 2017 12:13:34 +0530 Subject: ath9k: remove cast to void pointer casting to void pointer from any pointer type and vice-versa is done implicitly and therefore casting is not needed in such a case. Done using Coccinellle. Semantic Patch used : @r@ expression x; void* e; type T; identifier f; @@ ( *((T *)e) | ((T *)x)[...] | ((T *)x)->f | - (T *) e ) Signed-off-by: Himanshu Jha Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_mac.c | 4 ++-- drivers/net/wireless/ath/ath9k/dfs.c | 2 +- drivers/net/wireless/ath/ath9k/hif_usb.c | 8 ++++---- drivers/net/wireless/ath/ath9k/htc_drv_beacon.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_init.c | 24 ++++++++++++------------ drivers/net/wireless/ath/ath9k/htc_drv_main.c | 2 +- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 6 +++--- drivers/net/wireless/ath/ath9k/init.c | 8 ++++---- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/ath9k/mci.c | 2 +- drivers/net/wireless/ath/ath9k/wmi.c | 4 ++-- 11 files changed, 32 insertions(+), 32 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c index b3f20b3c0210..e1fe7a7c3ad8 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c @@ -480,7 +480,7 @@ EXPORT_SYMBOL(ath9k_hw_addrxbuf_edma); int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs, void *buf_addr) { - struct ar9003_rxs *rxsp = (struct ar9003_rxs *) buf_addr; + struct ar9003_rxs *rxsp = buf_addr; unsigned int phyerr; if ((rxsp->status11 & AR_RxDone) == 0) @@ -610,7 +610,7 @@ void ath9k_hw_setup_statusring(struct ath_hw *ah, void *ts_start, ah->ts_paddr_start = ts_paddr_start; ah->ts_paddr_end = ts_paddr_start + (size * sizeof(struct ar9003_txs)); ah->ts_size = size; - ah->ts_ring = (struct ar9003_txs *) ts_start; + ah->ts_ring = ts_start; ath9k_hw_reset_txstatus_ring(ah); } diff --git a/drivers/net/wireless/ath/ath9k/dfs.c b/drivers/net/wireless/ath/ath9k/dfs.c index 1ece42c2443d..40a397fd0e0e 100644 --- a/drivers/net/wireless/ath/ath9k/dfs.c +++ b/drivers/net/wireless/ath/ath9k/dfs.c @@ -326,7 +326,7 @@ void ath9k_dfs_process_phyerr(struct ath_softc *sc, void *data, if (ard.ext_rssi & 0x80) ard.ext_rssi = 0; - vdata_end = (char *)data + datalen; + vdata_end = data + datalen; ard.pulse_bw_info = vdata_end[-1]; ard.pulse_length_ext = vdata_end[-2]; ard.pulse_length_pri = vdata_end[-3]; diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index c5f4dd808745..56676eaff24c 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -424,7 +424,7 @@ static int hif_usb_send_tx(struct hif_device_usb *hif_dev, struct sk_buff *skb) static void hif_usb_start(void *hif_handle) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; unsigned long flags; hif_dev->flags |= HIF_USB_START; @@ -436,7 +436,7 @@ static void hif_usb_start(void *hif_handle) static void hif_usb_stop(void *hif_handle) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; struct tx_buf *tx_buf = NULL, *tx_buf_tmp = NULL; unsigned long flags; @@ -457,7 +457,7 @@ static void hif_usb_stop(void *hif_handle) static int hif_usb_send(void *hif_handle, u8 pipe_id, struct sk_buff *skb) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; int ret = 0; switch (pipe_id) { @@ -492,7 +492,7 @@ static inline bool check_index(struct sk_buff *skb, u8 idx) static void hif_usb_sta_drain(void *hif_handle, u8 idx) { - struct hif_device_usb *hif_dev = (struct hif_device_usb *)hif_handle; + struct hif_device_usb *hif_dev = hif_handle; struct sk_buff *skb, *tmp; unsigned long flags; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index 2c0e4d26e8f9..f20c839aeda2 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -384,7 +384,7 @@ void ath9k_htc_set_tsfadjust(struct ath9k_htc_priv *priv, static void ath9k_htc_beacon_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - bool *beacon_configured = (bool *)data; + bool *beacon_configured = data; struct ath9k_htc_vif *avp = (struct ath9k_htc_vif *) vif->drv_priv; if (vif->type == NL80211_IFTYPE_STATION && diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index da2164b0cccc..57de6fa6ff03 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -233,7 +233,7 @@ static void ath9k_reg_notifier(struct wiphy *wiphy, static unsigned int ath9k_regread(void *hw_priv, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; __be32 val, reg = cpu_to_be32(reg_offset); @@ -255,7 +255,7 @@ static unsigned int ath9k_regread(void *hw_priv, u32 reg_offset) static void ath9k_multi_regread(void *hw_priv, u32 *addr, u32 *val, u16 count) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; __be32 tmpaddr[8]; @@ -301,7 +301,7 @@ static void ath9k_regwrite_multi(struct ath_common *common) static void ath9k_regwrite_single(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; const __be32 buf[2] = { @@ -322,7 +322,7 @@ static void ath9k_regwrite_single(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_regwrite_buffer(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -345,7 +345,7 @@ static void ath9k_regwrite_buffer(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_regwrite(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -357,7 +357,7 @@ static void ath9k_regwrite(void *hw_priv, u32 val, u32 reg_offset) static void ath9k_enable_regwrite_buffer(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -366,7 +366,7 @@ static void ath9k_enable_regwrite_buffer(void *hw_priv) static void ath9k_regwrite_flush(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -383,7 +383,7 @@ static void ath9k_regwrite_flush(void *hw_priv) static void ath9k_reg_rmw_buffer(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; u32 rsp_status; @@ -421,7 +421,7 @@ static void ath9k_reg_rmw_buffer(void *hw_priv, static void ath9k_reg_rmw_flush(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; u32 rsp_status; @@ -453,7 +453,7 @@ static void ath9k_reg_rmw_flush(void *hw_priv) static void ath9k_enable_rmw_buffer(void *hw_priv) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; @@ -466,7 +466,7 @@ static void ath9k_enable_rmw_buffer(void *hw_priv) static u32 ath9k_reg_rmw_single(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; struct register_rmw buf, buf_ret; @@ -490,7 +490,7 @@ static u32 ath9k_reg_rmw_single(void *hw_priv, static u32 ath9k_reg_rmw(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) common->priv; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index a553c91d41a1..f808e5833d7e 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1483,7 +1483,7 @@ static void ath9k_htc_set_bssid(struct ath9k_htc_priv *priv) static void ath9k_htc_bss_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *)data; + struct ath9k_htc_priv *priv = data; struct ath_common *common = ath9k_hw_common(priv->ah); struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index b38a586ea59a..2682da02da54 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -641,7 +641,7 @@ static struct sk_buff* ath9k_htc_tx_get_packet(struct ath9k_htc_priv *priv, void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) { - struct wmi_event_txstatus *txs = (struct wmi_event_txstatus *)wmi_event; + struct wmi_event_txstatus *txs = wmi_event; struct __wmi_event_txstatus *__txs; struct sk_buff *skb; struct ath9k_htc_tx_event *tx_pend; @@ -684,7 +684,7 @@ void ath9k_htc_txstatus(struct ath9k_htc_priv *priv, void *wmi_event) void ath9k_htc_txep(void *drv_priv, struct sk_buff *skb, enum htc_endpoint_id ep_id, bool txok) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *) drv_priv; + struct ath9k_htc_priv *priv = drv_priv; struct ath9k_htc_tx_ctl *tx_ctl; struct sk_buff_head *epid_queue; @@ -1103,7 +1103,7 @@ requeue: void ath9k_htc_rxep(void *drv_priv, struct sk_buff *skb, enum htc_endpoint_id ep_id) { - struct ath9k_htc_priv *priv = (struct ath9k_htc_priv *)drv_priv; + struct ath9k_htc_priv *priv = drv_priv; struct ath_hw *ah = priv->ah; struct ath_common *common = ath9k_hw_common(ah); struct ath9k_htc_rxbuf *rxbuf = NULL, *tmp_buf = NULL; diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c index bb7936090b91..4bc403620cf9 100644 --- a/drivers/net/wireless/ath/ath9k/init.c +++ b/drivers/net/wireless/ath/ath9k/init.c @@ -117,7 +117,7 @@ static const struct ath_ps_ops ath9k_ps_ops = { static void ath9k_iowrite32(void *hw_priv, u32 val, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; @@ -132,7 +132,7 @@ static void ath9k_iowrite32(void *hw_priv, u32 val, u32 reg_offset) static unsigned int ath9k_ioread32(void *hw_priv, u32 reg_offset) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; u32 val; @@ -172,7 +172,7 @@ static unsigned int __ath9k_reg_rmw(struct ath_softc *sc, u32 reg_offset, static unsigned int ath9k_reg_rmw(void *hw_priv, u32 reg_offset, u32 set, u32 clr) { - struct ath_hw *ah = (struct ath_hw *) hw_priv; + struct ath_hw *ah = hw_priv; struct ath_common *common = ath9k_hw_common(ah); struct ath_softc *sc = (struct ath_softc *) common->priv; unsigned long uninitialized_var(flags); @@ -275,7 +275,7 @@ int ath_descdma_setup(struct ath_softc *sc, struct ath_descdma *dd, if (!dd->dd_desc) return -ENOMEM; - ds = (u8 *) dd->dd_desc; + ds = dd->dd_desc; ath_dbg(common, CONFIG, "%s DMA map: %p (%u) -> %llx (%u)\n", name, ds, (u32) dd->dd_desc_len, ito64(dd->dd_desc_paddr), /*XXX*/(u32) dd->dd_desc_len); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 8b4ac7f0a09b..9c24bc077e6b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1193,7 +1193,7 @@ void ath9k_calculate_summary_state(struct ath_softc *sc, static void ath9k_tpc_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) { - int *power = (int *)data; + int *power = data; if (*power < vif->bss_conf.txpower) *power = vif->bss_conf.txpower; diff --git a/drivers/net/wireless/ath/ath9k/mci.c b/drivers/net/wireless/ath/ath9k/mci.c index cf23fd815211..39d46c203f6b 100644 --- a/drivers/net/wireless/ath/ath9k/mci.c +++ b/drivers/net/wireless/ath/ath9k/mci.c @@ -453,7 +453,7 @@ int ath_mci_setup(struct ath_softc *sc) mci->sched_buf.bf_len = ATH_MCI_SCHED_BUF_SIZE; mci->gpm_buf.bf_len = ATH_MCI_GPM_BUF_SIZE; - mci->gpm_buf.bf_addr = (u8 *)mci->sched_buf.bf_addr + mci->sched_buf.bf_len; + mci->gpm_buf.bf_addr = mci->sched_buf.bf_addr + mci->sched_buf.bf_len; mci->gpm_buf.bf_paddr = mci->sched_buf.bf_paddr + mci->sched_buf.bf_len; ret = ar9003_mci_setup(sc->sc_ah, mci->gpm_buf.bf_paddr, diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c index 64a354fa78ab..b0b5579b7560 100644 --- a/drivers/net/wireless/ath/ath9k/wmi.c +++ b/drivers/net/wireless/ath/ath9k/wmi.c @@ -159,7 +159,7 @@ void ath9k_wmi_event_tasklet(unsigned long data) switch (cmd_id) { case WMI_SWBA_EVENTID: - swba = (struct wmi_event_swba *) wmi_event; + swba = wmi_event; ath9k_htc_swba(priv, swba); break; case WMI_FATAL_EVENTID: @@ -207,7 +207,7 @@ static void ath9k_wmi_rsp_callback(struct wmi *wmi, struct sk_buff *skb) static void ath9k_wmi_ctrl_rx(void *priv, struct sk_buff *skb, enum htc_endpoint_id epid) { - struct wmi *wmi = (struct wmi *) priv; + struct wmi *wmi = priv; struct wmi_cmd_hdr *hdr; u16 cmd_id; -- cgit v1.2.3 From 896cbefadf62b431cba469089cc3bd5ecfe69ed5 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sun, 3 Sep 2017 14:19:31 +0200 Subject: ath9k: Use ARRAY_SIZE macro Use ARRAY_SIZE macro, rather than explicitly coding some variant of it yourself. Found with: find -type f -name "*.c" -o -name "*.h" | xargs perl -p -i -e 's/\bsizeof\s*\(\s*(\w+)\s*\)\s*\ /\s*sizeof\s*\(\s*\1\s*\[\s*0\s*\]\s*\) /ARRAY_SIZE(\1)/g' and manual check/verification. Signed-off-by: Thomas Meyer Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 3dbfd86ebe36..c2e210c0a770 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -15,6 +15,7 @@ */ #include +#include #include "hw.h" #include "ar9003_phy.h" #include "ar9003_eeprom.h" @@ -2946,14 +2947,12 @@ static const struct ar9300_eeprom *ar9300_eep_templates[] = { static const struct ar9300_eeprom *ar9003_eeprom_struct_find_by_id(int id) { -#define N_LOOP (sizeof(ar9300_eep_templates) / sizeof(ar9300_eep_templates[0])) int it; - for (it = 0; it < N_LOOP; it++) + for (it = 0; it < ARRAY_SIZE(ar9300_eep_templates); it++) if (ar9300_eep_templates[it]->templateVersion == id) return ar9300_eep_templates[it]; return NULL; -#undef N_LOOP } static int ath9k_hw_ar9300_check_eeprom(struct ath_hw *ah) -- cgit v1.2.3 From 496cbf3ebb6b37f89a8db84b3ed021eea2205fba Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Thu, 14 Sep 2017 00:31:52 +0530 Subject: ath10k: make ath10k_hw_ce_regs const Make them const as they are not modified in the file referencing them. They are only stored in the const field 'hw_ce_reg' of an ath10k structure. Also, make the declarations in the header const. Signed-off-by: Bhumika Goyal Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/hw.c | 4 ++-- drivers/net/wireless/ath/ath10k/hw.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index a860691d635d..07df7c6bc05b 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -310,7 +310,7 @@ static struct ath10k_hw_ce_dst_src_wm_regs wcn3990_wm_dst_ring = { .wm_high = &wcn3990_dst_wm_high, }; -struct ath10k_hw_ce_regs wcn3990_ce_regs = { +const struct ath10k_hw_ce_regs wcn3990_ce_regs = { .sr_base_addr = 0x00000000, .sr_size_addr = 0x00000008, .dr_base_addr = 0x0000000c, @@ -457,7 +457,7 @@ static struct ath10k_hw_ce_dst_src_wm_regs qcax_wm_dst_ring = { .wm_high = &qcax_dst_wm_high, }; -struct ath10k_hw_ce_regs qcax_ce_regs = { +const struct ath10k_hw_ce_regs qcax_ce_regs = { .sr_base_addr = 0x00000000, .sr_size_addr = 0x00000004, .dr_base_addr = 0x00000008, diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index 0c089f6dd3d9..f80840fae517 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -369,8 +369,8 @@ extern const struct ath10k_hw_values qca99x0_values; extern const struct ath10k_hw_values qca9888_values; extern const struct ath10k_hw_values qca4019_values; extern const struct ath10k_hw_values wcn3990_values; -extern struct ath10k_hw_ce_regs wcn3990_ce_regs; -extern struct ath10k_hw_ce_regs qcax_ce_regs; +extern const struct ath10k_hw_ce_regs wcn3990_ce_regs; +extern const struct ath10k_hw_ce_regs qcax_ce_regs; void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, u32 cc, u32 rcc, u32 cc_prev, u32 rcc_prev); -- cgit v1.2.3 From ba24d63dd3748bae134365e3bcfd9c13b4e3c3e4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 18 Sep 2017 22:59:19 +0300 Subject: ath9k: Avoid a potential deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep warns us that sc_pm_lock and cc_lock can cause a deadlock when cc_lock is acquired by itself with interrupts enabled. Disable irqs whenever taking cc_lock to avoid this. [ 19.094524] kworker/u2:0/5 just changed the state of lock: [ 19.094578] (&(&sc->sc_pm_lock)->rlock){-.-...}, at: [] ath_isr+0x15e/0x200 [ath9k] [ 19.094674] but this lock took another, HARDIRQ-unsafe lock in the past: [ 19.094731] (&(&common->cc_lock)->rlock){+.-...} [ 19.094741] and interrupts could create inverse lock ordering between them. [ 19.094866] other info that might help us debug this: [ 19.094926] Possible interrupt unsafe locking scenario: [ 19.094985] CPU0 CPU1 [ 19.095036] ---- ---- [ 19.095086] lock(&(&common->cc_lock)->rlock); [ 19.095197] local_irq_disable(); [ 19.095305] lock(&(&sc->sc_pm_lock)->rlock); [ 19.095423] lock(&(&common->cc_lock)->rlock); [ 19.095539] [ 19.095636] lock(&(&sc->sc_pm_lock)->rlock); [ 19.095745] *** DEADLOCK *** [ 19.095965] 3 locks held by kworker/u2:0/5: [ 19.096067] #0: ("%s"wiphy_name(local->hw.wiphy)){.+.+.+}, at: [] process_one_work+0x127/0x580 [ 19.096260] #1: ((&local->dynamic_ps_enable_work)){+.+...}, at: [] process_one_work+0x127/0x580 [ 19.096447] #2: (&sc->mutex){+.+...}, at: [] ath9k_config+0x30/0x1d0 [ath9k] [ 19.096639] the shortest dependencies between 2nd lock and 1st lock: [ 19.096813] -> (&(&common->cc_lock)->rlock){+.-...} ops: 38 { [ 19.096816] HARDIRQ-ON-W at: [ 19.096816] __lock_acquire+0x57e/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_bh+0x3f/0x50 [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k] [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] IN-SOFTIRQ-W at: [ 19.096816] __lock_acquire+0x55a/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock+0x3c/0x50 [ 19.096816] ath_ps_full_sleep+0x24/0x70 [ath9k] [ 19.096816] call_timer_fn+0xa4/0x300 [ 19.096816] run_timer_softirq+0x1b1/0x560 [ 19.096816] __do_softirq+0xb0/0x430 [ 19.096816] do_softirq_own_stack+0x33/0x40 [ 19.096816] irq_exit+0xad/0xc0 [ 19.096816] smp_apic_timer_interrupt+0x31/0x40 [ 19.096816] apic_timer_interrupt+0x37/0x3c [ 19.096816] wp_page_copy+0xb8/0x580 [ 19.096816] do_wp_page+0x64/0x420 [ 19.096816] handle_mm_fault+0x430/0x990 [ 19.096816] __do_page_fault+0x18b/0x430 [ 19.096816] do_page_fault+0xb/0x10 [ 19.096816] common_exception+0x62/0x6a [ 19.096816] INITIAL USE at: [ 19.096816] __lock_acquire+0x204/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_bh+0x3f/0x50 [ 19.096816] ath_chanctx_set_channel+0xb6/0x2c0 [ath9k] [ 19.096816] ath9k_config+0xa8/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_do_open+0x67a/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] } [ 19.096816] ... key at: [] __key.61991+0x0/0xffffc96c [ath9k] [ 19.096816] ... acquired at: [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock+0x3c/0x50 [ 19.096816] ath9k_ps_wakeup+0x85/0xe0 [ath9k] [ 19.096816] ath9k_bss_info_changed+0x2a/0x1b0 [ath9k] [ 19.096816] ieee80211_bss_info_change_notify+0xf3/0x360 [mac80211] [ 19.096816] ieee80211_recalc_txpower+0x33/0x40 [mac80211] [ 19.096816] ieee80211_set_tx_power+0x45/0x1d0 [mac80211] [ 19.096816] cfg80211_wext_siwtxpower+0xd3/0x350 [cfg80211] [ 19.096816] ioctl_standard_call+0x4e/0x400 [ 19.096816] wext_handle_ioctl+0xf4/0x190 [ 19.096816] dev_ioctl+0xb7/0x630 [ 19.096816] sock_ioctl+0x13e/0x2d0 [ 19.096816] do_vfs_ioctl+0x84/0x750 [ 19.096816] SyS_ioctl+0x34/0x60 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] -> (&(&sc->sc_pm_lock)->rlock){-.-...} ops: 597 { [ 19.096816] IN-HARDIRQ-W at: [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ret_from_fork+0x19/0x24 [ 19.096816] IN-SOFTIRQ-W at: [ 19.096816] __lock_acquire+0x55a/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k] [ 19.096816] ath9k_tasklet+0x42/0x260 [ath9k] [ 19.096816] tasklet_action+0x196/0x1e0 [ 19.096816] __do_softirq+0xb0/0x430 [ 19.096816] do_softirq_own_stack+0x33/0x40 [ 19.096816] irq_exit+0xad/0xc0 [ 19.096816] do_IRQ+0x65/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] get_page_from_freelist+0x20a/0x970 [ 19.096816] __alloc_pages_nodemask+0xca/0xed0 [ 19.096816] __get_free_pages+0x14/0x30 [ 19.096816] pgd_alloc+0x1d/0x160 [ 19.096816] mm_init.isra.47+0x13a/0x1b0 [ 19.096816] copy_process.part.54+0xb55/0x1700 [ 19.096816] _do_fork+0xd4/0x6a0 [ 19.096816] SyS_clone+0x27/0x30 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] INITIAL USE at: [ 19.096816] __lock_acquire+0x204/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath9k_ps_wakeup+0x24/0xe0 [ath9k] [ 19.096816] ath9k_start+0x29/0x1f0 [ath9k] [ 19.096816] drv_start+0x71/0x270 [mac80211] [ 19.096816] ieee80211_do_open+0x31f/0x920 [mac80211] [ 19.096816] ieee80211_open+0x41/0x50 [mac80211] [ 19.096816] __dev_open+0xab/0x140 [ 19.096816] __dev_change_flags+0x89/0x150 [ 19.096816] dev_change_flags+0x28/0x60 [ 19.096816] do_setlink+0x290/0x890 [ 19.096816] rtnl_newlink+0x7cf/0x8e0 [ 19.096816] rtnetlink_rcv_msg+0xbf/0x1f0 [ 19.096816] netlink_rcv_skb+0xb9/0xe0 [ 19.096816] rtnetlink_rcv+0x1e/0x30 [ 19.096816] netlink_unicast+0x13a/0x2c0 [ 19.096816] netlink_sendmsg+0x290/0x380 [ 19.096816] ___sys_sendmsg+0x1e2/0x280 [ 19.096816] __sys_sendmsg+0x3f/0x80 [ 19.096816] SyS_socketcall+0x58c/0x6b0 [ 19.096816] do_fast_syscall_32+0x96/0x1d0 [ 19.096816] entry_SYSENTER_32+0x4c/0x7b [ 19.096816] } [ 19.096816] ... key at: [] __key.61994+0x0/0xffffc984 [ath9k] [ 19.096816] ... acquired at: [ 19.096816] check_usage_forwards+0x118/0x120 [ 19.096816] mark_lock+0x2e4/0x590 [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ret_from_fork+0x19/0x24 [ 19.096816] stack backtrace: [ 19.096816] CPU: 0 PID: 5 Comm: kworker/u2:0 Not tainted 4.13.0-mgm-ovl+ #51 [ 19.096816] Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26 05/10/2004 [ 19.096816] Workqueue: phy0 ieee80211_dynamic_ps_enable_work [mac80211] [ 19.096816] Call Trace: [ 19.096816] [ 19.096816] dump_stack+0x16/0x19 [ 19.096816] print_irq_inversion_bug.part.37+0x16c/0x179 [ 19.096816] check_usage_forwards+0x118/0x120 [ 19.096816] ? ret_from_fork+0x19/0x24 [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 19.096816] mark_lock+0x2e4/0x590 [ 19.096816] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 19.096816] __lock_acquire+0x6ae/0x1260 [ 19.096816] lock_acquire+0xb1/0x1c0 [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k] [ 19.096816] _raw_spin_lock_irqsave+0x45/0x60 [ 19.096816] ? ath_isr+0x15e/0x200 [ath9k] [ 19.096816] ath_isr+0x15e/0x200 [ath9k] [ 19.096816] __handle_irq_event_percpu+0x44/0x340 [ 19.096816] handle_irq_event_percpu+0x1d/0x50 [ 19.096816] handle_irq_event+0x32/0x60 [ 19.096816] ? handle_nested_irq+0x100/0x100 [ 19.096816] handle_level_irq+0x81/0x100 [ 19.096816] handle_irq+0x9c/0xd0 [ 19.096816] [ 19.096816] do_IRQ+0x5c/0x120 [ 19.096816] common_interrupt+0x36/0x3c [ 19.096816] EIP: _raw_spin_unlock_irqrestore+0x57/0x70 [ 19.096816] EFLAGS: 00000286 CPU: 0 [ 19.096816] EAX: f60a3600 EBX: 00000286 ECX: 00000006 EDX: 00000001 [ 19.096816] ESI: f46c9e68 EDI: f46c8620 EBP: f60b5e8c ESP: f60b5e84 [ 19.096816] DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068 [ 19.096816] ath9k_config+0x16a/0x1d0 [ath9k] [ 19.096816] ieee80211_hw_config+0xa8/0x5f0 [mac80211] [ 19.096816] ? ieee80211_hw_config+0x1db/0x5f0 [mac80211] [ 19.096816] ieee80211_dynamic_ps_enable_work+0x1c3/0x680 [mac80211] [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] process_one_work+0x1d1/0x580 [ 19.096816] ? process_one_work+0x127/0x580 [ 19.096816] worker_thread+0x31/0x380 [ 19.096816] kthread+0xd9/0x110 [ 19.096816] ? process_one_work+0x580/0x580 [ 19.096816] ? kthread_create_on_node+0x30/0x30 [ 19.096816] ret_from_fork+0x19/0x24 Cc: QCA ath9k Development Cc: Kalle Valo Cc: netdev@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/channel.c | 5 +++-- drivers/net/wireless/ath/ath9k/link.c | 4 ++-- drivers/net/wireless/ath/ath9k/main.c | 16 +++++++++------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index f0439f2d566b..fad020aa222e 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -29,6 +29,7 @@ static int ath_set_channel(struct ath_softc *sc) struct cfg80211_chan_def *chandef = &sc->cur_chan->chandef; struct ieee80211_channel *chan = chandef->chan; int pos = chan->hw_value; + unsigned long flags; int old_pos = -1; int r; @@ -42,9 +43,9 @@ static int ath_set_channel(struct ath_softc *sc) chan->center_freq, chandef->width); /* update survey stats for the old channel before switching */ - spin_lock_bh(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_update_survey_stats(sc); - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); ath9k_cmn_get_channel(hw, ah, chandef); diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index 27c50562dc47..3f4f01c829f0 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -367,10 +367,10 @@ void ath_ani_calibrate(unsigned long data) /* Call ANI routine if necessary */ if (aniflag) { - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath9k_hw_ani_monitor(ah, ah->curchan); ath_update_survey_stats(sc); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); } /* Perform calibration if necessary */ diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 9c24bc077e6b..918773a9231b 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -97,11 +97,12 @@ void ath_ps_full_sleep(unsigned long data) { struct ath_softc *sc = (struct ath_softc *) data; struct ath_common *common = ath9k_hw_common(sc->sc_ah); + unsigned long flags; bool reset; - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_hw_cycle_counters_update(common); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); ath9k_hw_setrxabort(sc->sc_ah, 1); ath9k_hw_stopdmarecv(sc->sc_ah, &reset); @@ -394,10 +395,10 @@ void ath9k_tasklet(unsigned long data) if ((ah->config.hw_hang_checks & HW_BB_WATCHDOG) && (status & ATH9K_INT_BB_WATCHDOG)) { - spin_lock(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); ath_hw_cycle_counters_update(common); ar9003_hw_bb_watchdog_dbg_info(ah); - spin_unlock(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); if (ar9003_hw_bb_watchdog_check(ah)) { type = RESET_TYPE_BB_WATCHDOG; @@ -1955,12 +1956,13 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + unsigned long flags; int pos; if (IS_ENABLED(CONFIG_ATH9K_TX99)) return -EOPNOTSUPP; - spin_lock_bh(&common->cc_lock); + spin_lock_irqsave(&common->cc_lock, flags); if (idx == 0) ath_update_survey_stats(sc); @@ -1974,7 +1976,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, sband = hw->wiphy->bands[NL80211_BAND_5GHZ]; if (!sband || idx >= sband->n_channels) { - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); return -ENOENT; } @@ -1982,7 +1984,7 @@ static int ath9k_get_survey(struct ieee80211_hw *hw, int idx, pos = chan->hw_value; memcpy(survey, &sc->survey[pos], sizeof(*survey)); survey->channel = chan; - spin_unlock_bh(&common->cc_lock); + spin_unlock_irqrestore(&common->cc_lock, flags); return 0; } -- cgit v1.2.3 From fd52bdae9ab06b4f444b77d833f403ad07b9e72e Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Tue, 19 Sep 2017 14:33:23 +0200 Subject: wcn36xx: Disable 5GHz for wcn3620 wcn3620 can only operate on 2.4GHz band due to RF limitation. If wcn36xx digital block is associated with an external IRIS RF module, retrieve the id and disable 5GHz band in case of wcn3620 id. Signed-off-by: Loic Poulain Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/wcn36xx/main.c | 12 +++++++++++- drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 35bd50bcbbd5..fc4130829489 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1136,7 +1136,8 @@ static int wcn36xx_init_ieee80211(struct wcn36xx *wcn) BIT(NL80211_IFTYPE_MESH_POINT); wcn->hw->wiphy->bands[NL80211_BAND_2GHZ] = &wcn_band_2ghz; - wcn->hw->wiphy->bands[NL80211_BAND_5GHZ] = &wcn_band_5ghz; + if (wcn->rf_id != RF_IRIS_WCN3620) + wcn->hw->wiphy->bands[NL80211_BAND_5GHZ] = &wcn_band_5ghz; wcn->hw->wiphy->max_scan_ssids = WCN36XX_MAX_SCAN_SSIDS; wcn->hw->wiphy->max_scan_ie_len = WCN36XX_MAX_SCAN_IE_LEN; @@ -1169,6 +1170,7 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, struct platform_device *pdev) { struct device_node *mmio_node; + struct device_node *iris_node; struct resource *res; int index; int ret; @@ -1231,6 +1233,14 @@ static int wcn36xx_platform_get_resources(struct wcn36xx *wcn, goto unmap_ccu; } + /* External RF module */ + iris_node = of_find_node_by_name(mmio_node, "iris"); + if (iris_node) { + if (of_device_is_compatible(iris_node, "qcom,wcn3620")) + wcn->rf_id = RF_IRIS_WCN3620; + of_node_put(iris_node); + } + of_node_put(mmio_node); return 0; diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index 6aefba4c0cda..81017e6703b4 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -94,6 +94,9 @@ enum wcn36xx_ampdu_state { #define WCN36XX_FLAGS(__wcn) (__wcn->hw->flags) #define WCN36XX_MAX_POWER(__wcn) (__wcn->hw->conf.chandef.chan->max_power) +#define RF_UNKNOWN 0x0000 +#define RF_IRIS_WCN3620 0x3620 + static inline void buff_to_be(u32 *buf, size_t len) { int i; @@ -241,6 +244,9 @@ struct wcn36xx { struct sk_buff *tx_ack_skb; + /* RF module */ + unsigned rf_id; + #ifdef CONFIG_WCN36XX_DEBUGFS /* Debug file system entry */ struct wcn36xx_dfs_entry dfs; -- cgit v1.2.3 From 30ac407639390fab45b003003619e430c6f9f728 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Thu, 21 Sep 2017 18:13:06 +0530 Subject: brcmfmac: use setup_timer() helper Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 613caca7dc02..5adce0e36fe5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -4144,10 +4144,8 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev) init_waitqueue_head(&bus->dcmd_resp_wait); /* Set up the watchdog timer */ - init_timer(&bus->timer); - bus->timer.data = (unsigned long)bus; - bus->timer.function = brcmf_sdio_watchdog; - + setup_timer(&bus->timer, brcmf_sdio_watchdog, + (unsigned long)bus); /* Initialize watchdog thread */ init_completion(&bus->watchdog_wait); bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread, -- cgit v1.2.3 From d5633bb2c62a95be5f049ee1e7b9ae0869a93c41 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 15:03:16 +0100 Subject: brcmsmac: make const array ucode_ofdm_rates static, reduces object code size Don't populate const array ucode_ofdm_rates on the stack, instead make it static. Makes the object code smaller by 100 bytes: Before: text data bss dec hex filename 39482 564 0 40046 9c6e phy_cmn.o After text data bss dec hex filename 39326 620 0 39946 9c0a phy_cmn.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Acked-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c index 1c4e9dd57960..3a13d176b221 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c @@ -1916,7 +1916,7 @@ void wlc_phy_txpower_update_shm(struct brcms_phy *pi) pi->hwpwr_txcur); for (j = TXP_FIRST_OFDM; j <= TXP_LAST_OFDM; j++) { - const u8 ucode_ofdm_rates[] = { + static const u8 ucode_ofdm_rates[] = { 0x0c, 0x12, 0x18, 0x24, 0x30, 0x48, 0x60, 0x6c }; offset = wlapi_bmac_rate_shm_offset( -- cgit v1.2.3 From e0a576d7478230aaf51c479db57a2c5ce68e2575 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Fri, 8 Sep 2017 12:30:00 +0200 Subject: rtl8xxxu: Don't printk raw binary if serial number is not burned in. I assume that a blank efuse comes with all ones, thus I did not bother recognizing other possible junk values. This matches 100% of dongles I've seen (a single Gembird 8192eu). Signed-off-by: Adam Borowski Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 80fee699f58a..38b2ba1ac6f8 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -614,7 +614,10 @@ static int rtl8192eu_parse_efuse(struct rtl8xxxu_priv *priv) dev_info(&priv->udev->dev, "Vendor: %.7s\n", efuse->vendor_name); dev_info(&priv->udev->dev, "Product: %.11s\n", efuse->device_name); - dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial); + if (memchr_inv(efuse->serial, 0xff, 11)) + dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial); + else + dev_info(&priv->udev->dev, "Serial not available.\n"); if (rtl8xxxu_debug & RTL8XXXU_DEBUG_EFUSE) { unsigned char *raw = priv->efuse_wifi.raw; -- cgit v1.2.3 From 7dfb0ebd022b15dcafd513ae9270599799764e50 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 22:05:00 +0100 Subject: mwifiex: make const array tos_to_ac static, reduces object code size Don't populate the read-only const array tos_to_ac on the stack, instead make it static. Makes the object code smaller by 250 bytes: Before: text data bss dec hex filename 26104 2720 128 28952 7118 wmm.o After: text data bss dec hex filename 25758 2816 128 28702 701e wmm.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/marvell/mwifiex/wmm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index 0edd26881321..936a0a841af8 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -359,7 +359,8 @@ static enum mwifiex_wmm_ac_e mwifiex_wmm_convert_tos_to_ac(struct mwifiex_adapter *adapter, u32 tos) { /* Map of TOS UP values to WMM AC */ - const enum mwifiex_wmm_ac_e tos_to_ac[] = { WMM_AC_BE, + static const enum mwifiex_wmm_ac_e tos_to_ac[] = { + WMM_AC_BE, WMM_AC_BK, WMM_AC_BK, WMM_AC_BE, -- cgit v1.2.3 From 192524a4992a8e638a05147f02d6e42cb2d485e1 Mon Sep 17 00:00:00 2001 From: Pavani Muthyala Date: Thu, 21 Sep 2017 18:20:34 +0530 Subject: rsi: add version information We will dump information about firmware version, firmware file name and operating mode during initialization. Signed-off-by: Pavani Muthyala Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_debugfs.c | 19 ++++++------------- drivers/net/wireless/rsi/rsi_91x_hal.c | 13 +++++++++++++ drivers/net/wireless/rsi/rsi_91x_main.c | 25 +++++++++++++++++++++++++ drivers/net/wireless/rsi/rsi_hal.h | 3 +++ drivers/net/wireless/rsi/rsi_main.h | 14 ++++++++++---- 5 files changed, 57 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c index e98eb55c26cc..8c6ca8e689e4 100644 --- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c +++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c @@ -83,19 +83,12 @@ static int rsi_version_read(struct seq_file *seq, void *data) { struct rsi_common *common = seq->private; - common->driver_ver.major = 0; - common->driver_ver.minor = 1; - common->driver_ver.release_num = 0; - common->driver_ver.patch_num = 0; - seq_printf(seq, "Driver : %x.%d.%d.%d\nLMAC : %d.%d.%d.%d\n", - common->driver_ver.major, - common->driver_ver.minor, - common->driver_ver.release_num, - common->driver_ver.patch_num, - common->fw_ver.major, - common->fw_ver.minor, - common->fw_ver.release_num, - common->fw_ver.patch_num); + seq_printf(seq, "LMAC : %d.%d.%d.%d\n", + common->lmac_ver.major, + common->lmac_ver.minor, + common->lmac_ver.release_num, + common->lmac_ver.patch_num); + return 0; } diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 7e8e5d4f5f3d..71b02add81c8 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -769,6 +769,7 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content, static int rsi_load_firmware(struct rsi_hw *adapter) { + struct rsi_common *common = adapter->priv; struct rsi_host_intf_ops *hif_ops = adapter->host_intf_ops; const struct firmware *fw_entry = NULL; u32 regout_val = 0, content_size; @@ -844,6 +845,18 @@ static int rsi_load_firmware(struct rsi_hw *adapter) content_size = fw_entry->size; rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", content_size); + /* Get the firmware version */ + common->lmac_ver.ver.info.fw_ver[0] = + flash_content[LMAC_VER_OFFSET] & 0xFF; + common->lmac_ver.ver.info.fw_ver[1] = + flash_content[LMAC_VER_OFFSET + 1] & 0xFF; + common->lmac_ver.major = flash_content[LMAC_VER_OFFSET + 2] & 0xFF; + common->lmac_ver.release_num = + flash_content[LMAC_VER_OFFSET + 3] & 0xFF; + common->lmac_ver.minor = flash_content[LMAC_VER_OFFSET + 4] & 0xFF; + common->lmac_ver.patch_num = 0; + rsi_print_version(common); + status = bl_write_header(adapter, flash_content, content_size); if (status) { rsi_dbg(ERR_ZONE, diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index b57bfdcf3549..71b8cfb8252e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -20,6 +20,7 @@ #include #include "rsi_mgmt.h" #include "rsi_common.h" +#include "rsi_hal.h" u32 rsi_zone_enabled = /* INFO_ZONE | INIT_ZONE | @@ -56,6 +57,30 @@ void rsi_dbg(u32 zone, const char *fmt, ...) } EXPORT_SYMBOL_GPL(rsi_dbg); +static char *opmode_str(int oper_mode) +{ + switch (oper_mode) { + case RSI_DEV_OPMODE_WIFI_ALONE: + return "Wi-Fi alone"; + } + + return "Unknown"; +} + +void rsi_print_version(struct rsi_common *common) +{ + rsi_dbg(ERR_ZONE, "================================================\n"); + rsi_dbg(ERR_ZONE, "================ RSI Version Info ==============\n"); + rsi_dbg(ERR_ZONE, "================================================\n"); + rsi_dbg(ERR_ZONE, "FW Version\t: %d.%d.%d\n", + common->lmac_ver.major, common->lmac_ver.minor, + common->lmac_ver.release_num); + rsi_dbg(ERR_ZONE, "Operating mode\t: %d [%s]", + common->oper_mode, opmode_str(common->oper_mode)); + rsi_dbg(ERR_ZONE, "Firmware file\t: %s", common->priv->fw_file_name); + rsi_dbg(ERR_ZONE, "================================================\n"); +} + /** * rsi_prepare_skb() - This function prepares the skb. * @common: Pointer to the driver private structure. diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index ad0d6537a678..a09d36b6b765 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -101,6 +101,9 @@ #define BBP_INFO_40MHZ 0x6 +#define FW_FLASH_OFFSET 0x820 +#define LMAC_VER_OFFSET (FW_FLASH_OFFSET + 0x200) + struct bl_header { __le32 flags; __le32 image_no; diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index 34089ab111aa..a118b7aaeca0 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -113,8 +113,13 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); struct version_info { u16 major; u16 minor; - u16 release_num; - u16 patch_num; + u8 release_num; + u8 patch_num; + union { + struct { + u8 fw_ver[8]; + } info; + } ver; } __packed; struct skb_info { @@ -199,8 +204,7 @@ struct rsi_common { struct vif_priv vif_info[RSI_MAX_VIFS]; bool mgmt_q_block; - struct version_info driver_ver; - struct version_info fw_ver; + struct version_info lmac_ver; struct rsi_thread tx_thread; struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 2]; @@ -334,6 +338,8 @@ struct rsi_hw { int (*determine_event_timeout)(struct rsi_hw *adapter); }; +void rsi_print_version(struct rsi_common *common); + struct rsi_host_intf_ops { int (*read_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len); int (*write_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len); -- cgit v1.2.3 From 20db073327365f41e9b14feacb450df06758b520 Mon Sep 17 00:00:00 2001 From: Karun Eagalapati Date: Thu, 21 Sep 2017 18:21:28 +0530 Subject: rsi: sdio suspend and resume support SDIO suspend and resume handlers are implemented and verified that device works after suspend/resume cycle. Signed-off-by: Karun Eagalapati Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_sdio.c | 128 +++++++++++++++++++++++++++++++- drivers/net/wireless/rsi/rsi_sdio.h | 2 + 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 8d3a4839b6ef..b3f8006c0e9b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -1059,16 +1059,136 @@ static void rsi_disconnect(struct sdio_func *pfunction) } #ifdef CONFIG_PM +static int rsi_set_sdio_pm_caps(struct rsi_hw *adapter) +{ + struct rsi_91x_sdiodev *dev = + (struct rsi_91x_sdiodev *)adapter->rsi_dev; + struct sdio_func *func = dev->pfunction; + int ret; + + ret = sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); + if (ret) + rsi_dbg(ERR_ZONE, "Set sdio keep pwr flag failed: %d\n", ret); + + return ret; +} + +static int rsi_sdio_disable_interrupts(struct sdio_func *pfunc) +{ + struct rsi_hw *adapter = sdio_get_drvdata(pfunc); + u8 isr_status = 0, data = 0; + int ret; + unsigned long t1; + + rsi_dbg(INFO_ZONE, "Waiting for interrupts to be cleared.."); + t1 = jiffies; + do { + rsi_sdio_read_register(adapter, RSI_FN1_INT_REGISTER, + &isr_status); + rsi_dbg(INFO_ZONE, "."); + } while ((isr_status) && (jiffies_to_msecs(jiffies - t1) < 20)); + rsi_dbg(INFO_ZONE, "Interrupts cleared\n"); + + sdio_claim_host(pfunc); + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", + __func__); + goto done; + } + + data &= RSI_INT_ENABLE_MASK; + ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to write to int enable register\n", + __func__); + goto done; + } + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", + __func__); + goto done; + } + rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data); + +done: + sdio_release_host(pfunc); + return ret; +} + +static int rsi_sdio_enable_interrupts(struct sdio_func *pfunc) +{ + u8 data; + int ret; + + sdio_claim_host(pfunc); + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", __func__); + goto done; + } + + data |= ~RSI_INT_ENABLE_MASK & 0xff; + + ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to write to int enable register\n", + __func__); + goto done; + } + + ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data); + if (ret < 0) { + rsi_dbg(ERR_ZONE, + "%s: Failed to read int enable register\n", __func__); + goto done; + } + rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data); + +done: + sdio_release_host(pfunc); + return ret; +} + static int rsi_suspend(struct device *dev) { - /* Not yet implemented */ - return -ENOSYS; + int ret; + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common; + + if (!adapter) { + rsi_dbg(ERR_ZONE, "Device is not ready\n"); + return -ENODEV; + } + common = adapter->priv; + rsi_sdio_disable_interrupts(pfunction); + + ret = rsi_set_sdio_pm_caps(adapter); + if (ret) + rsi_dbg(INFO_ZONE, + "Setting power management caps failed\n"); + common->fsm_state = FSM_CARD_NOT_READY; + + return 0; } static int rsi_resume(struct device *dev) { - /* Not yet implemented */ - return -ENOSYS; + struct sdio_func *pfunction = dev_to_sdio_func(dev); + struct rsi_hw *adapter = sdio_get_drvdata(pfunction); + struct rsi_common *common = adapter->priv; + + common->fsm_state = FSM_MAC_INIT_DONE; + rsi_sdio_enable_interrupts(pfunction); + + return 0; } static const struct dev_pm_ops rsi_pm_ops = { diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h index 95e4bed57baf..49c549ba6682 100644 --- a/drivers/net/wireless/rsi/rsi_sdio.h +++ b/drivers/net/wireless/rsi/rsi_sdio.h @@ -48,6 +48,8 @@ enum sdio_interrupt_type { #define RSI_DEVICE_BUFFER_STATUS_REGISTER 0xf3 #define RSI_FN1_INT_REGISTER 0xf9 +#define RSI_INT_ENABLE_REGISTER 0x04 +#define RSI_INT_ENABLE_MASK 0xfc #define RSI_SD_REQUEST_MASTER 0x10000 /* FOR SD CARD ONLY */ -- cgit v1.2.3 From 77d68147745b98c25c9400f3880906333845f230 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:29 -0700 Subject: qtnfmac: convert channel width from bitfiled to simple enum This will allow to use qlink channel width values to specify BW setting corresponding to enum nl80211_chan_width. Current user is converted to apply BIT() macro manually to each individual qlink_channel_width enumeration value. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/qlink.h | 16 ++++++++-------- drivers/net/wireless/quantenna/qtnfmac/qlink_util.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index a8242f678496..a69fd470c115 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -108,14 +108,14 @@ enum qlink_sta_flags { }; enum qlink_channel_width { - QLINK_CHAN_WIDTH_5 = BIT(0), - QLINK_CHAN_WIDTH_10 = BIT(1), - QLINK_CHAN_WIDTH_20_NOHT = BIT(2), - QLINK_CHAN_WIDTH_20 = BIT(3), - QLINK_CHAN_WIDTH_40 = BIT(4), - QLINK_CHAN_WIDTH_80 = BIT(5), - QLINK_CHAN_WIDTH_80P80 = BIT(6), - QLINK_CHAN_WIDTH_160 = BIT(7), + QLINK_CHAN_WIDTH_5 = 0, + QLINK_CHAN_WIDTH_10, + QLINK_CHAN_WIDTH_20_NOHT, + QLINK_CHAN_WIDTH_20, + QLINK_CHAN_WIDTH_40, + QLINK_CHAN_WIDTH_80, + QLINK_CHAN_WIDTH_80P80, + QLINK_CHAN_WIDTH_160, }; /* QLINK Command messages related definitions diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index cf024c995fd6..369b77d7864e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -49,28 +49,28 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask) { u8 result = 0; - if (qlink_mask & QLINK_CHAN_WIDTH_5) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_5)) result |= BIT(NL80211_CHAN_WIDTH_5); - if (qlink_mask & QLINK_CHAN_WIDTH_10) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_10)) result |= BIT(NL80211_CHAN_WIDTH_10); - if (qlink_mask & QLINK_CHAN_WIDTH_20_NOHT) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20_NOHT)) result |= BIT(NL80211_CHAN_WIDTH_20_NOHT); - if (qlink_mask & QLINK_CHAN_WIDTH_20) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20)) result |= BIT(NL80211_CHAN_WIDTH_20); - if (qlink_mask & QLINK_CHAN_WIDTH_40) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_40)) result |= BIT(NL80211_CHAN_WIDTH_40); - if (qlink_mask & QLINK_CHAN_WIDTH_80) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80)) result |= BIT(NL80211_CHAN_WIDTH_80); - if (qlink_mask & QLINK_CHAN_WIDTH_80P80) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80P80)) result |= BIT(NL80211_CHAN_WIDTH_80P80); - if (qlink_mask & QLINK_CHAN_WIDTH_160) + if (qlink_mask & BIT(QLINK_CHAN_WIDTH_160)) result |= BIT(NL80211_CHAN_WIDTH_160); return result; -- cgit v1.2.3 From fac7f9bf14814fd2c722eaeec18ca78be2177d84 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:30 -0700 Subject: qtnfmac: make "Channel change" event report full channel info Specifically, it has to report center frequency, secondary center frequency (for 80+80) and BW. Introduce channel definition structure to qlink and modify channel change event processing function accordingly. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/event.c | 29 ++++++------ drivers/net/wireless/quantenna/qtnfmac/qlink.h | 18 +++++++- .../net/wireless/quantenna/qtnfmac/qlink_util.c | 52 ++++++++++++++++++++++ .../net/wireless/quantenna/qtnfmac/qlink_util.h | 4 ++ 4 files changed, 85 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 0fc2814eafad..df58e83fbbb8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -25,6 +25,7 @@ #include "trans.h" #include "util.h" #include "event.h" +#include "qlink_util.h" static int qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif, @@ -359,39 +360,35 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, { struct wiphy *wiphy = priv_to_wiphy(mac); struct cfg80211_chan_def chandef; - struct ieee80211_channel *chan; struct qtnf_vif *vif; - int freq; int i; if (len < sizeof(*data)) { - pr_err("payload is too short\n"); + pr_err("MAC%u: payload is too short\n", mac->macid); return -EINVAL; } - freq = le32_to_cpu(data->freq); - chan = ieee80211_get_channel(wiphy, freq); - if (!chan) { - pr_err("channel at %d MHz not found\n", freq); + qlink_chandef_q2cfg(wiphy, &data->chan, &chandef); + + if (!cfg80211_chandef_valid(&chandef)) { + pr_err("MAC%u: bad channel f1=%u f2=%u bw=%u\n", mac->macid, + chandef.center_freq1, chandef.center_freq2, + chandef.width); return -EINVAL; } - pr_debug("MAC%d switch to new channel %u MHz\n", mac->macid, freq); + pr_debug("MAC%d: new channel ieee=%u freq1=%u freq2=%u bw=%u\n", + mac->macid, chandef.chan->hw_value, chandef.center_freq1, + chandef.center_freq2, chandef.width); if (mac->status & QTNF_MAC_CSA_ACTIVE) { mac->status &= ~QTNF_MAC_CSA_ACTIVE; - if (chan->hw_value != mac->csa_chandef.chan->hw_value) + if (chandef.chan->hw_value != mac->csa_chandef.chan->hw_value) pr_warn("unexpected switch to %u during CSA to %u\n", - chan->hw_value, + chandef.chan->hw_value, mac->csa_chandef.chan->hw_value); } - /* FIXME: need to figure out proper nl80211_channel_type value */ - cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - /* fall-back to minimal safe chandef description */ - if (!cfg80211_chandef_valid(&chandef)) - cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20); - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); for (i = 0; i < QTNF_MAX_INTF; i++) { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index a69fd470c115..59368549c086 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -118,6 +118,20 @@ enum qlink_channel_width { QLINK_CHAN_WIDTH_160, }; +/** + * struct qlink_chandef - qlink channel definition + * + * @center_freq1: center frequency of first segment + * @center_freq2: center frequency of second segment (80+80 only) + * @width: channel width, one of @enum qlink_channel_width + */ +struct qlink_chandef { + __le16 center_freq1; + __le16 center_freq2; + u8 width; + u8 rsvd[3]; +} __packed; + /* QLINK Command messages related definitions */ @@ -764,11 +778,11 @@ struct qlink_event_bss_leave { /** * struct qlink_event_freq_change - data for QLINK_EVENT_FREQ_CHANGE event * - * @freq: new operating frequency in MHz + * @chan: new operating channel definition */ struct qlink_event_freq_change { struct qlink_event ehdr; - __le32 freq; + struct qlink_chandef chan; } __packed; enum qlink_rxmgmt_flags { diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c index 369b77d7864e..3c1db5bd6393 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c @@ -75,3 +75,55 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask) return result; } + +static enum nl80211_chan_width qlink_chanwidth_to_nl(u8 qlw) +{ + switch (qlw) { + case QLINK_CHAN_WIDTH_20_NOHT: + return NL80211_CHAN_WIDTH_20_NOHT; + case QLINK_CHAN_WIDTH_20: + return NL80211_CHAN_WIDTH_20; + case QLINK_CHAN_WIDTH_40: + return NL80211_CHAN_WIDTH_40; + case QLINK_CHAN_WIDTH_80: + return NL80211_CHAN_WIDTH_80; + case QLINK_CHAN_WIDTH_80P80: + return NL80211_CHAN_WIDTH_80P80; + case QLINK_CHAN_WIDTH_160: + return NL80211_CHAN_WIDTH_160; + case QLINK_CHAN_WIDTH_5: + return NL80211_CHAN_WIDTH_5; + case QLINK_CHAN_WIDTH_10: + return NL80211_CHAN_WIDTH_10; + default: + return -1; + } +} + +void qlink_chandef_q2cfg(struct wiphy *wiphy, + const struct qlink_chandef *qch, + struct cfg80211_chan_def *chdef) +{ + chdef->center_freq1 = le16_to_cpu(qch->center_freq1); + chdef->center_freq2 = le16_to_cpu(qch->center_freq2); + chdef->width = qlink_chanwidth_to_nl(qch->width); + + switch (chdef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + chdef->chan = ieee80211_get_channel(wiphy, chdef->center_freq1); + break; + case NL80211_CHAN_WIDTH_40: + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_160: + chdef->chan = ieee80211_get_channel(wiphy, + chdef->center_freq1 - 10); + break; + default: + chdef->chan = NULL; + break; + } +} diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h index de06c1e20b5b..5e49a8a09977 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h @@ -19,6 +19,7 @@ #include #include +#include #include "qlink.h" @@ -62,5 +63,8 @@ static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb, u16 qlink_iface_type_to_nl_mask(u16 qlink_type); u8 qlink_chan_width_mask_to_nl(u16 qlink_mask); +void qlink_chandef_q2cfg(struct wiphy *wiphy, + const struct qlink_chandef *qch, + struct cfg80211_chan_def *chdef); #endif /* _QTN_FMAC_QLINK_UTIL_H_ */ -- cgit v1.2.3 From 9e5478b608b5853dc877ea9c80be24349bcb453f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:31 -0700 Subject: qtnfmac: retrieve current channel info from EP Do not try to cache current operational channel info in driver, this is a potential source of synchronization issues + driver does not really need that info. Introduce GET_CHANNEL command and process it appropriately. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 35 +++++++++------------ drivers/net/wireless/quantenna/qtnfmac/commands.c | 38 +++++++++++++++++++++++ drivers/net/wireless/quantenna/qtnfmac/commands.h | 1 + drivers/net/wireless/quantenna/qtnfmac/qlink.h | 11 +++++++ 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 856fa6e8327e..0ef1285e7b93 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -793,37 +793,30 @@ qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct qtnf_wmac *mac = wiphy_priv(wiphy); struct net_device *ndev = wdev->netdev; struct qtnf_vif *vif; + int ret; if (!ndev) return -ENODEV; vif = qtnf_netdev_get_priv(wdev->netdev); - switch (vif->wdev.iftype) { - case NL80211_IFTYPE_STATION: - if (vif->sta_state == QTNF_STA_DISCONNECTED) { - pr_warn("%s: STA disconnected\n", ndev->name); - return -ENODATA; - } - break; - case NL80211_IFTYPE_AP: - if (!(vif->bss_status & QTNF_STATE_AP_START)) { - pr_warn("%s: AP not started\n", ndev->name); - return -ENODATA; - } - break; - default: - pr_err("unsupported vif type (%d)\n", vif->wdev.iftype); - return -ENODATA; + ret = qtnf_cmd_get_channel(vif, chandef); + if (ret) { + pr_err("%s: failed to get channel: %d\n", ndev->name, ret); + goto out; } - if (!cfg80211_chandef_valid(&mac->chandef)) { - pr_err("invalid channel settings on %s\n", ndev->name); - return -ENODATA; + if (!cfg80211_chandef_valid(chandef)) { + pr_err("%s: bad chan freq1=%u freq2=%u bw=%u\n", ndev->name, + chandef->center_freq1, chandef->center_freq2, + chandef->width); + ret = -ENODATA; } - memcpy(chandef, &mac->chandef, sizeof(*chandef)); - return 0; + memcpy(&mac->chandef, chandef, sizeof(mac->chandef)); + +out: + return ret; } static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 4206886b110c..806b88b964b0 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2358,3 +2358,41 @@ out: qtnf_bus_unlock(mac->bus); return ret; } + +int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef) +{ + struct qtnf_bus *bus = vif->mac->bus; + const struct qlink_resp_channel_get *resp; + struct sk_buff *cmd_skb; + struct sk_buff *resp_skb = NULL; + u16 res_code = QLINK_CMD_RESULT_OK; + int ret; + + cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid, + QLINK_CMD_CHAN_GET, + sizeof(struct qlink_cmd)); + if (unlikely(!cmd_skb)) + return -ENOMEM; + + qtnf_bus_lock(bus); + + ret = qtnf_cmd_send_with_reply(bus, cmd_skb, &resp_skb, &res_code, + sizeof(*resp), NULL); + + qtnf_bus_unlock(bus); + + if (unlikely(ret)) + goto out; + + if (unlikely(res_code != QLINK_CMD_RESULT_OK)) { + ret = -ENODATA; + goto out; + } + + resp = (const struct qlink_resp_channel_get *)resp_skb->data; + qlink_chandef_q2cfg(priv_to_wiphy(vif->mac), &resp->chan, chdef); + +out: + consume_skb(resp_skb); + return ret; +} diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index 783b20364296..e1bcb83d7705 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -75,5 +75,6 @@ int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel, struct qtnf_chan_stats *stats); int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, struct cfg80211_csa_settings *params); +int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef); #endif /* QLINK_COMMANDS_H_ */ diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h index 59368549c086..fb88f3ebf083 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h +++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h @@ -169,6 +169,7 @@ enum qlink_cmd_type { QLINK_CMD_REG_NOTIFY = 0x0019, QLINK_CMD_CHANS_INFO_GET = 0x001A, QLINK_CMD_CHAN_SWITCH = 0x001B, + QLINK_CMD_CHAN_GET = 0x001C, QLINK_CMD_CONFIG_AP = 0x0020, QLINK_CMD_START_AP = 0x0021, QLINK_CMD_STOP_AP = 0x0022, @@ -694,6 +695,16 @@ struct qlink_resp_get_chan_stats { u8 info[0]; } __packed; +/** + * struct qlink_resp_channel_get - response for QLINK_CMD_CHAN_GET command + * + * @chan: definition of current operating channel. + */ +struct qlink_resp_channel_get { + struct qlink_resp rhdr; + struct qlink_chandef chan; +} __packed; + /* QLINK Events messages related definitions */ -- cgit v1.2.3 From 96d4eaf20fb8d34a475c4086de3ed9bd483993d6 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:32 -0700 Subject: qtnfmac: do not cache channel info from "connect" command This makes no sense because real operational channel is choosen based on AP operation, not on what STA is configured to. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 15 +-------------- drivers/net/wireless/quantenna/qtnfmac/commands.c | 6 ++++-- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 0ef1285e7b93..17b323e2e83e 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -613,8 +613,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); - struct qtnf_wmac *mac = wiphy_priv(wiphy); - struct cfg80211_chan_def chandef; struct qtnf_bss_config *bss_cfg; int ret; @@ -627,18 +625,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, bss_cfg = &vif->bss_cfg; memset(bss_cfg, 0, sizeof(*bss_cfg)); - if (sme->channel) { - /* FIXME: need to set proper nl80211_channel_type value */ - cfg80211_chandef_create(&chandef, sme->channel, - NL80211_CHAN_HT20); - /* fall-back to minimal safe chandef description */ - if (!cfg80211_chandef_valid(&chandef)) - cfg80211_chandef_create(&chandef, sme->channel, - NL80211_CHAN_HT20); - - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); - } - bss_cfg->ssid_len = sme->ssid_len; memcpy(&bss_cfg->ssid, sme->ssid, bss_cfg->ssid_len); bss_cfg->auth_type = sme->auth_type; @@ -663,6 +649,7 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev, bss_cfg->connect_flags |= QLINK_STA_CONNECT_USE_RRM; memcpy(&bss_cfg->crypto, &sme->crypto, sizeof(bss_cfg->crypto)); + if (sme->bssid) ether_addr_copy(bss_cfg->bssid, sme->bssid); else diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 806b88b964b0..c55bae156b3c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2055,8 +2055,10 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif, ether_addr_copy(cmd->bssid, bss_cfg->bssid); - if (vif->mac->chandef.chan) - cmd->channel = cpu_to_le16(vif->mac->chandef.chan->hw_value); + if (sme->channel) + cmd->channel = cpu_to_le16(sme->channel->hw_value); + else + cmd->channel = 0; cmd->bg_scan_period = cpu_to_le16(bss_cfg->bg_scan_period); -- cgit v1.2.3 From 3656ab0fef5b29e95d251e3f0a0bfb7da311337a Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:33 -0700 Subject: qtnfmac: let wifi card handle channel switch request to the same chan No reason to verify channel switch request in driver, it can simply be forwarded to wireless device. Device can perform required checks and return appropriate error code, and driver may not even have information on current operational channel. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 17b323e2e83e..4590f3038120 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -846,11 +846,6 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - if (cfg80211_chandef_identical(¶ms->chandef, &mac->chandef)) { - pr_err("%s: switch request to the same channel\n", dev->name); - return -EALREADY; - } - ret = qtnf_cmd_send_chan_switch(mac, params); if (ret) pr_warn("%s: failed to switch to channel (%u)\n", -- cgit v1.2.3 From 8c015b9067d608e59a1486b8618eac9e0bd2952f Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:34 -0700 Subject: qtnfmac: pass VIF info to SendChannel command Do not assume whether wireless device can or can not handle switching several interfaces on a single radio to different channels. Device will handle it itself and will return appropriate error code. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/commands.c | 5 +++-- drivers/net/wireless/quantenna/qtnfmac/commands.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 4590f3038120..30f8be554406 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -846,7 +846,7 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - ret = qtnf_cmd_send_chan_switch(mac, params); + ret = qtnf_cmd_send_chan_switch(vif, params); if (ret) pr_warn("%s: failed to switch to channel (%u)\n", dev->name, params->chandef.chan->hw_value); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index c55bae156b3c..0138dadf93b1 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2306,15 +2306,16 @@ out: return ret; } -int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, +int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, struct cfg80211_csa_settings *params) { + struct qtnf_wmac *mac = vif->mac; struct qlink_cmd_chan_switch *cmd; struct sk_buff *cmd_skb; u16 res_code = QLINK_CMD_RESULT_OK; int ret; - cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0x0, + cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, vif->vifid, QLINK_CMD_CHAN_SWITCH, sizeof(*cmd)); diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h index e1bcb83d7705..8a5a82ce82cd 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.h +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h @@ -73,7 +73,7 @@ int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif, int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req); int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel, struct qtnf_chan_stats *stats); -int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac, +int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, struct cfg80211_csa_settings *params); int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef); -- cgit v1.2.3 From 97397633108a34ec5f5f316ffd1d3709e1c0479d Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:35 -0700 Subject: qtnfmac: do not cache CSA chandef info It is never used for anything useful, and all logic is handled by either WiFi card or higher layers. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 12 ------------ drivers/net/wireless/quantenna/qtnfmac/commands.c | 2 -- drivers/net/wireless/quantenna/qtnfmac/core.h | 1 - drivers/net/wireless/quantenna/qtnfmac/event.c | 8 +------- 4 files changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 30f8be554406..262e8cfd8f8d 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -809,7 +809,6 @@ out: static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) { - struct qtnf_wmac *mac = wiphy_priv(wiphy); struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -830,17 +829,6 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; } - if (vif->vifid != 0) { - if (!(mac->status & QTNF_MAC_CSA_ACTIVE)) - return -EOPNOTSUPP; - - if (!cfg80211_chandef_identical(¶ms->chandef, - &mac->csa_chandef)) - return -EINVAL; - - return 0; - } - if (!cfg80211_chandef_valid(¶ms->chandef)) { pr_err("%s: invalid channel\n", dev->name); return -EINVAL; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 0138dadf93b1..42f7e1dadd29 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2337,8 +2337,6 @@ int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, switch (res_code) { case QLINK_CMD_RESULT_OK: - memcpy(&mac->csa_chandef, ¶ms->chandef, - sizeof(mac->csa_chandef)); mac->status |= QTNF_MAC_CSA_ACTIVE; ret = 0; break; diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 066fcd1095a0..521ce094f2a6 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -147,7 +147,6 @@ struct qtnf_wmac { struct qtnf_vif iflist[QTNF_MAX_INTF]; struct cfg80211_scan_request *scan_req; struct cfg80211_chan_def chandef; - struct cfg80211_chan_def csa_chandef; struct mutex mac_lock; /* lock during wmac speicific ops */ struct timer_list scan_timeout; }; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index df58e83fbbb8..77563b0b1a7a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -381,13 +381,7 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, mac->macid, chandef.chan->hw_value, chandef.center_freq1, chandef.center_freq2, chandef.width); - if (mac->status & QTNF_MAC_CSA_ACTIVE) { - mac->status &= ~QTNF_MAC_CSA_ACTIVE; - if (chandef.chan->hw_value != mac->csa_chandef.chan->hw_value) - pr_warn("unexpected switch to %u during CSA to %u\n", - chandef.chan->hw_value, - mac->csa_chandef.chan->hw_value); - } + mac->status &= ~QTNF_MAC_CSA_ACTIVE; memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); -- cgit v1.2.3 From 6bfe61d697cb24b0838227c40bff8603addae652 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:36 -0700 Subject: qtnfmac: remove unused mac::status field There are no users of this field and it can safely be removed. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/commands.c | 1 - drivers/net/wireless/quantenna/qtnfmac/core.h | 5 ----- drivers/net/wireless/quantenna/qtnfmac/event.c | 2 -- 3 files changed, 8 deletions(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 42f7e1dadd29..8f95f9842f49 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2337,7 +2337,6 @@ int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif, switch (res_code) { case QLINK_CMD_RESULT_OK: - mac->status |= QTNF_MAC_CSA_ACTIVE; ret = 0; break; case QLINK_CMD_RESULT_ENOTFOUND: diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index 521ce094f2a6..2cd015048598 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -89,10 +89,6 @@ enum qtnf_sta_state { QTNF_STA_CONNECTED }; -enum qtnf_mac_status { - QTNF_MAC_CSA_ACTIVE = BIT(0) -}; - struct qtnf_vif { struct wireless_dev wdev; u8 vifid; @@ -141,7 +137,6 @@ struct qtnf_wmac { u8 macid; u8 wiphy_registered; u8 macaddr[ETH_ALEN]; - u32 status; struct qtnf_bus *bus; struct qtnf_mac_info macinfo; struct qtnf_vif iflist[QTNF_MAX_INTF]; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 77563b0b1a7a..b1acc24bdc39 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -381,8 +381,6 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, mac->macid, chandef.chan->hw_value, chandef.center_freq1, chandef.center_freq2, chandef.width); - mac->status &= ~QTNF_MAC_CSA_ACTIVE; - memcpy(&mac->chandef, &chandef, sizeof(mac->chandef)); for (i = 0; i < QTNF_MAX_INTF; i++) { -- cgit v1.2.3 From 115af851234fc2690753f71685a0007fa4c7f973 Mon Sep 17 00:00:00 2001 From: Igor Mitsyanko Date: Thu, 21 Sep 2017 14:34:37 -0700 Subject: qtnfmac: do not report channel changes until wiphy is registered Wireless device may send "channel changed" event before driver registered this device with wireless core, which will result in warnings. Once device is registered, higher layer will query channel info manually using .get_channel callback. Signed-off-by: Igor Mitsyanko Signed-off-by: Kalle Valo --- drivers/net/wireless/quantenna/qtnfmac/event.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index b1acc24bdc39..7481d5bdf647 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -368,6 +368,9 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, return -EINVAL; } + if (!wiphy->registered) + return 0; + qlink_chandef_q2cfg(wiphy, &data->chan, &chandef); if (!cfg80211_chandef_valid(&chandef)) { -- cgit v1.2.3 From 9b029e178ea174acd96a5ddf97b04c70f4e57885 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Sep 2017 23:56:30 +0100 Subject: iwlegacy: make const array static to shink object code size Don't populate const array ac_to_fifo on the stack in an inlined function, instead make it static. Makes the object code smaller by over 800 bytes: text data bss dec hex filename 159029 33154 1216 193399 2f377 4965-mac.o text data bss dec hex filename 158122 33250 1216 192588 2f04c 4965-mac.o (gcc version 7.2.0 x86_64) Signed-off-by: Colin Ian King Acked-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlegacy/4965-mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f..65eba2c24292 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -1480,7 +1480,7 @@ il4965_get_ac_from_tid(u16 tid) static inline int il4965_get_fifo_from_tid(u16 tid) { - const u8 ac_to_fifo[] = { + static const u8 ac_to_fifo[] = { IL_TX_FIFO_VO, IL_TX_FIFO_VI, IL_TX_FIFO_BE, -- cgit v1.2.3 From 96cbe3d638e4287db6482b6223367d3e6cf5871e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 16:39:02 +0100 Subject: b43: make const arrays static, reduces object code size Don't populate const arrays on the stack, instead make them static. Makes the object code smaller by over 60 bytes: Before: text data bss dec hex filename 14816 1296 0 16112 3ef0 b43/phy_ht.o After: text data bss dec hex filename 14551 1496 0 16047 3eaf b43/phy_ht.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/b43/phy_ht.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/broadcom/b43/phy_ht.c b/drivers/net/wireless/broadcom/b43/phy_ht.c index 718c90e81696..c3158d085c2b 100644 --- a/drivers/net/wireless/broadcom/b43/phy_ht.c +++ b/drivers/net/wireless/broadcom/b43/phy_ht.c @@ -119,7 +119,7 @@ static void b43_radio_2059_rcal(struct b43_wldev *dev) /* Calibrate the internal RC oscillator? */ static void b43_radio_2057_rccal(struct b43_wldev *dev) { - const u16 radio_values[3][2] = { + static const u16 radio_values[3][2] = { { 0x61, 0xE9 }, { 0x69, 0xD5 }, { 0x73, 0x99 }, }; int i; @@ -154,7 +154,7 @@ static void b43_radio_2059_init_pre(struct b43_wldev *dev) static void b43_radio_2059_init(struct b43_wldev *dev) { - const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 }; + static const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 }; int i; /* Prepare (reset?) radio */ @@ -263,7 +263,7 @@ static void b43_phy_ht_reset_cca(struct b43_wldev *dev) static void b43_phy_ht_zero_extg(struct b43_wldev *dev) { u8 i, j; - u16 base[] = { 0x40, 0x60, 0x80 }; + static const u16 base[] = { 0x40, 0x60, 0x80 }; for (i = 0; i < ARRAY_SIZE(base); i++) { for (j = 0; j < 4; j++) -- cgit v1.2.3 From 2d04cfcfa2e86cac751979b584e5420dd470903b Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:02 +0530 Subject: net: af_packet: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d288f52c53f7..b0f221885dfd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po, struct tpacket_kbdq_core *pkc, void (*func) (unsigned long)) { - init_timer(&pkc->retire_blk_timer); - pkc->retire_blk_timer.data = (long)po; - pkc->retire_blk_timer.function = func; + setup_timer(&pkc->retire_blk_timer, func, (long)po); pkc->retire_blk_timer.expires = jiffies; } -- cgit v1.2.3 From b1e07c5486f0df26caf93234bd7db5545a37aba8 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:03 +0530 Subject: net: nfc: hci: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/hci/core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b740fef0acc5..a8a6e7814e09 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - init_timer(&hdev->cmd_timer); - hdev->cmd_timer.data = (unsigned long)hdev; - hdev->cmd_timer.function = nfc_hci_cmd_timeout; + setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout, + (unsigned long)hdev); skb_queue_head_init(&hdev->rx_hcp_frags); -- cgit v1.2.3 From 22d387e13ac357279ddac55694883fd3e30a5639 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:04 +0530 Subject: net: nfc: hci: llc_shdlc: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/hci/llc_shdlc.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 17e59a009ce6..58df37eae1e8 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, mutex_init(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; - init_timer(&shdlc->connect_timer); - shdlc->connect_timer.data = (unsigned long)shdlc; - shdlc->connect_timer.function = llc_shdlc_connect_timeout; + setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout, + (unsigned long)shdlc); - init_timer(&shdlc->t1_timer); - shdlc->t1_timer.data = (unsigned long)shdlc; - shdlc->t1_timer.function = llc_shdlc_t1_timeout; + setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout, + (unsigned long)shdlc); - init_timer(&shdlc->t2_timer); - shdlc->t2_timer.data = (unsigned long)shdlc; - shdlc->t2_timer.function = llc_shdlc_t2_timeout; + setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout, + (unsigned long)shdlc); shdlc->w = SHDLC_MAX_WINDOW; shdlc->srej_support = SHDLC_SREJ_SUPPORT; -- cgit v1.2.3 From d835b63cc4ee67e59eed9d1957f729c0a30b7331 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Sep 2017 13:00:05 +0530 Subject: net: nfc: llcp_core: use setup_timer() helper. Use setup_timer function instead of initializing timer with the function and data fields. Signed-off-by: Allen Pais Signed-off-by: David S. Miller --- net/nfc/llcp_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 02eef5cf3cce..7988185072e5 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); - init_timer(&local->link_timer); - local->link_timer.data = (unsigned long) local; - local->link_timer.function = nfc_llcp_symm_timer; + setup_timer(&local->link_timer, nfc_llcp_symm_timer, + (unsigned long)local); skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); @@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) mutex_init(&local->sdreq_lock); INIT_HLIST_HEAD(&local->pending_sdreqs); - init_timer(&local->sdreq_timer); - local->sdreq_timer.data = (unsigned long) local; - local->sdreq_timer.function = nfc_llcp_sdreq_timer; + setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer, + (unsigned long)local); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); list_add(&local->list, &llcp_devices); -- cgit v1.2.3 From b5d7388f9db78f19e6af7b56a469ca8d1860329d Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 21 Sep 2017 18:43:29 -0400 Subject: bpf: Optimize lpm trie delete Before the delete operator was added, this datastructure maintained an invariant that intermediate nodes were only present when necessary to build the tree. This patch updates the delete operation to reinstate that invariant by removing unnecessary intermediate nodes after a node is removed and thus keeping the tree structure at a minimal size. Suggested-by: Daniel Mack Signed-off-by: Craig Gallek Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 71 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 28 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 9d58a576b2ae..34d8a690ea05 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -394,8 +394,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) { struct lpm_trie *trie = container_of(map, struct lpm_trie, map); struct bpf_lpm_trie_key *key = _key; - struct lpm_trie_node __rcu **trim; - struct lpm_trie_node *node; + struct lpm_trie_node __rcu **trim, **trim2; + struct lpm_trie_node *node, *parent; unsigned long irq_flags; unsigned int next_bit; size_t matchlen = 0; @@ -407,31 +407,26 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) raw_spin_lock_irqsave(&trie->lock, irq_flags); /* Walk the tree looking for an exact key/length match and keeping - * track of where we could begin trimming the tree. The trim-point - * is the sub-tree along the walk consisting of only single-child - * intermediate nodes and ending at a leaf node that we want to - * remove. + * track of the path we traverse. We will need to know the node + * we wish to delete, and the slot that points to the node we want + * to delete. We may also need to know the nodes parent and the + * slot that contains it. */ trim = &trie->root; - node = rcu_dereference_protected( - trie->root, lockdep_is_held(&trie->lock)); - while (node) { + trim2 = trim; + parent = NULL; + while ((node = rcu_dereference_protected( + *trim, lockdep_is_held(&trie->lock)))) { matchlen = longest_prefix_match(trie, node, key); if (node->prefixlen != matchlen || node->prefixlen == key->prefixlen) break; + parent = node; + trim2 = trim; next_bit = extract_bit(key->data, node->prefixlen); - /* If we hit a node that has more than one child or is a valid - * prefix itself, do not remove it. Reset the root of the trim - * path to its descendant on our path. - */ - if (!(node->flags & LPM_TREE_NODE_FLAG_IM) || - (node->child[0] && node->child[1])) - trim = &node->child[next_bit]; - node = rcu_dereference_protected( - node->child[next_bit], lockdep_is_held(&trie->lock)); + trim = &node->child[next_bit]; } if (!node || node->prefixlen != key->prefixlen || @@ -442,27 +437,47 @@ static int trie_delete_elem(struct bpf_map *map, void *_key) trie->n_entries--; - /* If the node we are removing is not a leaf node, simply mark it + /* If the node we are removing has two children, simply mark it * as intermediate and we are done. */ - if (rcu_access_pointer(node->child[0]) || + if (rcu_access_pointer(node->child[0]) && rcu_access_pointer(node->child[1])) { node->flags |= LPM_TREE_NODE_FLAG_IM; goto out; } - /* trim should now point to the slot holding the start of a path from - * zero or more intermediate nodes to our leaf node for deletion. + /* If the parent of the node we are about to delete is an intermediate + * node, and the deleted node doesn't have any children, we can delete + * the intermediate parent as well and promote its other child + * up the tree. Doing this maintains the invariant that all + * intermediate nodes have exactly 2 children and that there are no + * unnecessary intermediate nodes in the tree. */ - while ((node = rcu_dereference_protected( - *trim, lockdep_is_held(&trie->lock)))) { - RCU_INIT_POINTER(*trim, NULL); - trim = rcu_access_pointer(node->child[0]) ? - &node->child[0] : - &node->child[1]; + if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) && + !node->child[0] && !node->child[1]) { + if (node == rcu_access_pointer(parent->child[0])) + rcu_assign_pointer( + *trim2, rcu_access_pointer(parent->child[1])); + else + rcu_assign_pointer( + *trim2, rcu_access_pointer(parent->child[0])); + kfree_rcu(parent, rcu); kfree_rcu(node, rcu); + goto out; } + /* The node we are removing has either zero or one child. If there + * is a child, move it into the removed node's slot then delete + * the node. Otherwise just clear the slot and delete the node. + */ + if (node->child[0]) + rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0])); + else if (node->child[1]) + rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1])); + else + RCU_INIT_POINTER(*trim, NULL); + kfree_rcu(node, rcu); + out: raw_spin_unlock_irqrestore(&trie->lock, irq_flags); -- cgit v1.2.3 From 1b17ca044a82342fb96529e1bdff6da7af90bc53 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 22 Sep 2017 16:50:23 +0100 Subject: hv_netvsc: make const array ver_list static, reduces object code size Don't populate const array ver_list on the stack, instead make it static. Makes the object code smaller by over 400 bytes: Before: text data bss dec hex filename 18444 3168 320 21932 55ac drivers/net/hyperv/netvsc.o After: text data bss dec hex filename 17950 3224 320 21494 53f6 drivers/net/hyperv/netvsc.o (gcc 6.3.0, x86-64) Signed-off-by: Colin Ian King Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 8d5077fb0492..b0d323e24978 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -484,7 +484,7 @@ static int netvsc_connect_vsp(struct hv_device *device, struct netvsc_device *net_device, const struct netvsc_device_info *device_info) { - const u32 ver_list[] = { + static const u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; -- cgit v1.2.3 From e94cd8113ce63bca34040aae52d0603baf6ec07c Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Fri, 22 Sep 2017 23:57:49 +0800 Subject: net: remove MTU limits for dummy and ifb device These two drivers (dummy and ifb) call ether_setup(), after commit 61e84623ace3 ("net: centralize net_device min/max MTU checking"), the range of mtu is [min_mtu, max_mtu], which is [68, 1500] by default. These two devices should not have limits on MTU. This patch set their min_mtu/max_mtu to 0. So that dev_set_mtu() will not check the mtu range, and can be set with any value. CC: Eric Dumazet CC: Sabrina Dubroca Signed-off-by: Zhang Shengju Signed-off-by: David S. Miller --- drivers/net/dummy.c | 2 +- drivers/net/ifb.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index e31ab3b94c6f..58483af80bdb 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -353,7 +353,7 @@ static void dummy_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->min_mtu = 0; - dev->max_mtu = ETH_MAX_MTU; + dev->max_mtu = 0; } static int dummy_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 8870bd2a2e8a..0008da7e9d4c 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -231,6 +231,9 @@ static void ifb_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->needs_free_netdev = true; dev->priv_destructor = ifb_dev_free; + + dev->min_mtu = 0; + dev->max_mtu = 0; } static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) -- cgit v1.2.3 From 943170998b200190f99d3fe7e771437e2c51f319 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 22 Sep 2017 13:49:14 -0700 Subject: tun: enable NAPI for TUN/TAP driver Changes TUN driver to use napi_gro_receive() upon receiving packets rather than netif_rx_ni(). Adds flag IFF_NAPI that enables these changes and operation is not affected if the flag is disabled. SKBs are constructed upon packet arrival and are queued to be processed later. The new path was evaluated with a benchmark with the following setup: Open two tap devices and a receiver thread that reads in a loop for each device. Start one sender thread and pin all threads to different CPUs. Send 1M minimum UDP packets to each device and measure sending time for each of the sending methods: napi_gro_receive(): 4.90s netif_rx_ni(): 4.90s netif_receive_skb(): 7.20s Signed-off-by: Petar Penkov Cc: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Cc: davem@davemloft.net Cc: ppenkov@stanford.edu Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/tun.c | 133 +++++++++++++++++++++++++++++++++++++++----- include/uapi/linux/if_tun.h | 1 + 2 files changed, 119 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3c9985f29950..f16407242b18 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -121,7 +121,7 @@ do { \ #define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ - IFF_MULTI_QUEUE) + IFF_MULTI_QUEUE | IFF_NAPI) #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 @@ -172,6 +172,7 @@ struct tun_file { u16 queue_index; unsigned int ifindex; }; + struct napi_struct napi; struct list_head next; struct tun_struct *detached; struct skb_array tx_array; @@ -229,6 +230,68 @@ struct tun_struct { struct bpf_prog __rcu *xdp_prog; }; +static int tun_napi_receive(struct napi_struct *napi, int budget) +{ + struct tun_file *tfile = container_of(napi, struct tun_file, napi); + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; + struct sk_buff_head process_queue; + struct sk_buff *skb; + int received = 0; + + __skb_queue_head_init(&process_queue); + + spin_lock(&queue->lock); + skb_queue_splice_tail_init(queue, &process_queue); + spin_unlock(&queue->lock); + + while (received < budget && (skb = __skb_dequeue(&process_queue))) { + napi_gro_receive(napi, skb); + ++received; + } + + if (!skb_queue_empty(&process_queue)) { + spin_lock(&queue->lock); + skb_queue_splice(&process_queue, queue); + spin_unlock(&queue->lock); + } + + return received; +} + +static int tun_napi_poll(struct napi_struct *napi, int budget) +{ + unsigned int received; + + received = tun_napi_receive(napi, budget); + + if (received < budget) + napi_complete_done(napi, received); + + return received; +} + +static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, + bool napi_en) +{ + if (napi_en) { + netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, + NAPI_POLL_WEIGHT); + napi_enable(&tfile->napi); + } +} + +static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile) +{ + if (tun->flags & IFF_NAPI) + napi_disable(&tfile->napi); +} + +static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) +{ + if (tun->flags & IFF_NAPI) + netif_napi_del(&tfile->napi); +} + #ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { @@ -541,6 +604,11 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); + if (tun && clean) { + tun_napi_disable(tun, tfile); + tun_napi_del(tun, tfile); + } + if (tun && !tfile->detached) { u16 index = tfile->queue_index; BUG_ON(index >= tun->numqueues); @@ -598,6 +666,7 @@ static void tun_detach_all(struct net_device *dev) for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); BUG_ON(!tfile); + tun_napi_disable(tun, tfile); tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN; tfile->socket.sk->sk_data_ready(tfile->socket.sk); RCU_INIT_POINTER(tfile->tun, NULL); @@ -613,6 +682,7 @@ static void tun_detach_all(struct net_device *dev) synchronize_net(); for (i = 0; i < n; i++) { tfile = rtnl_dereference(tun->tfiles[i]); + tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); sock_put(&tfile->sk); @@ -631,7 +701,8 @@ static void tun_detach_all(struct net_device *dev) module_put(THIS_MODULE); } -static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) +static int tun_attach(struct tun_struct *tun, struct file *file, + bool skip_filter, bool napi) { struct tun_file *tfile = file->private_data; struct net_device *dev = tun->dev; @@ -677,10 +748,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; - if (tfile->detached) + if (tfile->detached) { tun_enable_queue(tfile); - else + } else { sock_hold(&tfile->sk); + tun_napi_init(tun, tfile, napi); + } tun_set_real_num_queues(tun); @@ -956,13 +1029,28 @@ static void tun_poll_controller(struct net_device *dev) * Tun only receives frames when: * 1) the char device endpoint gets data from user space * 2) the tun socket gets a sendmsg call from user space - * Since both of those are synchronous operations, we are guaranteed - * never to have pending data when we poll for it - * so there is nothing to do here but return. + * If NAPI is not enabled, since both of those are synchronous + * operations, we are guaranteed never to have pending data when we poll + * for it so there is nothing to do here but return. * We need this though so netpoll recognizes us as an interface that * supports polling, which enables bridge devices in virt setups to * still use netconsole + * If NAPI is enabled, however, we need to schedule polling for all + * queues. */ + struct tun_struct *tun = netdev_priv(dev); + + if (tun->flags & IFF_NAPI) { + struct tun_file *tfile; + int i; + + rcu_read_lock(); + for (i = 0; i < tun->numqueues; i++) { + tfile = rcu_dereference(tun->tfiles[i]); + napi_schedule(&tfile->napi); + } + rcu_read_unlock(); + } return; } #endif @@ -1549,11 +1637,25 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } rxhash = __skb_get_hash_symmetric(skb); -#ifndef CONFIG_4KSTACKS - tun_rx_batched(tun, tfile, skb, more); -#else - netif_rx_ni(skb); -#endif + + if (tun->flags & IFF_NAPI) { + struct sk_buff_head *queue = &tfile->sk.sk_write_queue; + int queue_len; + + spin_lock_bh(&queue->lock); + __skb_queue_tail(queue, skb); + queue_len = skb_queue_len(queue); + spin_unlock(&queue->lock); + + if (!more || queue_len > NAPI_POLL_WEIGHT) + napi_schedule(&tfile->napi); + + local_bh_enable(); + } else if (!IS_ENABLED(CONFIG_4KSTACKS)) { + tun_rx_batched(tun, tfile, skb, more); + } else { + netif_rx_ni(skb); + } stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); @@ -1980,7 +2082,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (err < 0) return err; - err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER); + err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER, + ifr->ifr_flags & IFF_NAPI); if (err < 0) return err; @@ -2066,7 +2169,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) NETIF_F_HW_VLAN_STAG_TX); INIT_LIST_HEAD(&tun->disabled); - err = tun_attach(tun, file, false); + err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI); if (err < 0) goto err_free_flow; @@ -2216,7 +2319,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr) ret = security_tun_dev_attach_queue(tun->security); if (ret < 0) goto unlock; - ret = tun_attach(tun, file, false); + ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI); } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) { tun = rtnl_dereference(tfile->tun); if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached) diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 3cb5e1d85ddd..30b6184884eb 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -60,6 +60,7 @@ /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 +#define IFF_NAPI 0x0010 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 -- cgit v1.2.3 From 90e33d45940793def6f773b2d528e9f3c84ffdc7 Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Fri, 22 Sep 2017 13:49:15 -0700 Subject: tun: enable napi_gro_frags() for TUN/TAP driver Add a TUN/TAP receive mode that exercises the napi_gro_frags() interface. This mode is available only in TAP mode, as the interface expects packets with Ethernet headers. Furthermore, packets follow the layout of the iovec_iter that was received. The first iovec is the linear data, and every one after the first is a fragment. If there are more fragments than the max number, drop the packet. Additionally, invoke eth_get_headlen() to exercise flow dissector code and to verify that the header resides in the linear data. The napi_gro_frags() mode requires setting the IFF_NAPI_FRAGS option. This is imposed because this mode is intended for testing via tools like syzkaller and packetdrill, and the increased flexibility it provides can introduce security vulnerabilities. This flag is accepted only if the device is in TAP mode and has the IFF_NAPI flag set as well. This is done because both of these are explicit requirements for correct operation in this mode. Signed-off-by: Petar Penkov Cc: Eric Dumazet Cc: Mahesh Bandewar Cc: Willem de Bruijn Cc: davem@davemloft.net Cc: ppenkov@stanford.edu Acked-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/tun.c | 134 ++++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/if_tun.h | 1 + 2 files changed, 129 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f16407242b18..9880b3bc8fa5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -75,6 +75,7 @@ #include #include #include +#include #include @@ -121,7 +122,8 @@ do { \ #define TUN_VNET_BE 0x40000000 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \ - IFF_MULTI_QUEUE | IFF_NAPI) + IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS) + #define GOODCOPY_LEN 128 #define FLT_EXACT_COUNT 8 @@ -173,6 +175,7 @@ struct tun_file { unsigned int ifindex; }; struct napi_struct napi; + struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; struct skb_array tx_array; @@ -277,6 +280,7 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll, NAPI_POLL_WEIGHT); napi_enable(&tfile->napi); + mutex_init(&tfile->napi_mutex); } } @@ -292,6 +296,11 @@ static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile) netif_napi_del(&tfile->napi); } +static bool tun_napi_frags_enabled(const struct tun_struct *tun) +{ + return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS; +} + #ifdef CONFIG_TUN_VNET_CROSS_LE static inline bool tun_legacy_is_little_endian(struct tun_struct *tun) { @@ -1036,7 +1045,8 @@ static void tun_poll_controller(struct net_device *dev) * supports polling, which enables bridge devices in virt setups to * still use netconsole * If NAPI is enabled, however, we need to schedule polling for all - * queues. + * queues unless we are using napi_gro_frags(), which we call in + * process context and not in NAPI context. */ struct tun_struct *tun = netdev_priv(dev); @@ -1044,6 +1054,9 @@ static void tun_poll_controller(struct net_device *dev) struct tun_file *tfile; int i; + if (tun_napi_frags_enabled(tun)) + return; + rcu_read_lock(); for (i = 0; i < tun->numqueues; i++) { tfile = rcu_dereference(tun->tfiles[i]); @@ -1266,6 +1279,64 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) return mask; } +static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile, + size_t len, + const struct iov_iter *it) +{ + struct sk_buff *skb; + size_t linear; + int err; + int i; + + if (it->nr_segs > MAX_SKB_FRAGS + 1) + return ERR_PTR(-ENOMEM); + + local_bh_disable(); + skb = napi_get_frags(&tfile->napi); + local_bh_enable(); + if (!skb) + return ERR_PTR(-ENOMEM); + + linear = iov_iter_single_seg_count(it); + err = __skb_grow(skb, linear); + if (err) + goto free; + + skb->len = len; + skb->data_len = len - linear; + skb->truesize += skb->data_len; + + for (i = 1; i < it->nr_segs; i++) { + size_t fragsz = it->iov[i].iov_len; + unsigned long offset; + struct page *page; + void *data; + + if (fragsz == 0 || fragsz > PAGE_SIZE) { + err = -EINVAL; + goto free; + } + + local_bh_disable(); + data = napi_alloc_frag(fragsz); + local_bh_enable(); + if (!data) { + err = -ENOMEM; + goto free; + } + + page = virt_to_head_page(data); + offset = data - page_address(page); + skb_fill_page_desc(skb, i - 1, page, offset, fragsz); + } + + return skb; +free: + /* frees skb and all frags allocated with napi_alloc_frag() */ + napi_free_frags(&tfile->napi); + return ERR_PTR(err); +} + /* prepad is the amount to reserve at front. len is length after that. * linear is a hint as to how much to copy (usually headers). */ static struct sk_buff *tun_alloc_skb(struct tun_file *tfile, @@ -1478,6 +1549,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int err; u32 rxhash; int skb_xdp = 1; + bool frags = tun_napi_frags_enabled(tun); if (!(tun->dev->flags & IFF_UP)) return -EIO; @@ -1535,7 +1607,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, zerocopy = true; } - if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { + if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) { /* For the packet that is not easy to be processed * (e.g gso or jumbo packet), we will do it at after * skb was created with generic XDP routine. @@ -1556,10 +1628,24 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, linear = tun16_to_cpu(tun, gso.hdr_len); } - skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); + if (frags) { + mutex_lock(&tfile->napi_mutex); + skb = tun_napi_alloc_frags(tfile, copylen, from); + /* tun_napi_alloc_frags() enforces a layout for the skb. + * If zerocopy is enabled, then this layout will be + * overwritten by zerocopy_sg_from_iter(). + */ + zerocopy = false; + } else { + skb = tun_alloc_skb(tfile, align, copylen, linear, + noblock); + } + if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) this_cpu_inc(tun->pcpu_stats->rx_dropped); + if (frags) + mutex_unlock(&tfile->napi_mutex); return PTR_ERR(skb); } @@ -1571,6 +1657,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (err) { this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } + return -EFAULT; } } @@ -1578,6 +1669,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) { this_cpu_inc(tun->pcpu_stats->rx_frame_errors); kfree_skb(skb); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } + return -EINVAL; } @@ -1603,7 +1699,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb->dev = tun->dev; break; case IFF_TAP: - skb->protocol = eth_type_trans(skb, tun->dev); + if (!frags) + skb->protocol = eth_type_trans(skb, tun->dev); break; } @@ -1638,7 +1735,23 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, rxhash = __skb_get_hash_symmetric(skb); - if (tun->flags & IFF_NAPI) { + if (frags) { + /* Exercise flow dissector code path. */ + u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); + + if (headlen > skb_headlen(skb) || headlen < ETH_HLEN) { + this_cpu_inc(tun->pcpu_stats->rx_dropped); + napi_free_frags(&tfile->napi); + mutex_unlock(&tfile->napi_mutex); + WARN_ON(1); + return -ENOMEM; + } + + local_bh_disable(); + napi_gro_frags(&tfile->napi); + local_bh_enable(); + mutex_unlock(&tfile->napi_mutex); + } else if (tun->flags & IFF_NAPI) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; int queue_len; @@ -2061,6 +2174,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tfile->detached) return -EINVAL; + if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!(ifr->ifr_flags & IFF_NAPI) || + (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP) + return -EINVAL; + } + dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { if (ifr->ifr_flags & IFF_TUN_EXCL) diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 30b6184884eb..365ade5685c9 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -61,6 +61,7 @@ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 #define IFF_NAPI 0x0010 +#define IFF_NAPI_FRAGS 0x0020 #define IFF_NO_PI 0x1000 /* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 -- cgit v1.2.3 From 6450f8f269a9271985e4a8c13920b7e4cf21c0f3 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 22 Sep 2017 15:31:38 -0700 Subject: hv_netvsc: Fix the real number of queues of non-vRSS cases For older hosts without multi-channel (vRSS) support, and some error cases, we still need to set the real number of queues to one. This patch adds this missing setting. Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Signed-off-by: Haiyang Zhang Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index a32ae02e1b6c..e9d54c9ee78c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1935,6 +1935,12 @@ static int netvsc_probe(struct hv_device *dev, /* We always need headroom for rndis header */ net->needed_headroom = RNDIS_AND_PPI_SIZE; + /* Initialize the number of queues to be 1, we may change it if more + * channels are offered later. + */ + netif_set_real_num_tx_queues(net, 1); + netif_set_real_num_rx_queues(net, 1); + /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; -- cgit v1.2.3 From 6457edfe7344ac1da334b9f24a42aacea084a451 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 22 Sep 2017 19:01:55 -0400 Subject: net: dsa: make slave close symmetrical to open The DSA slave open function configures the unicast MAC addresses on the master device, enable the switch port, change its STP state, then start the PHY device. Make the close function symmetric, by first stopping the PHY device, then changing the STP state, disabling the switch port and restore the master device. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 02ace7d462c4..c2bb48579032 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -133,6 +133,11 @@ static int dsa_slave_close(struct net_device *dev) if (p->phy) phy_stop(p->phy); + dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); + + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->dp->index, p->phy); + dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); if (dev->flags & IFF_ALLMULTI) @@ -143,11 +148,6 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->ops->port_disable) - ds->ops->port_disable(ds, p->dp->index, p->phy); - - dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); - return 0; } -- cgit v1.2.3 From fb8a6a2b8b7cfee8a0cf2cd431e1d0f45dd1c9e0 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 22 Sep 2017 19:01:56 -0400 Subject: net: dsa: add port enable and disable helpers Provide dsa_port_enable and dsa_port_disable helpers to respectively enable and disable a switch port. This makes the dsa_port_set_state_now helper static. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 3 ++- net/dsa/port.c | 31 ++++++++++++++++++++++++++++++- net/dsa/slave.c | 19 +++++-------------- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9803952a5b40..0298a0f6a349 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -117,7 +117,8 @@ void dsa_master_ethtool_restore(struct net_device *dev); /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); -void dsa_port_set_state_now(struct dsa_port *dp, u8 state); +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, diff --git a/net/dsa/port.c b/net/dsa/port.c index 76d43a82d397..72c8dbd3d3f2 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -56,7 +56,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, return 0; } -void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) { int err; @@ -65,6 +65,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +{ + u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, port, phy); + if (err) + return err; + } + + dsa_port_set_state_now(dp, stp_state); + + return 0; +} + +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + dsa_port_set_state_now(dp, BR_STATE_DISABLED); + + if (ds->ops->port_disable) + ds->ops->port_disable(ds, port, phy); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { diff --git a/net/dsa/slave.c b/net/dsa/slave.c index c2bb48579032..bd51ef56ec5b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -73,9 +73,7 @@ static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; struct net_device *master = dsa_master_netdev(p); - u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; int err; if (!(master->flags & IFF_UP)) @@ -98,13 +96,9 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->ops->port_enable) { - err = ds->ops->port_enable(ds, p->dp->index, p->phy); - if (err) - goto clear_promisc; - } - - dsa_port_set_state_now(p->dp, stp_state); + err = dsa_port_enable(dp, p->phy); + if (err) + goto clear_promisc; if (p->phy) phy_start(p->phy); @@ -128,15 +122,12 @@ static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct net_device *master = dsa_master_netdev(p); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = p->dp; if (p->phy) phy_stop(p->phy); - dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); - - if (ds->ops->port_disable) - ds->ops->port_disable(ds, p->dp->index, p->phy); + dsa_port_disable(dp, p->phy); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); -- cgit v1.2.3 From 088b8749da1e35b0dd9cb0e6500ca1c94c9bf547 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:43 -0700 Subject: liquidio: allow override of firmware present in flash Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 68 ++++++++++++++-------- .../net/ethernet/cavium/liquidio/liquidio_image.h | 1 + .../net/ethernet/cavium/liquidio/octeon_device.c | 11 +++- .../net/ethernet/cavium/liquidio/octeon_device.h | 10 ++++ 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index e7f54948173f..ce08f710de0b 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -59,9 +59,9 @@ static int debug = -1; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "NETIF_MSG debug bits"); -static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC; +static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_AUTO; module_param_string(fw_type, fw_type, sizeof(fw_type), 0444); -MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\". Use \"none\" to load firmware from flash."); +MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded (default is \"auto\"), which uses firmware in flash, if present, else loads \"nic\"."); static u32 console_bitmask; module_param(console_bitmask, int, 0644); @@ -1115,10 +1115,10 @@ liquidio_probe(struct pci_dev *pdev, return 0; } -static bool fw_type_is_none(void) +static bool fw_type_is_auto(void) { - return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE, - sizeof(LIO_FW_NAME_TYPE_NONE)) == 0; + return strncmp(fw_type, LIO_FW_NAME_TYPE_AUTO, + sizeof(LIO_FW_NAME_TYPE_AUTO)) == 0; } /** @@ -1302,7 +1302,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) * Implementation note: only soft-reset the device * if it is a CN6XXX OR the LAST CN23XX device. */ - if (fw_type_is_none()) + if (atomic_read(oct->adapter_fw_state) == FW_IS_PRELOADED) octeon_pci_flr(oct); else if (OCTEON_CN6XXX(oct) || !refcount) oct->fn_list.soft_reset(oct); @@ -1934,7 +1934,7 @@ static int load_firmware(struct octeon_device *oct) char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (fw_type[0] == '\0') + if (fw_type_is_auto()) tmp_fw_type = LIO_FW_NAME_TYPE_NIC; else tmp_fw_type = fw_type; @@ -3882,9 +3882,9 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf) static int octeon_device_init(struct octeon_device *octeon_dev) { int j, ret; - int fw_loaded = 0; char bootcmd[] = "\n"; char *dbg_enb = NULL; + enum lio_fw_state fw_state; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)octeon_dev->priv; atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE); @@ -3916,24 +3916,40 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->app_mode = CVM_DRV_INVALID_APP; - if (OCTEON_CN23XX_PF(octeon_dev)) { - if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) { - fw_loaded = 0; - /* Do a soft reset of the Octeon device. */ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) - return 1; - /* things might have changed */ - if (!cn23xx_fw_loaded(octeon_dev)) - fw_loaded = 0; - else - fw_loaded = 1; - } else { - fw_loaded = 1; - } - } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) { - return 1; + /* CN23XX supports preloaded firmware if the following is true: + * + * The adapter indicates that firmware is currently running AND + * 'fw_type' is 'auto'. + * + * (default state is NEEDS_TO_BE_LOADED, override it if appropriate). + */ + if (OCTEON_CN23XX_PF(octeon_dev) && + cn23xx_fw_loaded(octeon_dev) && fw_type_is_auto()) { + atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, FW_IS_PRELOADED); } + /* If loading firmware, only first device of adapter needs to do so. */ + fw_state = atomic_cmpxchg(octeon_dev->adapter_fw_state, + FW_NEEDS_TO_BE_LOADED, + FW_IS_BEING_LOADED); + + /* Here, [local variable] 'fw_state' is set to one of: + * + * FW_IS_PRELOADED: No firmware is to be loaded (see above) + * FW_NEEDS_TO_BE_LOADED: The driver's first instance will load + * firmware to the adapter. + * FW_IS_BEING_LOADED: The driver's second instance will not load + * firmware to the adapter. + */ + + /* Prior to f/w load, perform a soft reset of the Octeon device; + * if error resetting, return w/error. + */ + if (fw_state == FW_NEEDS_TO_BE_LOADED) + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* Initialize the dispatch mechanism used to push packets arriving on * Octeon Output queues. */ @@ -4063,7 +4079,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); - if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) { + if (fw_state == FW_NEEDS_TO_BE_LOADED) { dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); if (!ddr_timeout) { dev_info(&octeon_dev->pci_dev->dev, @@ -4125,6 +4141,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev) dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); return 1; } + + atomic_set(octeon_dev->adapter_fw_state, FW_HAS_BEEN_LOADED); } handshake[octeon_dev->octeon_id].init_ok = 1; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h index 78a3685f6fe0..5bf5e8791dfb 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h @@ -24,6 +24,7 @@ #define LIO_FW_BASE_NAME "lio_" #define LIO_FW_NAME_SUFFIX ".bin" #define LIO_FW_NAME_TYPE_NIC "nic" +#define LIO_FW_NAME_TYPE_AUTO "auto" #define LIO_FW_NAME_TYPE_NONE "none" #define LIO_MAX_FIRMWARE_VERSION_LEN 16 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 29d53b1763a7..e4aa3395a578 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -541,6 +541,7 @@ static char oct_dev_app_str[CVM_DRV_APP_COUNT + 1][32] = { static struct octeon_device *octeon_device[MAX_OCTEON_DEVICES]; static atomic_t adapter_refcounts[MAX_OCTEON_DEVICES]; +static atomic_t adapter_fw_states[MAX_OCTEON_DEVICES]; static u32 octeon_device_count; /* locks device array (i.e. octeon_device[]) */ @@ -770,6 +771,10 @@ int octeon_register_device(struct octeon_device *oct, oct->adapter_refcount = &adapter_refcounts[oct->octeon_id]; atomic_set(oct->adapter_refcount, 0); + /* Like the reference count, the f/w state is shared 'per-adapter' */ + oct->adapter_fw_state = &adapter_fw_states[oct->octeon_id]; + atomic_set(oct->adapter_fw_state, FW_NEEDS_TO_BE_LOADED); + spin_lock(&octeon_devices_lock); for (idx = (int)oct->octeon_id - 1; idx >= 0; idx--) { if (!octeon_device[idx]) { @@ -780,11 +785,15 @@ int octeon_register_device(struct octeon_device *oct, atomic_inc(oct->adapter_refcount); return 1; /* here, refcount is guaranteed to be 1 */ } - /* if another device is at same bus/dev, use its refcounter */ + /* If another device is at same bus/dev, use its refcounter + * (and f/w state variable). + */ if ((octeon_device[idx]->loc.bus == bus) && (octeon_device[idx]->loc.dev == dev)) { oct->adapter_refcount = octeon_device[idx]->adapter_refcount; + oct->adapter_fw_state = + octeon_device[idx]->adapter_fw_state; break; } } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 894af199ddef..33d19c4509bc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -50,6 +50,13 @@ enum octeon_pci_swap_mode { OCTEON_PCI_32BIT_LW_SWAP = 3 }; +enum lio_fw_state { + FW_IS_PRELOADED = 0, + FW_NEEDS_TO_BE_LOADED = 1, + FW_IS_BEING_LOADED = 2, + FW_HAS_BEEN_LOADED = 3, +}; + enum { OCTEON_CONFIG_TYPE_DEFAULT = 0, NUM_OCTEON_CONFS, @@ -557,6 +564,9 @@ struct octeon_device { } loc; atomic_t *adapter_refcount; /* reference count of adapter */ + + atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */ + bool ptp_enable; }; -- cgit v1.2.3 From b36e48209157fdd98a5589a3dd60ff3fbf51e16d Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:47 -0700 Subject: liquidio: verify firmware version when auto-loaded from flash. Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index ce08f710de0b..a3c9867c0340 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -3303,7 +3303,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) { struct lio *lio = NULL; struct net_device *netdev; - u8 mac[6], i, j; + u8 mac[6], i, j, *fw_ver; struct octeon_soft_command *sc; struct liquidio_if_cfg_context *ctx; struct liquidio_if_cfg_resp *resp; @@ -3414,6 +3414,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) goto setup_nic_dev_fail; } + /* Verify f/w version (in case of 'auto' loading from flash) */ + fw_ver = octeon_dev->fw_info.liquidio_firmware_version; + if (memcmp(LIQUIDIO_BASE_VERSION, + fw_ver, + strlen(LIQUIDIO_BASE_VERSION))) { + dev_err(&octeon_dev->pci_dev->dev, + "Unmatched firmware version. Expected %s.x, got %s.\n", + LIQUIDIO_BASE_VERSION, fw_ver); + goto setup_nic_dev_fail; + } else if (atomic_read(octeon_dev->adapter_fw_state) == + FW_IS_PRELOADED) { + dev_info(&octeon_dev->pci_dev->dev, + "Using auto-loaded firmware version %s.\n", + fw_ver); + } + octeon_swap_8B_data((u64 *)(&resp->cfg_info), (sizeof(struct liquidio_if_cfg_info)) >> 3); -- cgit v1.2.3 From 429cbf6bde1adff108171ad4b2387e62f851d609 Mon Sep 17 00:00:00 2001 From: Rick Farrington Date: Fri, 22 Sep 2017 17:12:51 -0700 Subject: liquidio: update module parameter fw_type to reflect firmware type loaded Signed-off-by: Rick Farrington Signed-off-by: Felix Manlunas Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a3c9867c0340..963803bc6633 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1934,10 +1934,12 @@ static int load_firmware(struct octeon_device *oct) char fw_name[LIO_MAX_FW_FILENAME_LEN]; char *tmp_fw_type; - if (fw_type_is_auto()) + if (fw_type_is_auto()) { tmp_fw_type = LIO_FW_NAME_TYPE_NIC; - else + strncpy(fw_type, tmp_fw_type, sizeof(fw_type)); + } else { tmp_fw_type = fw_type; + } sprintf(fw_name, "%s%s%s_%s%s", LIO_FW_DIR, LIO_FW_BASE_NAME, octeon_get_conf(oct)->card_name, tmp_fw_type, -- cgit v1.2.3 From ba581f77df23c8ee70b372966e69cf10bc5453d8 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Sat, 23 Sep 2017 16:07:28 +0530 Subject: cxgb4: do DCB state reset in couple of places reset the driver's DCB state in couple of places where it was missing. Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c | 15 +++++++++++---- drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 10 ++++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6ee2ed30626b..4e7f72b17e82 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -40,8 +40,7 @@ static inline bool cxgb4_dcb_state_synced(enum cxgb4_dcb_state state) return false; } -/* Initialize a port's Data Center Bridging state. Typically used after a - * Link Down event. +/* Initialize a port's Data Center Bridging state. */ void cxgb4_dcb_state_init(struct net_device *dev) { @@ -106,6 +105,15 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev) } } +/* Reset a port's Data Center Bridging state. Typically used after a + * Link Down event. + */ +void cxgb4_dcb_reset(struct net_device *dev) +{ + cxgb4_dcb_cleanup_apps(dev); + cxgb4_dcb_state_init(dev); +} + /* Finite State machine for Data Center Bridging. */ void cxgb4_dcb_state_fsm(struct net_device *dev, @@ -194,8 +202,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, * state. We need to reset back to a ground state * of incomplete. */ - cxgb4_dcb_cleanup_apps(dev); - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; dcb->supported = CXGB4_DCBX_FW_SUPPORT; linkwatch_fire_event(dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index ccf24d3dc982..02040b99c78a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -131,6 +131,7 @@ struct port_dcb_info { void cxgb4_dcb_state_init(struct net_device *); void cxgb4_dcb_version_init(struct net_device *); +void cxgb4_dcb_reset(struct net_device *dev); void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input); void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *); void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index aa93ae95d3b9..13b636b0af5f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -281,7 +281,7 @@ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) else { #ifdef CONFIG_CHELSIO_T4_DCB if (cxgb4_dcb_enabled(dev)) { - cxgb4_dcb_state_init(dev); + cxgb4_dcb_reset(dev); dcb_tx_queue_prio_enable(dev, false); } #endif /* CONFIG_CHELSIO_T4_DCB */ @@ -2304,10 +2304,16 @@ static int cxgb_close(struct net_device *dev) { struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; + int ret; netif_tx_stop_all_queues(dev); netif_carrier_off(dev); - return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); + ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); +#ifdef CONFIG_CHELSIO_T4_DCB + cxgb4_dcb_reset(dev); + dcb_tx_queue_prio_enable(dev, false); +#endif + return ret; } int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, -- cgit v1.2.3 From 9484dc74fcf0750cd6726c9aa27edf97223916a8 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Sat, 23 Sep 2017 22:36:52 +0800 Subject: tun: delete original tun_get() and rename __tun_get() to tun_get() it seems no need to keep tun_get() and __tun_get() at same time. Signed-off-by: yuan linyu Signed-off-by: David S. Miller --- drivers/net/tun.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9880b3bc8fa5..2c36f6ebad79 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -774,7 +774,7 @@ out: return err; } -static struct tun_struct *__tun_get(struct tun_file *tfile) +static struct tun_struct *tun_get(struct tun_file *tfile) { struct tun_struct *tun; @@ -787,11 +787,6 @@ static struct tun_struct *__tun_get(struct tun_file *tfile) return tun; } -static struct tun_struct *tun_get(struct file *file) -{ - return __tun_get(file->private_data); -} - static void tun_put(struct tun_struct *tun) { dev_put(tun->dev); @@ -1250,7 +1245,7 @@ static void tun_net_init(struct net_device *dev) static unsigned int tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); struct sock *sk; unsigned int mask = 0; @@ -1784,8 +1779,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = tun_get(file); struct tun_file *tfile = file->private_data; + struct tun_struct *tun = tun_get(tfile); ssize_t result; if (!tun) @@ -1969,7 +1964,7 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); ssize_t len = iov_iter_count(to), ret; if (!tun) @@ -2046,7 +2041,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { int ret; struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); if (!tun) return -EBADFD; @@ -2062,7 +2057,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); - struct tun_struct *tun = __tun_get(tfile); + struct tun_struct *tun = tun_get(tfile); int ret; if (!tun) @@ -2094,7 +2089,7 @@ static int tun_peek_len(struct socket *sock) struct tun_struct *tun; int ret = 0; - tun = __tun_get(tfile); + tun = tun_get(tfile); if (!tun) return 0; @@ -2490,7 +2485,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = 0; rtnl_lock(); - tun = __tun_get(tfile); + tun = tun_get(tfile); if (cmd == TUNSETIFF) { ret = -EEXIST; if (tun) @@ -2837,15 +2832,16 @@ static int tun_chr_close(struct inode *inode, struct file *file) } #ifdef CONFIG_PROC_FS -static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f) +static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file) { + struct tun_file *tfile = file->private_data; struct tun_struct *tun; struct ifreq ifr; memset(&ifr, 0, sizeof(ifr)); rtnl_lock(); - tun = tun_get(f); + tun = tun_get(tfile); if (tun) tun_get_iff(current->nsproxy->net_ns, tun, &ifr); rtnl_unlock(); -- cgit v1.2.3 From 3aa605f28b0d004a640a826380b39c7dcf70195d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Sep 2017 11:07:28 -0700 Subject: sch_netem: faster rb tree removal While running TCP tests involving netem storing millions of packets, I had the idea to speed up tfifo_reset() and did experiments. I tried the rbtree_postorder_for_each_entry_safe() method that is used in skb_rbtree_purge() but discovered it was slower than the current tfifo_reset() method. I measured time taken to release skbs with three occupation levels : 10^4, 10^5 and 10^6 skbs with three methods : 1) (current 'naive' method) while ((p = rb_first(&q->t_root))) { struct sk_buff *skb = netem_rb_to_skb(p); rb_erase(p, &q->t_root); rtnl_kfree_skbs(skb, skb); } 2) Use rb_next() instead of rb_first() in the loop : p = rb_first(&q->t_root); while (p) { struct sk_buff *skb = netem_rb_to_skb(p); p = rb_next(p); rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } 3) "optimized" method using rbtree_postorder_for_each_entry_safe() struct sk_buff *skb, *next; rbtree_postorder_for_each_entry_safe(skb, next, &q->t_root, rbnode) { rtnl_kfree_skbs(skb, skb); } q->t_root = RB_ROOT; Results : method_1:while (rb_first()) rb_erase() 10000 skbs in 690378 ns (69 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 10000 skbs in 541846 ns (54 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 10000 skbs in 868307 ns (86 ns per skb) method_1:while (rb_first()) rb_erase() 99996 skbs in 7804021 ns (78 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 100000 skbs in 5942456 ns (59 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 100000 skbs in 11584940 ns (115 ns per skb) method_1:while (rb_first()) rb_erase() 1000000 skbs in 108577838 ns (108 ns per skb) method_2:rb_first; while (p) { p = rb_next(p); ...} 1000000 skbs in 82619635 ns (82 ns per skb) method_3:rbtree_postorder_for_each_entry_safe() 1000000 skbs in 127328743 ns (127 ns per skb) Method 2) is simply faster, probably because it maintains a smaller working size set. Note that this is the method we use in tcp_ofo_queue() already. I will also change skb_rbtree_purge() in a second patch. Signed-off-by: Eric Dumazet Acked-by: David Ahern Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 063a4bdb9ee6..5a4f10080290 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -361,12 +361,13 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche static void tfifo_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct rb_node *p; + struct rb_node *p = rb_first(&q->t_root); - while ((p = rb_first(&q->t_root))) { + while (p) { struct sk_buff *skb = netem_rb_to_skb(p); - rb_erase(p, &q->t_root); + p = rb_next(p); + rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } } -- cgit v1.2.3 From 7c90584c66cc4b033a3b684b0e0950f79e7b7166 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Sep 2017 12:39:12 -0700 Subject: net: speed up skb_rbtree_purge() As measured in my prior patch ("sch_netem: faster rb tree removal"), rbtree_postorder_for_each_entry_safe() is nice looking but much slower than using rb_next() directly, except when tree is small enough to fit in CPU caches (then the cost is the same) Also note that there is not even an increase of text size : $ size net/core/skbuff.o.before net/core/skbuff.o text data bss dec hex filename 40711 1298 0 42009 a419 net/core/skbuff.o.before 40711 1298 0 42009 a419 net/core/skbuff.o From: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16982de649b9..000ce735fa8d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2848,12 +2848,15 @@ EXPORT_SYMBOL(skb_queue_purge); */ void skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) - kfree_skb(skb); + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - *root = RB_ROOT; + p = rb_next(p); + rb_erase(&skb->rbnode, root); + kfree_skb(skb); + } } /** -- cgit v1.2.3 From e451ae8e4f6b3f6bd3b83a5595657b5421b3bf69 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:01:06 +0300 Subject: neigh: make struct neigh_table::entry_size unsigned int Neigh entry size can't be negative. Space savings: add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-7 (-7) function old new delta lowpan_neigh_construct 25 24 -1 clip_seq_sub_iter 152 151 -1 clip_ioctl 1475 1474 -1 clip_constructor 93 92 -1 __neigh_create 2455 2452 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9816df225af3..9a25512e0a6e 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -190,7 +190,7 @@ struct neigh_hash_table { struct neigh_table { int family; - int entry_size; + unsigned int entry_size; int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, -- cgit v1.2.3 From 01ccdf126ca5f9d4fe0889f65ee67afac910f19c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Sep 2017 23:03:04 +0300 Subject: neigh: make strucrt neigh_table::entry_size unsigned int Key length can't be negative. Leave comparisons against nla_len() signed just in case truncated attribute can sneak in there. Space savings: add/remove: 0/0 grow/shrink: 0/7 up/down: 0/-7 (-7) function old new delta pneigh_delete 273 272 -1 mlx5e_rep_netevent_event 1415 1414 -1 mlx5e_create_encap_header_ipv6 1194 1193 -1 mlx5e_create_encap_header_ipv4 1071 1070 -1 cxgb4_l2t_get 1104 1103 -1 __pneigh_lookup 69 68 -1 __neigh_create 2452 2451 -1 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/l2t.c | 4 ++-- include/net/neighbour.h | 2 +- net/core/neighbour.c | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index f7ef8871dd0b..1817a0307d26 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -422,7 +422,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh, u8 lport; u16 vlan; struct l2t_entry *e; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *)neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); @@ -536,7 +536,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh) struct l2t_entry *e; struct sk_buff_head *arpq = NULL; struct l2t_data *d = adap->l2t; - int addr_len = neigh->tbl->key_len; + unsigned int addr_len = neigh->tbl->key_len; u32 *addr = (u32 *) neigh->primary_key; int ifidx = neigh->dev->ifindex; int hash = addr_hash(d, addr, addr_len, ifidx); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 9a25512e0a6e..2492000e1035 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -191,7 +191,7 @@ struct neigh_hash_table { struct neigh_table { int family; unsigned int entry_size; - int key_len; + unsigned int key_len; __be16 protocol; __u32 (*hash)(const void *pkey, const struct net_device *dev, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 16a1a4c4eb57..6ea3a1a7f36a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) { struct neighbour *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val; struct neigh_hash_table *nht; @@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { u32 hash_val; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; int error; struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); struct neigh_hash_table *nht; @@ -572,7 +572,7 @@ out_neigh_release: } EXPORT_SYMBOL(__neigh_create); -static u32 pneigh_hash(const void *pkey, int key_len) +static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); @@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len) static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct net *net, const void *pkey, - int key_len, + unsigned int key_len, struct net_device *dev) { while (n) { @@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], @@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net_device *dev, int creat) { struct pneigh_entry *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); @@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); @@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < (int)tbl->key_len) goto out; if (ndm->ndm_flags & NTF_PROXY) { @@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(tb[NDA_DST]) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) goto out; dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; -- cgit v1.2.3 From 763556980dde78166886734ae470664c67067dd4 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 10:44:57 +0200 Subject: ieee802154: atusb: fix firmware version check to enable frame retries Geert reported: as fw_ver_maj is unsigned char, gcc 4.1.2 complains: warning: comparison is always true due to limited range of data type Besides the warning the old check would also fail for firmware versions like 1.x with x < 3. These would support frame retries, but the driver would not enable the feature. Reported-by: Geert Uytterhoeven Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 115fa3f37a86..6b3cdf4bd667 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1060,7 +1060,7 @@ static int atusb_probe(struct usb_interface *interface, atusb_get_and_show_build(atusb); atusb_set_extended_addr(atusb); - if (atusb->fw_ver_maj >= 0 && atusb->fw_ver_min >= 3) + if ((atusb->fw_ver_maj == 0 && atusb->fw_ver_min >= 3) || atusb->fw_ver_maj > 0) hw->flags |= IEEE802154_HW_FRAME_RETRIES; ret = atusb_get_and_clear_error(atusb); -- cgit v1.2.3 From 06ff5dad46feee9ebc0b9f14f0ae3fe3d232392e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 26 Sep 2017 14:18:36 +0100 Subject: ieee802154: atusb: make two structures static, fixes warnings The arrays atusb_chip_data and hulusb_chip_data are local to the source and do not need to be in global scope, so make them static. Also remove unnecessary forward declaration of atusb_chip_data. Cleans up sparse warnings: symbol 'atusb_chip_data' was not declared. Should it be static? symbol 'hulusb_chip_data' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 6b3cdf4bd667..d5584063049f 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -45,8 +45,6 @@ #define ATUSB_ALLOC_DELAY_MS 100 /* delay after failed allocation */ #define ATUSB_TX_TIMEOUT_MS 200 /* on the air timeout */ -struct atusb_chip_data; - struct atusb { struct ieee802154_hw *hw; struct usb_device *usb_dev; @@ -767,14 +765,14 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) return 0; } -struct atusb_chip_data atusb_chip_data = { +static struct atusb_chip_data atusb_chip_data = { .t_channel_switch = 1, .rssi_base_val = -91, .set_txpower = atusb_set_txpower, .set_channel = atusb_set_channel, }; -struct atusb_chip_data hulusb_chip_data = { +static struct atusb_chip_data hulusb_chip_data = { .t_channel_switch = 11, .rssi_base_val = -100, .set_txpower = hulusb_set_txpower, -- cgit v1.2.3 From 1f4cf93b133ba6596ae69cfd09b48aa7b47cca41 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:04:23 +0200 Subject: net: ena: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_com.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 52beba8c7a39..ded29af648c9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -315,7 +315,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue cmd_size_in_bytes, comp, comp_size_in_bytes); - if (unlikely(IS_ERR(comp_ctx))) + if (IS_ERR(comp_ctx)) admin_queue->running_state = false; spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -1130,7 +1130,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, comp, comp_size); - if (unlikely(IS_ERR(comp_ctx))) { + if (IS_ERR(comp_ctx)) { if (comp_ctx == ERR_PTR(-ENODEV)) pr_debug("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); -- cgit v1.2.3 From 98e4fcff3e755c6c3de2d4f63c080b4009a599bd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:21:42 +0200 Subject: datagram: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index f7fb7e3f2acf..0b7b4c22719e 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, } if (!skb->len) { skb = skb_set_peeked(skb); - if (unlikely(IS_ERR(skb))) { + if (IS_ERR(skb)) { *err = PTR_ERR(skb); return NULL; } -- cgit v1.2.3 From 63a4e80be4f523dbde6412decfa8244ff73cc4b9 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:22:31 +0200 Subject: ipv6: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96861c702c06..13c3b697f8c0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3297,7 +3297,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev, struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (unlikely(IS_ERR(rt))) + if (IS_ERR(rt)) return PTR_ERR(rt); /* ifp->rt can be accessed outside of rtnl */ -- cgit v1.2.3 From d9db5e3680c87fdbdf86fcb1c42caea4bf98680c Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 11:22:58 +0200 Subject: kcm: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/kcm/kcmsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index af4e76ac88ff..0b750a22c4b9 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info, } newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { err = PTR_ERR(newfile); goto out_sock_alloc_fail; } -- cgit v1.2.3 From 1fac4b2fdbccab69cb781aae68f540be94d5549e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:12:26 +0200 Subject: bnxt_en: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c25f5b555adf..5ba49938ba55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1491,7 +1491,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons, (struct rx_tpa_end_cmp *)rxcmp, (struct rx_tpa_end_cmp_ext *)rxcmp1, event); - if (unlikely(IS_ERR(skb))) + if (IS_ERR(skb)) return -EBUSY; rc = -ENOMEM; -- cgit v1.2.3 From 92978ee801844b16180f2168ffffd05647da551a Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:13:23 +0200 Subject: net/mlx5: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 4614ddfa91eb..6a7c8b04447e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev, goto drop; } mdata = mlx5e_ipsec_add_metadata(skb); - if (unlikely(IS_ERR(mdata))) { + if (IS_ERR(mdata)) { atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata); goto drop; } -- cgit v1.2.3 From 2091c227fa855776aafffad7ecd25ac0734df1a0 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 26 Sep 2017 15:14:15 +0200 Subject: ldmvsw: Remove redundant unlikely() IS_ERR() already implies unlikely(), so it can be omitted. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/ldmvsw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5b56c24b6ed2..5feeaa9f0a9e 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -307,7 +307,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) /* Get (or create) the vnet associated with this port */ vp = vsw_get_vnet(hp, vdev->mp, &handle); - if (unlikely(IS_ERR(vp))) { + if (IS_ERR(vp)) { err = PTR_ERR(vp); pr_err("Failed to get vnet for vsw-port\n"); mdesc_release(hp); -- cgit v1.2.3 From b92af5a72ca982f0aa3df22f57a178aa5b0f4357 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Aug 2017 15:29:12 +0300 Subject: net/mlx5: Fix creating a new FTE when an existing but full FTE exists Currently, when a flow steering rule is added, we look for a FTE with an identical value. If we find a match, we try to merge the required destinations with the existing ones. In a case where the existing destination list is full, the code should return an error to its consumer. However, the current code just tries to create another FTE. Fixing that by returning an error in this special scenario. Fixes: f478be79a22e ("net/mlx5: Add hash table for flow groups in flow table") Signed-off-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a7bea688ec8..6ffe9251bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1449,7 +1449,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, int dest_num) { struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct mlx5_flow_handle *rule; struct rhlist_head *tmp, *list; struct match_list { struct list_head list; @@ -1513,6 +1513,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, unlock_ref_node(&g->node); } + rule = ERR_PTR(-ENOENT); + free_list: if (!list_empty(&match_head)) { struct match_list *match_tmp; @@ -1553,7 +1555,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule)) + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, -- cgit v1.2.3 From 800350a3f145010c353bd7425428c05ac5cfc26a Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 23 Aug 2017 17:50:03 +0300 Subject: net/mlx5: Avoid NULL pointer dereference on steering cleanup On cleanup, when the node is the last child of parent then it calls to tree_put_node on the parent, if the parent's reference count is decremented to 0 (for e.g. when deleting last destination of FTE) then we free the parent as well and vice versa. In such a case we will try to free the parent node again. Increment the parent reference count before cleaning it's children will prevent implicit release of the parent object. Fixes: 0da2d66666d3 ('net/mlx5: Properly remove all steering objects') signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6ffe9251bf62..f390828d8728 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2068,8 +2068,10 @@ static void clean_tree(struct fs_node *node) struct fs_node *iter; struct fs_node *temp; + tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); + tree_put_node(node); tree_remove_node(node); } } -- cgit v1.2.3 From 75d1d187b2ac86d1af2f1fd125ec21f104ca34b0 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 16 Jul 2017 15:18:45 +0300 Subject: net/mlx5: Move the entry index allocator to flow group When new flow table entry is added, we search for free index in the flow group and not in the flow table, therefore we can move the allocator from flow table to flow group. In downstream patches it will enable us to lock smaller part of the steering tree. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 18 +++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f390828d8728..2a0b5560a8b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -384,7 +384,6 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); - ida_destroy(&ft->fte_allocator); rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; @@ -445,7 +444,7 @@ static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) WARN_ON(ret); fte->status = 0; fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&ft->fte_allocator, fte->index); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } static void del_fte(struct fs_node *node) @@ -488,6 +487,7 @@ static void del_flow_group(struct fs_node *node) ft->autogroup.num_groups--; rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); @@ -537,6 +537,7 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) kfree(fg); return ERR_PTR(ret); } + ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -575,7 +576,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); - ida_init(&ft->fte_allocator); return ft; } @@ -892,7 +892,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: - ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -1003,6 +1002,7 @@ err_remove_fg: rhash_fg)); err_free_fg: rhashtable_destroy(&fg->ftes_hash); + ida_destroy(&fg->fte_allocator); kfree(fg); return ERR_PTR(err); @@ -1181,18 +1181,18 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act) { - struct mlx5_flow_table *ft; struct fs_fte *fte; int index; int ret; - fs_get_obj(ft, fg->node.parent); - index = ida_simple_get(&ft->fte_allocator, fg->start_index, - fg->start_index + fg->max_ftes, + index = ida_simple_get(&fg->fte_allocator, 0, + fg->max_ftes, GFP_KERNEL); if (index < 0) return ERR_PTR(index); + index += fg->start_index; + fte = alloc_fte(flow_act, match_value, index); if (IS_ERR(fte)) { ret = PTR_ERR(fte); @@ -1207,7 +1207,7 @@ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, err_hash: kfree(fte); err_alloc: - ida_simple_remove(&ft->fte_allocator, index); + ida_simple_remove(&fg->fte_allocator, index - fg->start_index); return ERR_PTR(ret); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 5509a752f98e..02c969c3d333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -119,7 +119,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - struct ida fte_allocator; struct rhltable fgs_hash; }; @@ -199,6 +198,7 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; + struct ida fte_allocator; u32 id; struct rhashtable ftes_hash; struct rhlist_head hash; -- cgit v1.2.3 From 46719d77d5f38b8ef04aa5a5cd91263b11d741d7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 25 Jul 2017 19:20:49 +0300 Subject: net/mlx5: Export building of matched flow groups list Refactor the code and export the build of the matched flow groups list to separate function. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 100 ++++++++++++++-------- 1 file changed, 64 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2a0b5560a8b5..33bcaca70a69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1441,47 +1441,87 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, return true; } -static struct mlx5_flow_handle * -try_add_to_existing_fg(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, - struct mlx5_flow_act *flow_act, - struct mlx5_flow_destination *dest, - int dest_num) -{ +struct match_list { + struct list_head list; struct mlx5_flow_group *g; - struct mlx5_flow_handle *rule; +}; + +struct match_list_head { + struct list_head list; + struct match_list first; +}; + +static void free_match_list(struct match_list_head *head) +{ + if (!list_empty(&head->list)) { + struct match_list *iter, *match_tmp; + + list_del(&head->first.list); + list_for_each_entry_safe(iter, match_tmp, &head->list, + list) { + list_del(&iter->list); + kfree(iter); + } + } +} + +static int build_match_list(struct match_list_head *match_head, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) +{ struct rhlist_head *tmp, *list; - struct match_list { - struct list_head list; - struct mlx5_flow_group *g; - } match_list, *iter; - LIST_HEAD(match_head); + struct mlx5_flow_group *g; + int err = 0; rcu_read_lock(); + INIT_LIST_HEAD(&match_head->list); /* Collect all fgs which has a matching match_criteria */ list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + /* RCU is atomic, we can't execute FW commands here */ rhl_for_each_entry_rcu(g, tmp, list, hash) { struct match_list *curr_match; - if (likely(list_empty(&match_head))) { - match_list.g = g; - list_add_tail(&match_list.list, &match_head); + if (likely(list_empty(&match_head->list))) { + match_head->first.g = g; + list_add_tail(&match_head->first.list, + &match_head->list); continue; } - curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - rcu_read_unlock(); - rule = ERR_PTR(-ENOMEM); - goto free_list; + free_match_list(match_head); + err = -ENOMEM; + goto out; } curr_match->g = g; - list_add_tail(&curr_match->list, &match_head); + list_add_tail(&curr_match->list, &match_head->list); } +out: rcu_read_unlock(); + return err; +} + +static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule; + struct match_list_head match_head; + struct match_list *iter; + int err; + + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head, list) { + list_for_each_entry(iter, &match_head.list, list) { struct fs_fte *fte; g = iter->g; @@ -1500,7 +1540,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head, list) { + list_for_each_entry(iter, &match_head.list, list) { g = iter->g; nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); @@ -1516,19 +1556,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, rule = ERR_PTR(-ENOENT); free_list: - if (!list_empty(&match_head)) { - struct match_list *match_tmp; - - /* The most common case is having one FG. Since we want to - * optimize this case, we save the first on the stack. - * Therefore, no need to free it. - */ - list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); - list_for_each_entry_safe(iter, match_tmp, &match_head, list) { - list_del(&iter->list); - kfree(iter); - } - } + free_match_list(&match_head); return rule; } -- cgit v1.2.3 From 19f100fef4ad46f21cfdfb1eeeb63fc38c2e57f1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 26 Jul 2017 16:28:26 +0300 Subject: net/mlx5: Refactor FTE and FG creation code Split the creation code to two parts: 1) Object allocation - allocate the steering node and initialize its resources. 2) The firmware command execution. Adding active flag to each node - this flag indicates if the object exists in the hardware or not, if not we don't free the hardware resource in error flow. This change will give us the ability to take write lock on the parent node (e.g. FG for FTE creationg) only on the first part. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 356 ++++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + 2 files changed, 190 insertions(+), 167 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 33bcaca70a69..41f26f440099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -179,14 +179,14 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - unsigned int refcount, void (*remove_func)(struct fs_node *)) { - atomic_set(&node->refcount, refcount); + atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); mutex_init(&node->lock); node->remove_func = remove_func; + node->active = false; } static void tree_add_node(struct fs_node *node, struct fs_node *parent) @@ -381,9 +381,11 @@ static void del_flow_table(struct fs_node *node) fs_get_obj(ft, node); dev = get_dev(&ft->node); - err = mlx5_cmd_destroy_flow_table(dev, ft); - if (err) - mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + if (node->active) { + err = mlx5_cmd_destroy_flow_table(dev, ft); + if (err) + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + } rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; @@ -435,18 +437,6 @@ out: } } -static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) -{ - struct mlx5_flow_table *ft; - int ret; - - ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - WARN_ON(ret); - fte->status = 0; - fs_get_obj(ft, fg->node.parent); - ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); -} - static void del_fte(struct fs_node *node) { struct mlx5_flow_table *ft; @@ -461,14 +451,20 @@ static void del_fte(struct fs_node *node) trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); - err = mlx5_cmd_delete_fte(dev, ft, - fte->index); - if (err) - mlx5_core_warn(dev, - "flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); + if (node->active) { + err = mlx5_cmd_delete_fte(dev, ft, + fte->index); + if (err) + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); + } - destroy_fte(fte, fg); + err = rhashtable_remove_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + WARN_ON(err); + ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } static void del_flow_group(struct fs_node *node) @@ -492,7 +488,7 @@ static void del_flow_group(struct fs_node *node) &fg->hash, rhash_fg); WARN_ON(err); - if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); } @@ -518,14 +514,57 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, return fte; } -static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) +static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, + u32 *match_value, + struct mlx5_flow_act *flow_act) +{ + struct fs_fte *fte; + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); + + fte = alloc_fte(flow_act, match_value, index + fg->start_index); + if (IS_ERR(fte)) { + ret = PTR_ERR(fte); + goto err_ida_remove; + } + + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_free; + + tree_init_node(&fte->node, del_fte); + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + + return fte; + +err_free: + kfree(fte); +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ERR_PTR(ret); +} + +static void dealloc_flow_group(struct mlx5_flow_group *fg) +{ + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); +} + +static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index) { struct mlx5_flow_group *fg; - void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, - create_fg_in, match_criteria); - u8 match_criteria_enable = MLX5_GET(create_flow_group_in, - create_fg_in, - match_criteria_enable); int ret; fg = kzalloc(sizeof(*fg), GFP_KERNEL); @@ -536,16 +575,47 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) if (ret) { kfree(fg); return ERR_PTR(ret); - } +} ida_init(&fg->fte_allocator); fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); fg->node.type = FS_TYPE_FLOW_GROUP; - fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in, - start_flow_index); - fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in, - end_flow_index) - fg->start_index + 1; + fg->start_index = start_index; + fg->max_ftes = end_index - start_index + 1; + + return fg; +} + +static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, + u8 match_criteria_enable, + void *match_criteria, + int start_index, + int end_index, + struct list_head *prev) +{ + struct mlx5_flow_group *fg; + int ret; + + fg = alloc_flow_group(match_criteria_enable, match_criteria, + start_index, end_index); + if (IS_ERR(fg)) + return fg; + + /* initialize refcnt, add to parent list */ + ret = rhltable_insert(&ft->fgs_hash, + &fg->hash, + rhash_fg); + if (ret) { + dealloc_flow_group(fg); + return ERR_PTR(ret); + } + + tree_init_node(&fg->node, del_flow_group); + tree_add_node(&fg->node, &ft->node); + /* Add node to group list */ + list_add(&fg->node.list, prev); + return fg; } @@ -870,7 +940,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, 1, del_flow_table); + tree_init_node(&ft->node, del_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -882,6 +952,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa err = connect_flow_table(root->dev, ft, fs_prio); if (err) goto destroy_ft; + ft->node.active = true; lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); @@ -959,55 +1030,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, } EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table); -/* Flow table should be locked */ -static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft, - u32 *fg_in, - struct list_head - *prev_fg, - bool is_auto_fg) -{ - struct mlx5_flow_group *fg; - struct mlx5_core_dev *dev = get_dev(&ft->node); - int err; - - if (!dev) - return ERR_PTR(-ENODEV); - - fg = alloc_flow_group(fg_in); - if (IS_ERR(fg)) - return fg; - - err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); - if (err) - goto err_free_fg; - - err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) - goto err_remove_fg; - - if (ft->autogroup.active) - ft->autogroup.num_groups++; - /* Add node to tree */ - tree_init_node(&fg->node, !is_auto_fg, del_flow_group); - tree_add_node(&fg->node, &ft->node); - /* Add node to group list */ - list_add(&fg->node.list, prev_fg); - - trace_mlx5_fs_add_fg(fg); - return fg; - -err_remove_fg: - WARN_ON(rhltable_remove(&ft->fgs_hash, - &fg->hash, - rhash_fg)); -err_free_fg: - rhashtable_destroy(&fg->ftes_hash); - ida_destroy(&fg->fte_allocator); - kfree(fg); - - return ERR_PTR(err); -} - struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { @@ -1016,7 +1038,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable = MLX5_GET(create_flow_group_in, fg_in, match_criteria_enable); + int start_index = MLX5_GET(create_flow_group_in, fg_in, + start_flow_index); + int end_index = MLX5_GET(create_flow_group_in, fg_in, + end_flow_index); + struct mlx5_core_dev *dev = get_dev(&ft->node); struct mlx5_flow_group *fg; + int err; if (!check_valid_mask(match_criteria_enable, match_criteria)) return ERR_PTR(-EINVAL); @@ -1025,8 +1053,20 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, return ERR_PTR(-EPERM); lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); + fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, + start_index, end_index, + ft->node.children.prev); unlock_ref_node(&ft->node); + if (IS_ERR(fg)) + return fg; + + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); + if (err) { + tree_put_node(&fg->node); + return ERR_PTR(err); + } + trace_mlx5_fs_add_fg(fg); + fg->node.active = true; return fg; } @@ -1111,7 +1151,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. */ - tree_init_node(&rule->node, 1, del_rule); + tree_init_node(&rule->node, del_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1167,6 +1207,7 @@ add_rule_fte(struct fs_fte *fte, if (err) goto free_handle; + fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; out: @@ -1177,59 +1218,17 @@ free_handle: return ERR_PTR(err); } -static struct fs_fte *create_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) +static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec) { - struct fs_fte *fte; - int index; - int ret; - - index = ida_simple_get(&fg->fte_allocator, 0, - fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - index += fg->start_index; - - fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_alloc; - } - ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) - goto err_hash; - - return fte; - -err_hash: - kfree(fte); -err_alloc: - ida_simple_remove(&fg->fte_allocator, index - fg->start_index); - return ERR_PTR(ret); -} - -static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct list_head *prev = &ft->node.children; - unsigned int candidate_index = 0; struct mlx5_flow_group *fg; - void *match_criteria_addr; + unsigned int candidate_index = 0; unsigned int group_size = 0; - u32 *in; if (!ft->autogroup.active) return ERR_PTR(-ENOENT); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return ERR_PTR(-ENOMEM); - if (ft->autogroup.num_groups < ft->autogroup.required_groups) /* We save place for flow groups in addition to max types */ group_size = ft->max_fte / (ft->autogroup.required_groups + 1); @@ -1247,25 +1246,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, prev = &fg->node.list; } - if (candidate_index + group_size > ft->max_fte) { - fg = ERR_PTR(-ENOSPC); + if (candidate_index + group_size > ft->max_fte) + return ERR_PTR(-ENOSPC); + + fg = alloc_insert_flow_group(ft, + spec->match_criteria_enable, + spec->match_criteria, + candidate_index, + candidate_index + group_size - 1, + prev); + if (IS_ERR(fg)) goto out; - } + + ft->autogroup.num_groups++; + +out: + return fg; +} + +static int create_auto_flow_group(struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + struct mlx5_core_dev *dev = get_dev(&ft->node); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *match_criteria_addr; + int err; + u32 *in; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; MLX5_SET(create_flow_group_in, in, match_criteria_enable, - match_criteria_enable); - MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index); - MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index + - group_size - 1); + fg->mask.match_criteria_enable); + MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index); + MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index + + fg->max_ftes - 1); match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - memcpy(match_criteria_addr, match_criteria, - MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(match_criteria_addr, fg->mask.match_criteria, + sizeof(fg->mask.match_criteria)); + + err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id); + if (!err) { + fg->node.active = true; + trace_mlx5_fs_add_fg(fg); + } - fg = create_flow_group_common(ft, in, prev, true); -out: kvfree(in); - return fg; + return err; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -1368,23 +1397,17 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, } fs_get_obj(ft, fg->node.parent); - fte = create_fte(fg, match_value, flow_act); + fte = alloc_insert_fte(fg, match_value, flow_act); if (IS_ERR(fte)) return (void *)fte; - tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); - destroy_fte(fte, fg); - kfree(fte); + tree_put_node(&fte->node); return handle; } - tree_add_node(&fte->node, &fg->node); - /* fte list isn't sorted */ - list_add_tail(&fte->node.list, &fg->node.children); - trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) { @@ -1571,6 +1594,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + int err; int i; if (!check_valid_spec(spec)) @@ -1586,24 +1610,22 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; - g = create_autogroup(ft, spec->match_criteria_enable, - spec->match_criteria); + g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; goto unlock; } + err = create_auto_flow_group(ft, g); + if (err) { + rule = ERR_PTR(err); + goto put_fg; + } + rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, NULL); - if (IS_ERR(rule)) { - /* Remove assumes refcount > 0 and autogroup creates a group - * with a refcount = 0. - */ - unlock_ref_node(&ft->node); - tree_get_node(&g->node); - tree_remove_node(&g->node); - return rule; - } +put_fg: + tree_put_node(&g->node); unlock: unlock_ref_node(&ft->node); return rule; @@ -1847,7 +1869,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, 1, NULL); + tree_init_node(&fs_prio->node, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1873,7 +1895,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -1998,7 +2020,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, 1, NULL); + tree_init_node(&ns->node, NULL); tree_add_node(&ns->node, NULL); return root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 02c969c3d333..6e5d25b4f8de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -82,6 +82,7 @@ struct fs_node { /* lock the node for writing and traversing */ struct mutex lock; atomic_t refcount; + bool active; void (*remove_func)(struct fs_node *); }; -- cgit v1.2.3 From c7784b1c8ab3f44dc2e643a8feb77584792c9108 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 09:07:44 +0300 Subject: net/mlx5: Replace fs_node mutex with reader/writer semaphore Currently, steering object is protected by mutex lock, replace the mutex lock with reader/writer semaphore . In this patch we still use only write semaphore. In downstream patches we will switch part of the write locks to read locks. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 28 +++++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 41f26f440099..9406e7272807 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -145,10 +145,10 @@ static struct init_tree_node { } }; -enum fs_i_mutex_lock_class { - FS_MUTEX_GRANDPARENT, - FS_MUTEX_PARENT, - FS_MUTEX_CHILD +enum fs_i_lock_class { + FS_LOCK_GRANDPARENT, + FS_LOCK_PARENT, + FS_LOCK_CHILD }; static const struct rhashtable_params rhash_fte = { @@ -184,7 +184,7 @@ static void tree_init_node(struct fs_node *node, atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); - mutex_init(&node->lock); + init_rwsem(&node->lock); node->remove_func = remove_func; node->active = false; } @@ -208,10 +208,10 @@ static void tree_get_node(struct fs_node *node) } static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_mutex_lock_class class) + enum fs_i_lock_class class) { if (node) { - mutex_lock_nested(&node->lock, class); + down_write_nested(&node->lock, class); atomic_inc(&node->refcount); } } @@ -219,7 +219,7 @@ static void nested_lock_ref_node(struct fs_node *node, static void lock_ref_node(struct fs_node *node) { if (node) { - mutex_lock(&node->lock); + down_write(&node->lock); atomic_inc(&node->refcount); } } @@ -228,7 +228,7 @@ static void unlock_ref_node(struct fs_node *node) { if (node) { atomic_dec(&node->refcount); - mutex_unlock(&node->lock); + up_write(&node->lock); } } @@ -1376,7 +1376,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int old_action; int ret; - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); ret = check_conflicting_ftes(fte, flow_act); if (ret) { handle = ERR_PTR(ret); @@ -1400,7 +1400,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, fte = alloc_insert_fte(fg, match_value, flow_act); if (IS_ERR(fte)) return (void *)fte; - nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); + nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); @@ -1548,7 +1548,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct fs_fte *fte; g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + nested_lock_ref_node(&g->node, FS_LOCK_PARENT); fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, rhash_fte); if (fte) { @@ -1566,7 +1566,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, list_for_each_entry(iter, &match_head.list, list) { g = iter->g; - nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + nested_lock_ref_node(&g->node, FS_LOCK_PARENT); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, NULL); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { @@ -1605,7 +1605,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, return ERR_PTR(-EINVAL); } - nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); + nested_lock_ref_node(&ft->node, FS_LOCK_GRANDPARENT); rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 6e5d25b4f8de..b5c079f35051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -80,7 +80,7 @@ struct fs_node { struct fs_node *parent; struct fs_node *root; /* lock the node for writing and traversing */ - struct mutex lock; + struct rw_semaphore lock; atomic_t refcount; bool active; void (*remove_func)(struct fs_node *); -- cgit v1.2.3 From bd71b08ec2ee4504bcc3b37a9283ce15e93dfacd Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 09:19:11 +0300 Subject: net/mlx5: Support multiple updates of steering rules in parallel Most of the time spent on adding new flow steering rule is executing the firmware command. The most common action is adding a new flow steering entry. In order to enhance the update rate we parallelize the commands by doing the following: 1) Replace the mutex lock with readers-writers semaphore and take the write lock only when necessary (e.g. allocating a new flow table entry index or adding a node to the parent's children list). When we try to find a suitable child in the parent's children list (e.g. search for flow group with the same match_criteria of the rule) then we only take the read lock. 2) Add versioning mechanism - each steering entity (FT, FG, FTE, DST) will have an incremental version. The version is increased when the entity is changed (e.g. when a new FTE was added to FG - the FG's version is increased). Versioning is used in order to determine if the last traverse of an entity's children is valid or a rescan under write lock is required. This support improves the insertion rate of steering rules from ~5k/sec to ~40k/sec. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 386 +++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 4 +- 2 files changed, 264 insertions(+), 126 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9406e7272807..e7301cf747c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = { }; -static void del_rule(struct fs_node *node); -static void del_flow_table(struct fs_node *node); -static void del_flow_group(struct fs_node *node); -static void del_fte(struct fs_node *node); +static void del_hw_flow_table(struct fs_node *node); +static void del_hw_flow_group(struct fs_node *node); +static void del_hw_fte(struct fs_node *node); +static void del_sw_flow_table(struct fs_node *node); +static void del_sw_flow_group(struct fs_node *node); +static void del_sw_fte(struct fs_node *node); +/* Delete rule (destination) is special case that + * requires to lock the FTE for all the deletion process. + */ +static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); static struct mlx5_flow_rule * @@ -179,13 +185,15 @@ find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); static void tree_init_node(struct fs_node *node, - void (*remove_func)(struct fs_node *)) + void (*del_hw_func)(struct fs_node *), + void (*del_sw_func)(struct fs_node *)) { atomic_set(&node->refcount, 1); INIT_LIST_HEAD(&node->list); INIT_LIST_HEAD(&node->children); init_rwsem(&node->lock); - node->remove_func = remove_func; + node->del_hw_func = del_hw_func; + node->del_sw_func = del_sw_func; node->active = false; } @@ -202,50 +210,69 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent) node->root = parent->root; } -static void tree_get_node(struct fs_node *node) +static int tree_get_node(struct fs_node *node) { - atomic_inc(&node->refcount); + return atomic_add_unless(&node->refcount, 1, 0); } -static void nested_lock_ref_node(struct fs_node *node, - enum fs_i_lock_class class) +static void nested_down_read_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - down_write_nested(&node->lock, class); + down_read_nested(&node->lock, class); atomic_inc(&node->refcount); } } -static void lock_ref_node(struct fs_node *node) +static void nested_down_write_ref_node(struct fs_node *node, + enum fs_i_lock_class class) { if (node) { - down_write(&node->lock); + down_write_nested(&node->lock, class); atomic_inc(&node->refcount); } } -static void unlock_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node) { if (node) { - atomic_dec(&node->refcount); - up_write(&node->lock); + down_write(&node->lock); + atomic_inc(&node->refcount); } } +static void up_read_ref_node(struct fs_node *node) +{ + atomic_dec(&node->refcount); + up_read(&node->lock); +} + +static void up_write_ref_node(struct fs_node *node) +{ + atomic_dec(&node->refcount); + up_write(&node->lock); +} + static void tree_put_node(struct fs_node *node) { struct fs_node *parent_node = node->parent; - lock_ref_node(parent_node); if (atomic_dec_and_test(&node->refcount)) { - if (parent_node) + if (node->del_hw_func) + node->del_hw_func(node); + if (parent_node) { + /* Only root namespace doesn't have parent and we just + * need to free its node. + */ + down_write_ref_node(parent_node); list_del_init(&node->list); - if (node->remove_func) - node->remove_func(node); + if (node->del_sw_func) + node->del_sw_func(node); + up_write_ref_node(parent_node); + } kfree(node); node = NULL; } - unlock_ref_node(parent_node); if (!node && parent_node) tree_put_node(parent_node); } @@ -371,11 +398,10 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node) return NULL; } -static void del_flow_table(struct fs_node *node) +static void del_hw_flow_table(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - struct fs_prio *prio; int err; fs_get_obj(ft, node); @@ -386,12 +412,21 @@ static void del_flow_table(struct fs_node *node) if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); } +} + +static void del_sw_flow_table(struct fs_node *node) +{ + struct mlx5_flow_table *ft; + struct fs_prio *prio; + + fs_get_obj(ft, node); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } -static void del_rule(struct fs_node *node) +static void del_sw_hw_rule(struct fs_node *node) { struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; @@ -407,7 +442,6 @@ static void del_rule(struct fs_node *node) fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); trace_mlx5_fs_del_rule(rule); - list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); list_del(&rule->next_ft); @@ -437,7 +471,7 @@ out: } } -static void del_fte(struct fs_node *node) +static void del_hw_fte(struct fs_node *node) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; @@ -448,8 +482,8 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); - trace_mlx5_fs_del_fte(fte); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); if (node->active) { err = mlx5_cmd_delete_fte(dev, ft, @@ -459,6 +493,16 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); } +} + +static void del_sw_fte(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct fs_fte *fte; + int err; + + fs_get_obj(fte, node); + fs_get_obj(fg, fte->node.parent); err = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, @@ -467,30 +511,39 @@ static void del_fte(struct fs_node *node) ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); } -static void del_flow_group(struct fs_node *node) +static void del_hw_flow_group(struct fs_node *node) { struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; - int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); trace_mlx5_fs_del_fg(fg); - if (ft->autogroup.active) - ft->autogroup.num_groups--; + if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); +} + +static void del_sw_flow_group(struct fs_node *node) +{ + struct mlx5_flow_group *fg; + struct mlx5_flow_table *ft; + int err; + + fs_get_obj(fg, node); + fs_get_obj(ft, fg->node.parent); rhashtable_destroy(&fg->ftes_hash); ida_destroy(&fg->fte_allocator); + if (ft->autogroup.active) + ft->autogroup.num_groups--; err = rhltable_remove(&ft->fgs_hash, &fg->hash, rhash_fg); WARN_ON(err); - if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", - fg->id, ft->id); } static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, @@ -540,7 +593,7 @@ static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, if (ret) goto err_free; - tree_init_node(&fte->node, del_fte); + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); tree_add_node(&fte->node, &fg->node); list_add_tail(&fte->node.list, &fg->node.children); @@ -611,10 +664,11 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f return ERR_PTR(ret); } - tree_init_node(&fg->node, del_flow_group); + tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ list_add(&fg->node.list, prev); + atomic_inc(&ft->node.version); return fg; } @@ -794,7 +848,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - lock_ref_node(&fte->node); + down_write_ref_node(&fte->node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -803,7 +857,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, ft, fg->id, modify_mask, fte); - unlock_ref_node(&fte->node); + up_write_ref_node(&fte->node); return err; } @@ -940,7 +994,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - tree_init_node(&ft->node, del_flow_table); + tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, @@ -953,11 +1007,11 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa if (err) goto destroy_ft; ft->node.active = true; - lock_ref_node(&fs_prio->node); + down_write_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - unlock_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); return ft; destroy_ft: @@ -1052,11 +1106,11 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - lock_ref_node(&ft->node); + down_write_ref_node(&ft->node); fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, start_index, end_index, ft->node.children.prev); - unlock_ref_node(&ft->node); + up_write_ref_node(&ft->node); if (IS_ERR(fg)) return fg; @@ -1151,7 +1205,7 @@ create_flow_handle(struct fs_fte *fte, /* Add dest to dests list- we need flow tables to be in the * end of the list for forward to next prio rules. */ - tree_init_node(&rule->node, del_rule); + tree_init_node(&rule->node, NULL, del_sw_hw_rule); if (dest && dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) list_add(&rule->node.list, &fte->node.children); @@ -1209,6 +1263,7 @@ add_rule_fte(struct fs_fte *fte, fte->node.active = true; fte->status |= FS_FTE_STATUS_EXISTING; + atomic_inc(&fte->node.version); out: return handle; @@ -1369,54 +1424,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, struct fs_fte *fte) { struct mlx5_flow_handle *handle; - struct mlx5_flow_table *ft; + int old_action; int i; + int ret; - if (fte) { - int old_action; - int ret; - - nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); - ret = check_conflicting_ftes(fte, flow_act); - if (ret) { - handle = ERR_PTR(ret); - goto unlock_fte; - } - - old_action = fte->action; - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - trace_mlx5_fs_set_fte(fte, false); - goto add_rules; - } - } - fs_get_obj(ft, fg->node.parent); + ret = check_conflicting_ftes(fte, flow_act); + if (ret) + return ERR_PTR(ret); - fte = alloc_insert_fte(fg, match_value, flow_act); - if (IS_ERR(fte)) - return (void *)fte; - nested_lock_ref_node(&fte->node, FS_LOCK_CHILD); - handle = add_rule_fte(fte, fg, dest, dest_num, false); + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); if (IS_ERR(handle)) { - unlock_ref_node(&fte->node); - tree_put_node(&fte->node); + fte->action = old_action; return handle; } + trace_mlx5_fs_set_fte(fte, false); -add_rules: for (i = 0; i < handle->num_rules; i++) { if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); trace_mlx5_fs_add_rule(handle->rule[i]); } } -unlock_fte: - unlock_ref_node(&fte->node); return handle; } @@ -1480,8 +1511,10 @@ static void free_match_list(struct match_list_head *head) struct match_list *iter, *match_tmp; list_del(&head->first.list); + tree_put_node(&head->first.g->node); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { + tree_put_node(&iter->g->node); list_del(&iter->list); kfree(iter); } @@ -1505,6 +1538,8 @@ static int build_match_list(struct match_list_head *match_head, struct match_list *curr_match; if (likely(list_empty(&match_head->list))) { + if (!tree_get_node(&g->node)) + continue; match_head->first.g = g; list_add_tail(&match_head->first.list, &match_head->list); @@ -1517,6 +1552,10 @@ static int build_match_list(struct match_list_head *match_head, err = -ENOMEM; goto out; } + if (!tree_get_node(&g->node)) { + kfree(curr_match); + continue; + } curr_match->g = g; list_add_tail(&curr_match->list, &match_head->list); } @@ -1525,62 +1564,119 @@ out: return err; } +static u64 matched_fgs_get_version(struct list_head *match_head) +{ + struct match_list *iter; + u64 version = 0; + + list_for_each_entry(iter, match_head, list) + version += (u64)atomic_read(&iter->g->node.version); + return version; +} + static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct list_head *match_head, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + int ft_version) { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; - struct match_list_head match_head; struct match_list *iter; - int err; + bool take_write = false; + struct fs_fte *fte; + u64 version; - /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec); - if (err) - return ERR_PTR(err); + list_for_each_entry(iter, match_head, list) { + nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); + ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL); + } +search_again_locked: + version = matched_fgs_get_version(match_head); /* Try to find a fg that already contains a matching fte */ - list_for_each_entry(iter, &match_head.list, list) { - struct fs_fte *fte; + list_for_each_entry(iter, match_head, list) { + struct fs_fte *fte_tmp; g = iter->g; - nested_lock_ref_node(&g->node, FS_LOCK_PARENT); - fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, - rhash_fte); - if (fte) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); - unlock_ref_node(&g->node); - goto free_list; + fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (!fte_tmp || !tree_get_node(&fte_tmp->node)) + continue; + + nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + } else { + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); } - unlock_ref_node(&g->node); + + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte_tmp); + up_write_ref_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node); + return rule; } /* No group with matching fte found. Try to add a new fte to any * matching fg. */ - list_for_each_entry(iter, &match_head.list, list) { - g = iter->g; - nested_lock_ref_node(&g->node, FS_LOCK_PARENT); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, NULL); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { - unlock_ref_node(&g->node); - goto free_list; - } - unlock_ref_node(&g->node); + if (!take_write) { + list_for_each_entry(iter, match_head, list) + up_read_ref_node(&iter->g->node); + list_for_each_entry(iter, match_head, list) + nested_down_write_ref_node(&iter->g->node, + FS_LOCK_PARENT); + take_write = true; } - rule = ERR_PTR(-ENOENT); + /* Check the ft version, for case that new flow group + * was added while the fgs weren't locked + */ + if (atomic_read(&ft->node.version) != ft_version) { + rule = ERR_PTR(-EAGAIN); + goto out; + } -free_list: - free_match_list(&match_head); + /* Check the fgs version, for case the new FTE with the + * same values was added while the fgs weren't locked + */ + if (version != matched_fgs_get_version(match_head)) + goto search_again_locked; + + list_for_each_entry(iter, match_head, list) { + g = iter->g; + + if (!g->node.active) + continue; + fte = alloc_insert_fte(g, spec->match_value, flow_act); + if (IS_ERR(fte)) { + if (PTR_ERR(fte) == -ENOSPC) + continue; + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + return (void *)fte; + } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); + return rule; + } + rule = ERR_PTR(-ENOENT); +out: + list_for_each_entry(iter, match_head, list) + up_write_ref_node(&iter->g->node); return rule; } @@ -1594,6 +1690,10 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, { struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; + struct match_list_head match_head; + bool take_write = false; + struct fs_fte *fte; + int version; int err; int i; @@ -1604,31 +1704,67 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } + nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT); +search_again_locked: + version = atomic_read(&ft->node.version); - nested_lock_ref_node(&ft->node, FS_LOCK_GRANDPARENT); - rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOENT) - goto unlock; + /* Collect all fgs which has a matching match_criteria */ + err = build_match_list(&match_head, ft, spec); + if (err) + return ERR_PTR(err); + + if (!take_write) + up_read_ref_node(&ft->node); + + rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, + dest_num, version); + free_match_list(&match_head); + if (!IS_ERR(rule) || + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + return rule; + + if (!take_write) { + nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); + take_write = true; + } + + if (PTR_ERR(rule) == -EAGAIN || + version != atomic_read(&ft->node.version)) + goto search_again_locked; g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = (void *)g; - goto unlock; + up_write_ref_node(&ft->node); + return rule; } + nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + up_write_ref_node(&ft->node); + err = create_auto_flow_group(ft, g); - if (err) { - rule = ERR_PTR(err); - goto put_fg; + if (err) + goto err_release_fg; + + fte = alloc_insert_fte(g, spec->match_value, flow_act); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + goto err_release_fg; } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); + up_write_ref_node(&g->node); rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, NULL); -put_fg: + dest_num, fte); + up_write_ref_node(&fte->node); + tree_put_node(&fte->node); tree_put_node(&g->node); -unlock: - unlock_ref_node(&ft->node); return rule; + +err_release_fg: + up_write_ref_node(&g->node); + tree_put_node(&g->node); + return ERR_PTR(err); } static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) @@ -1869,7 +2005,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return ERR_PTR(-ENOMEM); fs_prio->node.type = FS_TYPE_PRIO; - tree_init_node(&fs_prio->node, NULL); + tree_init_node(&fs_prio->node, NULL, NULL); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; fs_prio->prio = prio; @@ -1895,7 +2031,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ERR_PTR(-ENOMEM); fs_init_namespace(ns); - tree_init_node(&ns->node, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, &prio->node); list_add_tail(&ns->node.list, &prio->node.children); @@ -2020,7 +2156,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering ns = &root_ns->ns; fs_init_namespace(ns); mutex_init(&root_ns->chain_lock); - tree_init_node(&ns->node, NULL); + tree_init_node(&ns->node, NULL, NULL); tree_add_node(&ns->node, NULL); return root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index b5c079f35051..875b753862b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -83,7 +83,9 @@ struct fs_node { struct rw_semaphore lock; atomic_t refcount; bool active; - void (*remove_func)(struct fs_node *); + void (*del_hw_func)(struct fs_node *); + void (*del_sw_func)(struct fs_node *); + atomic_t version; }; struct mlx5_flow_rule { -- cgit v1.2.3 From f5c2ff179f51101893e42e78683b23a487929d6c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 29 Aug 2017 19:17:12 +0300 Subject: net/mlx5: Allocate FTE object without lock Allocation of new FTE is a massive operation, part of it could be done without taking the flow group write lock. Split the FTE allocation to two functions of actions which need to be under lock and action which don't have. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 92 +++++++++++------------ 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e7301cf747c5..bc4bbb72fa86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -546,9 +546,33 @@ static void del_sw_flow_group(struct fs_node *node) WARN_ON(err); } -static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, - u32 *match_value, - unsigned int index) +static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) +{ + int index; + int ret; + + index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL); + if (index < 0) + return index; + + fte->index = index + fg->start_index; + ret = rhashtable_insert_fast(&fg->ftes_hash, + &fte->hash, + rhash_fte); + if (ret) + goto err_ida_remove; + + tree_add_node(&fte->node, &fg->node); + list_add_tail(&fte->node.list, &fg->node.children); + return 0; + +err_ida_remove: + ida_simple_remove(&fg->fte_allocator, index); + return ret; +} + +static struct fs_fte *alloc_fte(u32 *match_value, + struct mlx5_flow_act *flow_act) { struct fs_fte *fte; @@ -559,51 +583,13 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, memcpy(fte->val, match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->flow_tag = flow_act->flow_tag; - fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; fte->modify_id = flow_act->modify_id; - return fte; -} - -static struct fs_fte *alloc_insert_fte(struct mlx5_flow_group *fg, - u32 *match_value, - struct mlx5_flow_act *flow_act) -{ - struct fs_fte *fte; - int index; - int ret; - - index = ida_simple_get(&fg->fte_allocator, 0, - fg->max_ftes, - GFP_KERNEL); - if (index < 0) - return ERR_PTR(index); - - fte = alloc_fte(flow_act, match_value, index + fg->start_index); - if (IS_ERR(fte)) { - ret = PTR_ERR(fte); - goto err_ida_remove; - } - - ret = rhashtable_insert_fast(&fg->ftes_hash, - &fte->hash, - rhash_fte); - if (ret) - goto err_free; - tree_init_node(&fte->node, del_hw_fte, del_sw_fte); - tree_add_node(&fte->node, &fg->node); - list_add_tail(&fte->node.list, &fg->node.children); return fte; - -err_free: - kfree(fte); -err_ida_remove: - ida_simple_remove(&fg->fte_allocator, index); - return ERR_PTR(ret); } static void dealloc_flow_group(struct mlx5_flow_group *fg) @@ -1589,6 +1575,11 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, bool take_write = false; struct fs_fte *fte; u64 version; + int err; + + fte = alloc_fte(spec->match_value, flow_act); + if (IS_ERR(fte)) + return ERR_PTR(-ENOMEM); list_for_each_entry(iter, match_head, list) { nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT); @@ -1620,6 +1611,7 @@ search_again_locked: flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node); tree_put_node(&fte_tmp->node); + kfree(fte); return rule; } @@ -1655,13 +1647,14 @@ search_again_locked: if (!g->node.active) continue; - fte = alloc_insert_fte(g, spec->match_value, flow_act); - if (IS_ERR(fte)) { - if (PTR_ERR(fte) == -ENOSPC) + err = insert_fte(g, fte); + if (err) { + if (err == -ENOSPC) continue; list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - return (void *)fte; + kfree(fte); + return ERR_PTR(err); } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); @@ -1677,6 +1670,7 @@ search_again_locked: out: list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); + kfree(fte); return rule; } @@ -1746,12 +1740,18 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_insert_fte(g, spec->match_value, flow_act); + fte = alloc_fte(spec->match_value, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; } + err = insert_fte(g, fte); + if (err) { + kfree(fte); + goto err_release_fg; + } + nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node); rule = add_rule_fg(g, spec->match_value, flow_act, dest, -- cgit v1.2.3 From a369d4ac4dff92129ea0dfa3d66f45a830e29098 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Aug 2017 13:18:40 +0300 Subject: net/mlx5: Add FGs and FTEs memory pool Add memory pool allocation for flow groups and flow table entry. It is useful because these objects are not small and could be allocated/deallocated many times. Signed-off-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 67 +++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 2 + 2 files changed, 53 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index bc4bbb72fa86..7a136ae2547a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -269,8 +269,9 @@ static void tree_put_node(struct fs_node *node) if (node->del_sw_func) node->del_sw_func(node); up_write_ref_node(parent_node); + } else { + kfree(node); } - kfree(node); node = NULL; } if (!node && parent_node) @@ -389,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) return container_of(ns, struct mlx5_flow_root_namespace, ns); } +static inline struct mlx5_flow_steering *get_steering(struct fs_node *node) +{ + struct mlx5_flow_root_namespace *root = find_root(node); + + if (root) + return root->dev->priv.steering; + return NULL; +} + static inline struct mlx5_core_dev *get_dev(struct fs_node *node) { struct mlx5_flow_root_namespace *root = find_root(node); @@ -424,6 +434,7 @@ static void del_sw_flow_table(struct fs_node *node) rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; + kfree(ft); } static void del_sw_hw_rule(struct fs_node *node) @@ -469,6 +480,7 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } + kfree(rule); } static void del_hw_fte(struct fs_node *node) @@ -497,6 +509,7 @@ static void del_hw_fte(struct fs_node *node) static void del_sw_fte(struct fs_node *node) { + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; struct fs_fte *fte; int err; @@ -509,6 +522,7 @@ static void del_sw_fte(struct fs_node *node) rhash_fte); WARN_ON(err); ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index); + kmem_cache_free(steering->ftes_cache, fte); } static void del_hw_flow_group(struct fs_node *node) @@ -529,6 +543,7 @@ static void del_hw_flow_group(struct fs_node *node) static void del_sw_flow_group(struct fs_node *node) { + struct mlx5_flow_steering *steering = get_steering(node); struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; int err; @@ -544,6 +559,7 @@ static void del_sw_flow_group(struct fs_node *node) &fg->hash, rhash_fg); WARN_ON(err); + kmem_cache_free(steering->fgs_cache, fg); } static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) @@ -571,12 +587,14 @@ err_ida_remove: return ret; } -static struct fs_fte *alloc_fte(u32 *match_value, +static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, + u32 *match_value, struct mlx5_flow_act *flow_act) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct fs_fte *fte; - fte = kzalloc(sizeof(*fte), GFP_KERNEL); + fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL); if (!fte) return ERR_PTR(-ENOMEM); @@ -592,13 +610,15 @@ static struct fs_fte *alloc_fte(u32 *match_value, return fte; } -static void dealloc_flow_group(struct mlx5_flow_group *fg) +static void dealloc_flow_group(struct mlx5_flow_steering *steering, + struct mlx5_flow_group *fg) { rhashtable_destroy(&fg->ftes_hash); - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); } -static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, +static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, + u8 match_criteria_enable, void *match_criteria, int start_index, int end_index) @@ -606,13 +626,13 @@ static struct mlx5_flow_group *alloc_flow_group(u8 match_criteria_enable, struct mlx5_flow_group *fg; int ret; - fg = kzalloc(sizeof(*fg), GFP_KERNEL); + fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); if (ret) { - kfree(fg); + kmem_cache_free(steering->fgs_cache, fg); return ERR_PTR(ret); } ida_init(&fg->fte_allocator); @@ -633,10 +653,11 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f int end_index, struct list_head *prev) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *fg; int ret; - fg = alloc_flow_group(match_criteria_enable, match_criteria, + fg = alloc_flow_group(steering, match_criteria_enable, match_criteria, start_index, end_index); if (IS_ERR(fg)) return fg; @@ -646,7 +667,7 @@ static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *f &fg->hash, rhash_fg); if (ret) { - dealloc_flow_group(fg); + dealloc_flow_group(steering, fg); return ERR_PTR(ret); } @@ -1569,6 +1590,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, int dest_num, int ft_version) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; struct match_list *iter; @@ -1577,7 +1599,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(spec->match_value, flow_act); + fte = alloc_fte(ft, spec->match_value, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1611,7 +1633,7 @@ search_again_locked: flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node); tree_put_node(&fte_tmp->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1653,7 +1675,7 @@ search_again_locked: continue; list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return ERR_PTR(err); } @@ -1670,7 +1692,7 @@ search_again_locked: out: list_for_each_entry(iter, match_head, list) up_write_ref_node(&iter->g->node); - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1682,6 +1704,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, int dest_num) { + struct mlx5_flow_steering *steering = get_steering(&ft->node); struct mlx5_flow_group *g; struct mlx5_flow_handle *rule; struct match_list_head match_head; @@ -1740,7 +1763,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(spec->match_value, flow_act); + fte = alloc_fte(ft, spec->match_value, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1748,7 +1771,7 @@ search_again_locked: err = insert_fte(g, fte); if (err) { - kfree(fte); + kmem_cache_free(steering->ftes_cache, fte); goto err_release_fg; } @@ -2281,6 +2304,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); + kmem_cache_destroy(steering->ftes_cache); + kmem_cache_destroy(steering->fgs_cache); kfree(steering); } @@ -2386,6 +2411,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; + steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs", + sizeof(struct mlx5_flow_group), 0, + 0, NULL); + steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0, + 0, NULL); + if (!steering->ftes_cache || !steering->fgs_cache) { + err = -ENOMEM; + goto err; + } + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 875b753862b0..ebe184515433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -65,6 +65,8 @@ enum fs_fte_status { struct mlx5_flow_steering { struct mlx5_core_dev *dev; + struct kmem_cache *fgs_cache; + struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; -- cgit v1.2.3 From e0a8f9de16fce34fc2957eca4c71d3ff2ac286d5 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:42 +0300 Subject: qed: Add iWARP enablement support This patch is the last of the initial iWARP patch series. It adds the possiblity to actually detect iWARP from the device and enable it in the critical locations which basically make iWARP available. It wasn't submitted until now as iWARP hadn't been accepted into the rdma tree. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 6 ++++++ drivers/net/ethernet/qlogic/qed/qed_mcp.c | 8 ++++---- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 5 ++++- drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 1 + 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index af106be8cc08..afd07ad91631 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2069,6 +2069,12 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs); + if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) { + DP_NOTICE(p_hwfn, + "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); + p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE; + } + switch (p_hwfn->hw_info.personality) { case QED_PCI_ETH_IWARP: /* Each QP requires one connection */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 376485d99357..8b99c7d26f34 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1691,12 +1691,12 @@ qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn, case FW_MB_PARAM_GET_PF_RDMA_ROCE: *p_proto = QED_PCI_ETH_ROCE; break; + case FW_MB_PARAM_GET_PF_RDMA_IWARP: + *p_proto = QED_PCI_ETH_IWARP; + break; case FW_MB_PARAM_GET_PF_RDMA_BOTH: - DP_NOTICE(p_hwfn, - "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n"); - *p_proto = QED_PCI_ETH_ROCE; + *p_proto = QED_PCI_ETH_RDMA; break; - case FW_MB_PARAM_GET_PF_RDMA_IWARP: default: DP_NOTICE(p_hwfn, "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 6fb99518a61f..06715f7403ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -156,7 +156,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, return rc; p_hwfn->p_rdma_info = p_rdma_info; - p_rdma_info->proto = PROTOCOLID_ROCE; + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + p_rdma_info->proto = PROTOCOLID_IWARP; + else + p_rdma_info->proto = PROTOCOLID_ROCE; num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, NULL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 46d0c3cb83a5..a1d33f35aad3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -377,6 +377,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, p_ramrod->personality = PERSONALITY_ISCSI; break; case QED_PCI_ETH_ROCE: + case QED_PCI_ETH_IWARP: p_ramrod->personality = PERSONALITY_RDMA_AND_ETH; break; default: -- cgit v1.2.3 From d1abfd0b4ee2b83af88098a0c7105622c3d66e73 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:43 +0300 Subject: qed: Add iWARP out of order support iWARP requires OOO support which is already provided by the ll2 interface (until now was used only for iSCSI offload). The changes mostly include opening a ll2 dedicated connection for OOO and notifiying the FW about the handle id. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 44 +++++++++++++++++++++++++++++ drivers/net/ethernet/qlogic/qed/qed_iwarp.h | 11 +++++++- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 7 +++-- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 9d989c96278c..568e9853cc8d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -41,6 +41,7 @@ #include "qed_rdma.h" #include "qed_reg_addr.h" #include "qed_sp.h" +#include "qed_ooo.h" #define QED_IWARP_ORD_DEFAULT 32 #define QED_IWARP_IRD_DEFAULT 32 @@ -119,6 +120,13 @@ static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod) +{ + p_ramrod->ll2_ooo_q_index = RESC_START(p_hwfn, QED_LL2_QUEUE) + + p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle; +} + static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid) { int rc; @@ -1876,6 +1884,16 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; } + if (iwarp_info->ll2_ooo_handle != QED_IWARP_HANDLE_INVAL) { + rc = qed_ll2_terminate_connection(p_hwfn, + iwarp_info->ll2_ooo_handle); + if (rc) + DP_INFO(p_hwfn, "Failed to terminate ooo connection\n"); + + qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; + } + qed_llh_remove_mac_filter(p_hwfn, p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr); return rc; @@ -1927,10 +1945,12 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, struct qed_iwarp_info *iwarp_info; struct qed_ll2_acquire_data data; struct qed_ll2_cbs cbs; + u16 n_ooo_bufs; int rc = 0; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL; + iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL; iwarp_info->max_mtu = params->max_mtu; @@ -1978,6 +1998,29 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, if (rc) goto err; + /* Start OOO connection */ + data.input.conn_type = QED_LL2_TYPE_OOO; + data.input.mtu = params->max_mtu; + + n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) / + iwarp_info->max_mtu; + n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE); + + data.input.rx_num_desc = n_ooo_bufs; + data.input.rx_num_ooo_buffers = n_ooo_bufs; + + data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */ + data.input.tx_num_desc = QED_IWARP_LL2_OOO_DEF_TX_SIZE; + data.p_connection_handle = &iwarp_info->ll2_ooo_handle; + + rc = qed_ll2_acquire_connection(p_hwfn, &data); + if (rc) + goto err; + + rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle); + if (rc) + goto err; + return rc; err: qed_iwarp_ll2_stop(p_hwfn, p_ptt); @@ -2014,6 +2057,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP, qed_iwarp_async_event); + qed_ooo_setup(p_hwfn); return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 148ef3c33a5d..9e2bfde894df 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -47,7 +47,12 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state); #define QED_IWARP_LL2_SYN_TX_SIZE (128) #define QED_IWARP_LL2_SYN_RX_SIZE (256) #define QED_IWARP_MAX_SYN_PKT_SIZE (128) -#define QED_IWARP_HANDLE_INVAL (0xff) + +#define QED_IWARP_LL2_OOO_DEF_TX_SIZE (256) +#define QED_IWARP_MAX_OOO (16) +#define QED_IWARP_LL2_OOO_MAX_RX_SIZE (16384) + +#define QED_IWARP_HANDLE_INVAL (0xff) struct qed_iwarp_ll2_buff { void *data; @@ -67,6 +72,7 @@ struct qed_iwarp_info { u8 crc_needed; u8 tcp_flags; u8 ll2_syn_handle; + u8 ll2_ooo_handle; u8 peer2peer; enum mpa_negotiation_mode mpa_rev; enum mpa_rtr_type rtr_type; @@ -147,6 +153,9 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_rdma_start_in_params *params); +void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, + struct iwarp_init_func_params *p_ramrod); + int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 06715f7403ef..4f46f2851780 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -551,10 +551,13 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, if (rc) return rc; - if (QED_IS_IWARP_PERSONALITY(p_hwfn)) + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { + qed_iwarp_init_fw_ramrod(p_hwfn, + &p_ent->ramrod.iwarp_init_func.iwarp); p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma; - else + } else { p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + } p_params_header = &p_ramrod->params_header; p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, -- cgit v1.2.3 From 471115ab9804f45cb8e091e426c9c67fe75e41b0 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:44 +0300 Subject: qed: Fix maximum number of CQs for iWARP The maximum number of CQs supported is bound to the number of connections supported, which differs between RoCE and iWARP. This fixes a crash that occurred in iWARP when running 1000 sessions using perftest. Fixes: 67b40dccc45 ("qed: Implement iWARP initialization, teardown and qp operations") Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 4f46f2851780..c8c4b3940564 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -209,11 +209,11 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, goto free_pd_map; } - /* Allocate bitmap for cq's. The maximum number of CQs is bounded to - * twice the number of QPs. + /* Allocate bitmap for cq's. The maximum number of CQs is bound to + * the number of connections we support. (num_qps in iWARP or + * num_qps/2 in RoCE). */ - rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, - p_rdma_info->num_qps * 2, "CQ"); + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, num_cons, "CQ"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate cq bitmap, rc = %d\n", rc); @@ -222,10 +222,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate bitmap for toggle bit for cq icids * We toggle the bit every time we create or resize cq for a given icid. - * The maximum number of CQs is bounded to twice the number of QPs. + * Size needs to equal the size of the cq bmap. */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, - p_rdma_info->num_qps * 2, "Toggle"); + num_cons, "Toggle"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate toogle bits, rc = %d\n", rc); -- cgit v1.2.3 From 1e99c497012cd8647972876f1bd18545bc907aea Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:45 +0300 Subject: qed: iWARP - Add check for errors on a SYN packet A SYN packet which arrives with errors from FW should be dropped. This required adding an additional field to the ll2 rx completion data. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 8 ++++++++ drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + include/linux/qed/qed_ll2_if.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index 568e9853cc8d..8fc9c811f6e3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1733,6 +1733,14 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data) memset(&cm_info, 0, sizeof(cm_info)); ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle; + + /* Check if packet was received with errors... */ + if (data->err_flags) { + DP_NOTICE(p_hwfn, "Error received on SYN packet: 0x%x\n", + data->err_flags); + goto err; + } + if (GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) && GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index c06ad4f0758e..250afa5486cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -413,6 +413,7 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_comp_rx_data *data) { data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags); + data->err_flags = le16_to_cpu(p_cqe->rx_cqe_fp.err_flags.flags); data->length.packet_length = le16_to_cpu(p_cqe->rx_cqe_fp.packet_length); data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan); diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index dd7a3b86bb9e..89fa0bbd54f3 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -101,6 +101,7 @@ struct qed_ll2_comp_rx_data { void *cookie; dma_addr_t rx_buf_addr; u16 parse_flags; + u16 err_flags; u16 vlan; bool b_last_packet; u8 connection_handle; -- cgit v1.2.3 From 3bd3b9ed1b602c065aa0b1ba109b9622afa6ff98 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Sun, 24 Sep 2017 17:41:24 +0530 Subject: net: bcm63xx_enet: Use setup_timer and mod_timer Use setup_timer and mod_timer API instead of structure assignments. This is done using Coccinelle and semantic patch used for this as follows: @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); Signed-off-by: Himanshu Jha Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index f8bbbbfca06e..c6221f04a748 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2331,11 +2331,8 @@ static int bcm_enetsw_open(struct net_device *dev) } /* start phy polling timer */ - init_timer(&priv->swphy_poll); - priv->swphy_poll.function = swphy_poll_timer; - priv->swphy_poll.data = (unsigned long)priv; - priv->swphy_poll.expires = jiffies; - add_timer(&priv->swphy_poll); + setup_timer(&priv->swphy_poll, swphy_poll_timer, (unsigned long)priv); + mod_timer(&priv->swphy_poll, jiffies); return 0; out: -- cgit v1.2.3 From 6aaae2b6c4330a46204bca042f1d2f41e8e18dea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:50 +0200 Subject: bpf: rename bpf_compute_data_end into bpf_compute_data_pointers Just do the rename into bpf_compute_data_pointers() as we'll add one more pointer here to recompute. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/filter.h | 9 ++++++--- kernel/bpf/sockmap.c | 4 ++-- net/bpf/test_run.c | 2 +- net/core/filter.c | 14 +++++++------- net/core/lwt_bpf.c | 2 +- net/sched/act_bpf.c | 4 ++-- net/sched/cls_bpf.c | 4 ++-- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..052bab3d62e7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -496,10 +496,13 @@ struct xdp_buff { void *data_hard_start; }; -/* compute the linear packet data range [data, data_end) which - * will be accessed by cls_bpf, act_bpf and lwt programs +/* Compute the linear packet data range [data, data_end) which + * will be accessed by various program types (cls_bpf, act_bpf, + * lwt, ...). Subsystems allowing direct data access must (!) + * ensure that cb[] area can be written to when BPF program is + * invoked (otherwise cb[] save/restore is necessary). */ -static inline void bpf_compute_data_end(struct sk_buff *skb) +static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 6424ce0e4969..a298d6666698 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -102,7 +102,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) skb_orphan(skb); skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; @@ -369,7 +369,7 @@ static int smap_parse_func_strparser(struct strparser *strp, * any socket yet. */ skb->sk = psock->sock; - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); rc = (*prog->bpf_func)(skb, prog->insnsi); skb->sk = NULL; rcu_read_unlock(); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6be41a44d688..df672517b4fd 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (is_l2) __skb_push(skb, ETH_HLEN); if (is_direct_pkt_access) - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); retval = bpf_test_run(prog, skb, repeat, &duration); if (!is_l2) __skb_push(skb, ETH_HLEN); diff --git a/net/core/filter.c b/net/core/filter.c index 82edad58d066..c468e7cfad19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1402,7 +1402,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err = __bpf_try_make_writable(skb, write_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return err; } @@ -1962,7 +1962,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, ret = skb_vlan_push(skb, vlan_proto, vlan_tci); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -1984,7 +1984,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) ret = skb_vlan_pop(skb); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2178,7 +2178,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, * need to be verified first. */ ret = bpf_skb_proto_xlat(skb, proto); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2303,7 +2303,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : bpf_skb_net_grow(skb, len_diff_abs); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2394,7 +2394,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, skb_gso_reset(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2434,7 +2434,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, skb_reset_mac_header(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return 0; } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1307731ddfe4..e7e626fb87bb 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, */ preempt_disable(); rcu_read_lock(); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); rcu_read_unlock(); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index c0c707eb2c96..5ef8ce8c83d4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 520c5027646a..36671b0fb125 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -99,11 +99,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); } -- cgit v1.2.3 From de8f3a83b0a0fddb2cf56e7a718127e9619ea3da Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:51 +0200 Subject: bpf: add meta pointer for direct access This work enables generic transfer of metadata from XDP into skb. The basic idea is that we can make use of the fact that the resulting skb must be linear and already comes with a larger headroom for supporting bpf_xdp_adjust_head(), which mangles xdp->data. Here, we base our work on a similar principle and introduce a small helper bpf_xdp_adjust_meta() for adjusting a new pointer called xdp->data_meta. Thus, the packet has a flexible and programmable room for meta data, followed by the actual packet data. struct xdp_buff is therefore laid out that we first point to data_hard_start, then data_meta directly prepended to data followed by data_end marking the end of packet. bpf_xdp_adjust_head() takes into account whether we have meta data already prepended and if so, memmove()s this along with the given offset provided there's enough room. xdp->data_meta is optional and programs are not required to use it. The rationale is that when we process the packet in XDP (e.g. as DoS filter), we can push further meta data along with it for the XDP_PASS case, and give the guarantee that a clsact ingress BPF program on the same device can pick this up for further post-processing. Since we work with skb there, we can also set skb->mark, skb->priority or other skb meta data out of BPF, thus having this scratch space generic and programmable allows for more flexibility than defining a direct 1:1 transfer of potentially new XDP members into skb (it's also more efficient as we don't need to initialize/handle each of such new members). The facility also works together with GRO aggregation. The scratch space at the head of the packet can be multiple of 4 byte up to 32 byte large. Drivers not yet supporting xdp->data_meta can simply be set up with xdp->data_meta as xdp->data + 1 as bpf_xdp_adjust_meta() will detect this and bail out, such that the subsequent match against xdp->data for later access is guaranteed to fail. The verifier treats xdp->data_meta/xdp->data the same way as we treat xdp->data/xdp->data_end pointer comparisons. The requirement for doing the compare against xdp->data is that it hasn't been modified from it's original address we got from ctx access. It may have a range marking already from prior successful xdp->data/xdp->data_end pointer comparisons though. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 1 + drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 1 + .../net/ethernet/netronome/nfp/nfp_net_common.c | 1 + drivers/net/ethernet/qlogic/qede/qede_fp.c | 1 + drivers/net/tun.c | 1 + drivers/net/virtio_net.c | 2 + include/linux/bpf.h | 1 + include/linux/filter.h | 21 +++- include/linux/skbuff.h | 68 +++++++++++- include/uapi/linux/bpf.h | 13 ++- kernel/bpf/verifier.c | 114 ++++++++++++++++----- net/bpf/test_run.c | 1 + net/core/dev.c | 31 +++++- net/core/filter.c | 77 +++++++++++++- net/core/skbuff.c | 2 + 19 files changed, 297 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index d8f0c837b72c..06ce63c00821 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; orig_data = xdp.data; mapping = rx_buf->mapping - bp->rx_dma_offset; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 49b80da51ba7..d68478afccbf 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, xdp.data_hard_start = page_address(page); xdp.data = (void *)cpu_addr; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1519dfb851d0..f426762bd83a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2107,6 +2107,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - i40e_rx_offset(rx_ring); xdp.data_end = xdp.data + size; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d962368d08d0..04bb03bda1cd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2326,6 +2326,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b97a55c827eb..8f9cb8abc497 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -762,6 +762,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp.data_hard_start = va - frags[0].page_offset; xdp.data = va; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + length; orig_data = xdp.data; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f1dd638384d3..30b3f3fbd719 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -794,6 +794,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq, return false; xdp.data = va + *rx_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1c0187f0af51..e3a38be3600a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1583,6 +1583,7 @@ static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start, xdp.data_hard_start = hard_start; xdp.data = data + *off; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = data + *off + *len; orig_data = xdp.data; diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index 6fc854b120b0..48ec4c56cddf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev, xdp.data_hard_start = page_address(bd->data); xdp.data = xdp.data_hard_start + *data_offset; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; /* Queues always have a full reset currently, so for the time diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2c36f6ebad79..a6e0bffe3d29 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1468,6 +1468,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun, xdp.data_hard_start = buf; xdp.data = buf + pad; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index dd14a4547932..fc059f193e7d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -554,6 +554,7 @@ static struct sk_buff *receive_small(struct net_device *dev, xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; xdp.data = xdp.data_hard_start + xdp_headroom; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -686,6 +687,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, data = page_address(xdp_page) + offset; xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; xdp.data = data + vi->hdr_len; + xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + (len - vi->hdr_len); act = bpf_prog_run_xdp(xdp_prog, &xdp); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..2b672c50f160 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -137,6 +137,7 @@ enum bpf_reg_type { PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 052bab3d62e7..911d454af107 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -487,12 +487,14 @@ struct sk_filter { struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; + void *data_meta; void *data_end; }; struct xdp_buff { void *data; void *data_end; + void *data_meta; void *data_hard_start; }; @@ -507,7 +509,8 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); - cb->data_end = skb->data + skb_headlen(skb); + cb->data_meta = skb->data - skb_metadata_len(skb); + cb->data_end = skb->data + skb_headlen(skb); } static inline u8 *bpf_skb_cb(struct sk_buff *skb) @@ -728,8 +731,22 @@ int xdp_do_redirect(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush_map(void); +/* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. + */ +static __always_inline void +xdp_set_data_meta_invalid(struct xdp_buff *xdp) +{ + xdp->data_meta = xdp->data + 1; +} + +static __always_inline bool +xdp_data_meta_unsupported(const struct xdp_buff *xdp) +{ + return unlikely(xdp->data_meta > xdp->data); +} + void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_redirect(u32 ifindex); struct sock *do_sk_redirect_map(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f9db5539a6fb..19e64bfb1a66 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -489,8 +489,9 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - unsigned short _unused; - unsigned char nr_frags; + __u8 __unused; + __u8 meta_len; + __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ @@ -3400,6 +3401,69 @@ static inline ktime_t net_invalid_timestamp(void) return 0; } +static inline u8 skb_metadata_len(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->meta_len; +} + +static inline void *skb_metadata_end(const struct sk_buff *skb) +{ + return skb_mac_header(skb); +} + +static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b, + u8 meta_len) +{ + const void *a = skb_metadata_end(skb_a); + const void *b = skb_metadata_end(skb_b); + /* Using more efficient varaiant than plain call to memcmp(). */ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + u64 diffs = 0; + + switch (meta_len) { +#define __it(x, op) (x -= sizeof(u##op)) +#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) + case 32: diffs |= __it_diff(a, b, 64); + case 24: diffs |= __it_diff(a, b, 64); + case 16: diffs |= __it_diff(a, b, 64); + case 8: diffs |= __it_diff(a, b, 64); + break; + case 28: diffs |= __it_diff(a, b, 64); + case 20: diffs |= __it_diff(a, b, 64); + case 12: diffs |= __it_diff(a, b, 64); + case 4: diffs |= __it_diff(a, b, 32); + break; + } + return diffs; +#else + return memcmp(a - meta_len, b - meta_len, meta_len); +#endif +} + +static inline bool skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b) +{ + u8 len_a = skb_metadata_len(skb_a); + u8 len_b = skb_metadata_len(skb_b); + + if (!(len_a | len_b)) + return false; + + return len_a != len_b ? + true : __skb_metadata_differs(skb_a, skb_b, len_a); +} + +static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len) +{ + skb_shinfo(skb)->meta_len = meta_len; +} + +static inline void skb_metadata_clear(struct sk_buff *skb) +{ + skb_metadata_set(skb, 0); +} + struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 43ab5c402f98..e43491ac4823 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -582,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -638,6 +644,7 @@ union bpf_attr { FN(redirect_map), \ FN(sk_redirect_map), \ FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -715,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -723,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -783,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b914fbe1383e..f849eca36052 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -177,6 +177,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...) va_end(args); } +static bool type_is_pkt_pointer(enum bpf_reg_type type) +{ + return type == PTR_TO_PACKET || + type == PTR_TO_PACKET_META; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -187,6 +193,7 @@ static const char * const reg_type_str[] = { [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", [PTR_TO_STACK] = "fp", [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_META] = "pkt_meta", [PTR_TO_PACKET_END] = "pkt_end", }; @@ -226,7 +233,7 @@ static void print_verifier_state(struct bpf_verifier_state *state) verbose("(id=%d", reg->id); if (t != SCALAR_VALUE) verbose(",off=%d", reg->off); - if (t == PTR_TO_PACKET) + if (type_is_pkt_pointer(t)) verbose(",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || @@ -519,6 +526,31 @@ static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno) __mark_reg_known_zero(regs + regno); } +static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg) +{ + return type_is_pkt_pointer(reg->type); +} + +static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg) +{ + return reg_is_pkt_pointer(reg) || + reg->type == PTR_TO_PACKET_END; +} + +/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */ +static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, + enum bpf_reg_type which) +{ + /* The register can already have a range from prior markings. + * This is fine as long as it hasn't been advanced from its + * origin. + */ + return reg->type == which && + reg->id == 0 && + reg->off == 0 && + tnum_equals_const(reg->var_off, 0); +} + /* Attempts to improve min/max values based on var_off information */ static void __update_reg_bounds(struct bpf_reg_state *reg) { @@ -702,6 +734,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: + case PTR_TO_PACKET_META: case PTR_TO_PACKET_END: case CONST_PTR_TO_MAP: return true; @@ -1047,7 +1080,10 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, switch (reg->type) { case PTR_TO_PACKET: - /* special case, because of NET_IP_ALIGN */ + case PTR_TO_PACKET_META: + /* Special case, because of NET_IP_ALIGN. Given metadata sits + * right in front, treat it the very same way. + */ return check_pkt_ptr_alignment(reg, off, size, strict); case PTR_TO_MAP_VALUE: pointer_desc = "value "; @@ -1124,8 +1160,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a - * PTR_TO_PACKET[_END]. In the latter case, we know - * the offset is zero. + * PTR_TO_PACKET[_META,_END]. In the latter + * case, we know the offset is zero. */ if (reg_type == SCALAR_VALUE) mark_reg_unknown(state->regs, value_regno); @@ -1170,7 +1206,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else { err = check_stack_read(state, off, size, value_regno); } - } else if (reg->type == PTR_TO_PACKET) { + } else if (reg_is_pkt_pointer(reg)) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { verbose("cannot write into packet\n"); return -EACCES; @@ -1310,6 +1346,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, switch (reg->type) { case PTR_TO_PACKET: + case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size); case PTR_TO_MAP_VALUE: return check_map_access(env, regno, reg->off, access_size); @@ -1342,7 +1379,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, return 0; } - if (type == PTR_TO_PACKET && + if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) { verbose("helper access to the packet is not allowed\n"); return -EACCES; @@ -1351,7 +1388,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - if (type != PTR_TO_PACKET && type != expected_type) + if (!type_is_pkt_pointer(type) && + type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_SIZE || arg_type == ARG_CONST_SIZE_OR_ZERO) { @@ -1375,7 +1413,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, */ if (register_is_null(*reg)) /* final test in check_stack_boundary() */; - else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE && + else if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; @@ -1401,7 +1440,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - if (type == PTR_TO_PACKET) + if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, meta->map_ptr->key_size); else @@ -1417,7 +1456,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - if (type == PTR_TO_PACKET) + if (type_is_pkt_pointer(type)) err = check_packet_access(env, regno, reg->off, meta->map_ptr->value_size); else @@ -1590,8 +1629,8 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid, - * so turn them into unknown SCALAR_VALUE. +/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] + * are now invalid, so turn them into unknown SCALAR_VALUE. */ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { @@ -1600,18 +1639,15 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) int i; for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == PTR_TO_PACKET || - regs[i].type == PTR_TO_PACKET_END) + if (reg_is_pkt_pointer_any(®s[i])) mark_reg_unknown(regs, i); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; - if (reg->type != PTR_TO_PACKET && - reg->type != PTR_TO_PACKET_END) - continue; - __mark_reg_unknown(reg); + if (reg_is_pkt_pointer_any(reg)) + __mark_reg_unknown(reg); } } @@ -1871,7 +1907,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; - if (ptr_reg->type == PTR_TO_PACKET) { + if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ dst_reg->range = 0; @@ -1931,7 +1967,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off); dst_reg->off = ptr_reg->off; - if (ptr_reg->type == PTR_TO_PACKET) { + if (reg_is_pkt_pointer(ptr_reg)) { dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range to zero */ if (smin_val < 0) @@ -2421,7 +2457,8 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } static void find_good_pkt_pointers(struct bpf_verifier_state *state, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type) { struct bpf_reg_state *regs = state->regs, *reg; int i; @@ -2483,7 +2520,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) + if (regs[i].type == type && regs[i].id == dst_reg->id) /* keep the maximum range already checked */ regs[i].range = max_t(u16, regs[i].range, dst_reg->off); @@ -2491,7 +2528,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; - if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) + if (reg->type == type && reg->id == dst_reg->id) reg->range = max_t(u16, reg->range, dst_reg->off); } } @@ -2856,19 +2893,39 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(this_branch, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(other_branch, dst_reg); + find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], + PTR_TO_PACKET); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], + PTR_TO_PACKET); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { + find_good_pkt_pointers(this_branch, dst_reg, PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && + dst_reg->type == PTR_TO_PACKET_META && + reg_is_init_pkt_pointer(®s[insn->src_reg], PTR_TO_PACKET)) { + find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + regs[insn->src_reg].type == PTR_TO_PACKET_META) { + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], + PTR_TO_PACKET_META); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && + reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && + regs[insn->src_reg].type == PTR_TO_PACKET_META) { + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], + PTR_TO_PACKET_META); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; @@ -3298,8 +3355,9 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; /* Check our ids match any regs they're supposed to */ return check_ids(rold->id, rcur->id, idmap); + case PTR_TO_PACKET_META: case PTR_TO_PACKET: - if (rcur->type != PTR_TO_PACKET) + if (rcur->type != rold->type) return false; /* We must have at least as much range as the old ptr * did, so that any accesses which were safe before are diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index df672517b4fd..a86e6687026e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.data_hard_start = data; xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + size; retval = bpf_test_run(prog, &xdp, repeat, &duration); diff --git a/net/core/dev.c b/net/core/dev.c index 97abddd9039a..e350c768d4b5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3864,8 +3864,8 @@ drop: static u32 netif_receive_generic_xdp(struct sk_buff *skb, struct bpf_prog *xdp_prog) { + u32 metalen, act = XDP_DROP; struct xdp_buff xdp; - u32 act = XDP_DROP; void *orig_data; int hlen, off; u32 mac_len; @@ -3876,8 +3876,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, if (skb_cloned(skb)) return XDP_PASS; - if (skb_linearize(skb)) - goto do_drop; + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (troom > 0 && __skb_linearize(skb)) + goto do_drop; + } /* The XDP program wants to see the packet starting at the MAC * header. @@ -3885,6 +3902,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, mac_len = skb->data - skb_mac_header(skb); hlen = skb_headlen(skb) + mac_len; xdp.data = skb->data - mac_len; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + hlen; xdp.data_hard_start = skb->data - skb_headroom(skb); orig_data = xdp.data; @@ -3902,10 +3920,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: __skb_push(skb, mac_len); - /* fall through */ + break; case XDP_PASS: + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); break; - default: bpf_warn_invalid_xdp_action(act); /* fall through */ @@ -4695,6 +4715,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); diff --git a/net/core/filter.c b/net/core/filter.c index c468e7cfad19..9b6e7e84aafd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2447,14 +2447,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) +{ + return xdp_data_meta_unsupported(xdp) ? 0 : + xdp->data - xdp->data_meta; +} + BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + unsigned long metalen = xdp_get_metalen(xdp); + void *data_start = xdp->data_hard_start + metalen; void *data = xdp->data + offset; - if (unlikely(data < xdp->data_hard_start || + if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; + if (metalen) + memmove(xdp->data_meta + offset, + xdp->data_meta, metalen); + xdp->data_meta += offset; xdp->data = data; return 0; @@ -2468,6 +2480,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) +{ + void *meta = xdp->data_meta + offset; + unsigned long metalen = xdp->data - meta; + + if (xdp_data_meta_unsupported(xdp)) + return -ENOTSUPP; + if (unlikely(meta < xdp->data_hard_start || + meta > xdp->data)) + return -EINVAL; + if (unlikely((metalen & (sizeof(__u32) - 1)) || + (metalen > 32))) + return -EACCES; + + xdp->data_meta = meta; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { + .func = bpf_xdp_adjust_meta, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static int __bpf_tx_xdp(struct net_device *dev, struct bpf_map *map, struct xdp_buff *xdp, @@ -2692,7 +2731,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head) + func == bpf_xdp_adjust_head || + func == bpf_xdp_adjust_meta) return true; return false; @@ -3288,6 +3328,8 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_xdp_adjust_meta: + return &bpf_xdp_adjust_meta_proto; case BPF_FUNC_redirect: return &bpf_xdp_redirect_proto; case BPF_FUNC_redirect_map: @@ -3418,6 +3460,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; @@ -3444,6 +3487,7 @@ static bool sk_filter_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; @@ -3468,6 +3512,7 @@ static bool lwt_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, data_meta): return false; } @@ -3586,6 +3631,9 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3619,6 +3667,9 @@ static bool xdp_is_valid_access(int off, int size, case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; + case offsetof(struct xdp_md, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3677,6 +3728,12 @@ static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + return false; + } + if (type == BPF_WRITE) { switch (off) { case bpf_ctx_range(struct __sk_buff, mark): @@ -3689,8 +3746,6 @@ static bool sk_skb_is_valid_access(int off, int size, } switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - return false; case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; @@ -3847,6 +3902,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, offsetof(struct sk_buff, data)); break; + case offsetof(struct __sk_buff, data_meta): + off = si->off; + off -= offsetof(struct __sk_buff, data_meta); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); @@ -4095,6 +4159,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; + case offsetof(struct xdp_md, data_meta): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, data_meta)); + break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 000ce735fa8d..d98c2e3ce2bf 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1509,6 +1509,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + skb_metadata_clear(skb); + /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). -- cgit v1.2.3 From ac29991ba137cc0e3b0f647fb41e79300230f15c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:52 +0200 Subject: bpf: update bpf.h uapi header for tools Looks like a couple of updates missed to get carried into tools/include/uapi/, so copy the bpf.h header as usual to pull in latest updates. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 45 ++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 461811e57140..e43491ac4823 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -143,12 +143,6 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -enum bpf_sockmap_flags { - BPF_SOCKMAP_UNSPEC, - BPF_SOCKMAP_STRPARSER, - __MAX_BPF_SOCKMAP_FLAG -}; - /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command * to the given target_fd cgroup the descendent cgroup will be able to * override effective bpf program that was inherited from this cgroup @@ -368,9 +362,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) + * redirect to endpoint in map + * @map: pointer to dev map + * @key: index in map to lookup + * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -577,6 +582,12 @@ union bpf_attr { * @map: pointer to sockmap to update * @key: key to insert/update sock in map * @flags: same flags as map update elem + * + * int bpf_xdp_adjust_meta(xdp_md, delta) + * Adjust the xdp_md.data_meta by delta + * @xdp_md: pointer to xdp_md + * @delta: An positive/negative integer to be added to xdp_md.data_meta + * Return: 0 on success or negative on error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -632,7 +643,8 @@ union bpf_attr { FN(skb_adjust_room), \ FN(redirect_map), \ FN(sk_redirect_map), \ - FN(sock_map_update), + FN(sock_map_update), \ + FN(xdp_adjust_meta), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -710,7 +722,7 @@ struct __sk_buff { __u32 data_end; __u32 napi_id; - /* accessed by BPF_PROG_TYPE_sk_skb types */ + /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */ __u32 family; __u32 remote_ip4; /* Stored in network byte order */ __u32 local_ip4; /* Stored in network byte order */ @@ -718,6 +730,9 @@ struct __sk_buff { __u32 local_ip6[4]; /* Stored in network byte order */ __u32 remote_port; /* Stored in network byte order */ __u32 local_port; /* stored in host byte order */ + /* ... here. */ + + __u32 data_meta; }; struct bpf_tunnel_key { @@ -753,20 +768,23 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will result - * in packet drop. + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). */ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook @@ -775,6 +793,7 @@ enum xdp_action { struct xdp_md { __u32 data; __u32 data_end; + __u32 data_meta; }; enum sk_action { -- cgit v1.2.3 From 22c8852624fc90a90709d1237625ca57999c5092 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:53 +0200 Subject: bpf: improve selftests and add tests for meta pointer Add various test_verifier selftests, and a simple xdp/tc functional test that is being attached to veths. Also let new versions of clang use the recently added -mcpu=probe support [1] for the BPF target, so that it can probe the underlying kernel for BPF insn set extensions. We could also just set this options always, where older versions just ignore it and give a note to the user that the -mcpu value is not supported, but given emitting the note cannot be turned off from clang side lets not confuse users running selftests with it, thus fallback to the default generic one when we see that clang doesn't support it. Also allow CPU option to be overridden in the Makefile from command line. [1] https://github.com/llvm-mirror/llvm/commit/d7276a40d87b89aed89978dec6457a5b8b3a0db5 Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/Makefile | 21 ++- tools/testing/selftests/bpf/bpf_helpers.h | 2 + tools/testing/selftests/bpf/test_verifier.c | 247 +++++++++++++++++++++++++++ tools/testing/selftests/bpf/test_xdp_meta.c | 53 ++++++ tools/testing/selftests/bpf/test_xdp_meta.sh | 51 ++++++ 5 files changed, 370 insertions(+), 4 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_xdp_meta.c create mode 100755 tools/testing/selftests/bpf/test_xdp_meta.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f4b23d697448..924af8d79bde 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,9 +15,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o + test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ + sockmap_verdict_prog.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh include ../lib.mk @@ -34,8 +35,20 @@ $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ CLANG ?= clang +LLC ?= llc + +PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +# Let newer LLVM versions transparently probe the kernel for availability +# of full BPF instruction set. +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif %.o: %.c $(CLANG) -I. -I./include/uapi -I../../../include/uapi \ - -Wno-compare-distinct-pointer-types \ - -O2 -target bpf -c $< -o $@ + -Wno-compare-distinct-pointer-types \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 4875395b0b52..a56053db26f5 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -62,6 +62,8 @@ static unsigned long long (*bpf_get_prandom_u32)(void) = (void *) BPF_FUNC_get_prandom_u32; static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_head; +static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = + (void *) BPF_FUNC_xdp_adjust_meta; static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..a0426147523d 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,253 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "meta access, test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet, off=-8", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3), + BPF_MOV64_IMM(BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_meta), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R3 !read_ok", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test10", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test11", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_MOV64_IMM(BPF_REG_5, 42), + BPF_MOV64_IMM(BPF_REG_6, 24), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8), + BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "meta access, test12", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_3), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, }; static int probe_filter_length(const struct bpf_insn *fp) diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c new file mode 100644 index 000000000000..8d0182650653 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.c @@ -0,0 +1,53 @@ +#include +#include +#include + +#include "bpf_helpers.h" + +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem + +SEC("t") +int ing_cls(struct __sk_buff *ctx) +{ + __u8 *data, *data_meta, *data_end; + __u32 diff = 0; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return TC_ACT_SHOT; + + diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; + diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + + return diff ? TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("x") +int ing_xdp(struct xdp_md *ctx) +{ + __u8 *data, *data_meta, *data_end; + int ret; + + ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + if (ret < 0) + return XDP_DROP; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return XDP_DROP; + + __builtin_memcpy(data_meta, data, ETH_ALEN); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh new file mode 100755 index 000000000000..307aa856cee3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -0,0 +1,51 @@ +#!/bin/sh + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_xdp_meta [PASS]"; + else + echo "selftests: test_xdp_meta [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +ip link set dev lo xdp off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdp support" + exit 0 +fi +set -e + +ip netns add ns1 +ip netns add ns2 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 type veth peer name veth2 + +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 + +ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1 +ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2 + +ip netns exec ns1 tc qdisc add dev veth1 clsact +ip netns exec ns2 tc qdisc add dev veth2 clsact + +ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t +ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t + +ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x +ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x + +ip netns exec ns1 ip link set dev veth1 up +ip netns exec ns2 ip link set dev veth2 up + +ip netns exec ns1 ping -c 1 10.1.1.22 +ip netns exec ns2 ping -c 1 10.1.1.11 + +exit 0 -- cgit v1.2.3 From 65d88fd0baaa5c9def9383ac696097911d4ceb73 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:54 +0200 Subject: bpf, nfp: add meta data support Implement support for transferring XDP meta data into skb for nfp driver; before calling into the program, xdp.data_meta points to xdp.data, where on program return with pass verdict, we call into skb_metadata_set(). Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 40 ++++++++-------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index e3a38be3600a..d2f73feb8497 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1574,27 +1574,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, return true; } -static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start, - unsigned int *off, unsigned int *len) -{ - struct xdp_buff xdp; - void *orig_data; - int ret; - - xdp.data_hard_start = hard_start; - xdp.data = data + *off; - xdp_set_data_meta_invalid(&xdp); - xdp.data_end = data + *off + *len; - - orig_data = xdp.data; - ret = bpf_prog_run_xdp(prog, &xdp); - - *len -= xdp.data - orig_data; - *off += xdp.data - orig_data; - - return ret; -} - /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1630,6 +1609,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_meta_parsed meta; struct net_device *netdev; dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; void *new_frag; idx = D_IDX(rx_ring, rx_ring->rd_p); @@ -1708,16 +1688,24 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF && dp->bpf_offload_xdp) && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; unsigned int dma_off; - void *hard_start; + struct xdp_buff xdp; int act; - hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; + xdp.data = orig_data; + xdp.data_meta = orig_data; + xdp.data_end = orig_data + pkt_len; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len -= xdp.data - orig_data; + pkt_off += xdp.data - orig_data; - act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start, - &pkt_off, &pkt_len); switch (act) { case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; break; case XDP_TX: dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; @@ -1785,6 +1773,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); napi_gro_receive(&rx_ring->r_vec->napi, skb); } -- cgit v1.2.3 From 366a88fe2f40d6772985ec78cdd34df7f109bb88 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:55 +0200 Subject: bpf, ixgbe: add meta data support Implement support for transferring XDP meta data into skb for ixgbe driver; before calling into the program, xdp.data_meta points to xdp.data, where on program return with pass verdict, we call into skb_metadata_set(). We implement this for the default ixgbe_build_skb() variant. For the ixgbe_construct_skb() that is used when legacy-rx buffer mananagement mode is turned on via ethtool, I found that XDP gets 0 headroom, so neither xdp_adjust_head() nor xdp_adjust_meta() can be used with this. Just add a comment with explanation for this operating mode. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 30 +++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 04bb03bda1cd..3942c6208745 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2133,6 +2133,21 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, #if L1_CACHE_BYTES < 128 prefetch(xdp->data + L1_CACHE_BYTES); #endif + /* Note, we get here by enabling legacy-rx via: + * + * ethtool --set-priv-flags legacy-rx on + * + * In this mode, we currently get 0 extra XDP headroom as + * opposed to having legacy-rx off, where we process XDP + * packets going to stack via ixgbe_build_skb(). The latter + * provides us currently with 192 bytes of headroom. + * + * For ixgbe_construct_skb() mode it means that the + * xdp->data_meta will always point to xdp->data, since + * the helper cannot expand the head. Should this ever + * change in future for legacy-rx mode on, then lets also + * add xdp->data_meta handling here. + */ /* allocate a skb to store the frags */ skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); @@ -2165,6 +2180,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, struct xdp_buff *xdp, union ixgbe_adv_rx_desc *rx_desc) { + unsigned int metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else @@ -2174,10 +2190,14 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, #endif struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(xdp->data); + /* Prefetch first cache line of first page. If xdp->data_meta + * is unused, this points extactly as xdp->data, otherwise we + * likely have a consumer accessing first few bytes of meta + * data, and then actual data. + */ + prefetch(xdp->data_meta); #if L1_CACHE_BYTES < 128 - prefetch(xdp->data + L1_CACHE_BYTES); + prefetch(xdp->data_meta + L1_CACHE_BYTES); #endif /* build an skb to around the page buffer */ @@ -2188,6 +2208,8 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, /* update pointers within the skb to store the data */ skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); /* record DMA address if this is the start of a chain of buffers */ if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) @@ -2326,7 +2348,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, if (!skb) { xdp.data = page_address(rx_buffer->page) + rx_buffer->page_offset; - xdp_set_data_meta_invalid(&xdp); + xdp.data_meta = xdp.data; xdp.data_hard_start = xdp.data - ixgbe_rx_offset(rx_ring); xdp.data_end = xdp.data + size; -- cgit v1.2.3 From be2336ebfd7a1aec597a26f086fc4235ab87dd2c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:21 +0200 Subject: mlxsw: spectrum_dpipe: Fix indentation in header description Fix indentation in mlxsw_meta header's description. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 23 ++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..91648094ab4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -46,18 +46,21 @@ enum mlxsw_sp_field_metadata_id { }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { - { .name = "erif_port", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, - .bitwidth = 32, - .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + { + .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, }, - { .name = "l3_forward", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, - .bitwidth = 1, + { + .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, }, - { .name = "l3_drop", - .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, - .bitwidth = 1, + { + .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, }, }; -- cgit v1.2.3 From c0859d697c258f7c864e81bc1f83d1c274e7cf4c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:22 +0200 Subject: mlxsw: Add fields for mlxsw's meta header for adjacency table This patch adds field for mlxsw's meta header which will be used to describe the match/action behavior of the adjacency table. The fields are: 1. Adj_index - The global index of the nexthop group in the adjacency table. 2. Adj_hash_index - Local index offset which is based on packets hash mod the nexthop group size. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 91648094ab4c..9253273a5c03 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -43,6 +43,8 @@ enum mlxsw_sp_field_metadata_id { MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, }; static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { @@ -62,6 +64,16 @@ static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, .bitwidth = 1, }, + { + .name = "adj_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX, + .bitwidth = 32, + }, + { + .name = "adj_hash_index", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX, + .bitwidth = 32, + }, }; enum mlxsw_sp_dpipe_header_id { -- cgit v1.2.3 From dbe4598c1e929a24dc352a7dc523a3cc22a093f2 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:23 +0200 Subject: mlxsw: spectrum_router: Keep nexthops in a linked list Keep nexthops in a linked list for easy access. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0bd93dc88ffa..0cd4b2a7d9d0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -78,6 +78,7 @@ struct mlxsw_sp_router { struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; + struct list_head nexthop_list; struct { struct mlxsw_sp_lpm_tree *trees; unsigned int tree_count; @@ -2028,6 +2029,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ struct list_head rif_list_node; + struct list_head router_list_node; struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group * this belongs to */ @@ -2784,6 +2786,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; @@ -2807,6 +2811,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -4045,6 +4050,8 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); + if (!dev) return 0; nh->ifindex = dev->ifindex; @@ -4056,6 +4063,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + list_del(&nh->router_list_node); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, @@ -5990,6 +5998,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list); err = mlxsw_sp_lpm_init(mlxsw_sp); if (err) goto err_lpm_init; -- cgit v1.2.3 From ec2437f42b44edc84054feb943d49e8030154c38 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:24 +0200 Subject: mlxsw: spectrum_router: Use helper to check for last neighbor Use list_is_last helper to check for last neighbor. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0cd4b2a7d9d0..65e59a989084 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1317,7 +1317,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, typeof(*neigh_entry), rif_list_node); } - if (neigh_entry->rif_list_node.next == &rif->neigh_list) + if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list)) return NULL; return list_next_entry(neigh_entry, rif_list_node); } -- cgit v1.2.3 From c556cd28930661f337d7989fe74ac31871fd3888 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:25 +0200 Subject: mlxsw: spectrum_router: Add helpers for nexthop access This is done as a preparation before introducing the ability to dump the adjacency table via dpipe, and to count the table size. The current table implementation avoids tunnel entries, thus a helper for checking if the nexthop group contains tunnel entries is also provided. The mlxsw's nexthop representative struct stays private to the router module. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 71 ++++++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 12 ++++ 2 files changed, 83 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 65e59a989084..c062b4f666e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2068,6 +2068,77 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh) { + if (list_empty(&router->nexthop_list)) + return NULL; + else + return list_first_entry(&router->nexthop_list, + typeof(*nh), router_list_node); + } + if (list_is_last(&nh->router_list_node, &router->nexthop_list)) + return NULL; + return list_next_entry(nh, router_list_node); +} + +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh) +{ + return nh->offloaded; +} + +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) +{ + if (!nh->offloaded) + return NULL; + return nh->neigh_entry->ha; +} + +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + u32 adj_hash_index = 0; + int i; + + if (!nh->offloaded || !nh_grp->adj_index_valid) + return -EINVAL; + + *p_adj_index = nh_grp->adj_index; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter == nh) + break; + if (nh_iter->offloaded) + adj_hash_index++; + } + + *p_adj_hash_index = adj_hash_index; + return 0; +} + +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) +{ + return nh->rif; +} + +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + + if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) + return true; + } + return false; +} + static struct fib_info * mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index ae4c99b3f2fc..d6951d516cf4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -62,6 +62,7 @@ enum mlxsw_sp_rif_counter_dir { }; struct mlxsw_sp_neigh_entry; +struct mlxsw_sp_nexthop; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); @@ -108,5 +109,16 @@ union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev); __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); +struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, + struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh); +unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh); +int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, + u32 *p_adj_hash_index); +struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh); +bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); +#define mlxsw_sp_nexthop_for_each(nh, router) \ + for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ + nh = mlxsw_sp_nexthop_next(router, nh)) #endif /* _MLXSW_ROUTER_H_*/ -- cgit v1.2.3 From c538adb3c6e77e0f6563b71923a81de182b5132c Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:26 +0200 Subject: mlxsw: spectrum_dpipe: Add initial support for the router adjacency table Add initial support for router adjacency table. The table does lookup based on the nexthop-group index and the local nexthop offset. After locating the nexthop entry it sets the destination MAC address and the egress RIF. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 100 ++++++++++++++++++++- .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.h | 1 + 2 files changed, 100 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 9253273a5c03..ca16f8924c0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -841,6 +841,97 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_DPIPE_TABLE_NAME_HOST6); } +static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + return devlink_dpipe_match_put(skb, &match); +} + +static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_action_put(skb, &action); +} + +static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nexthop *nh; + u64 size = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) + if (mlxsw_sp_nexthop_offload(nh) && + !mlxsw_sp_nexthop_group_has_ipip(nh)) + size++; + return size; +} + +static u64 +mlxsw_sp_dpipe_table_adj_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + u64 size; + + rtnl_lock(); + size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); + rtnl_unlock(); + + return size; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { + .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .size_get = mlxsw_sp_dpipe_table_adj_size_get, +}; + +static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ, + &mlxsw_sp_dpipe_table_adj_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -861,8 +952,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); if (err) goto err_host6_table_init; - return 0; + err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp); + if (err) + goto err_adj_table_init; + + return 0; +err_adj_table_init: + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); err_host6_table_init: mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); err_host4_table_init: @@ -876,6 +973,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index 283fde4e6783..815d543cf114 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" #define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" +#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj" #endif /* _MLXSW_PIPELINE_H_*/ -- cgit v1.2.3 From f4de25fb530c936af7c3d9a158a7dde86adb2848 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:27 +0200 Subject: mlxsw: reg: Add support for counters on RATR In order to add the ability for setting counters on nexthops the RATR register should be extended. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 44 ++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 17eba19100de..d44e673a4c4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4549,6 +4549,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_ratr_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8); + +/* reg_ratr_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, @@ -4576,6 +4597,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); } +static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, + bool counter_enable) +{ + enum mlxsw_reg_flow_counter_set_type set_type; + + if (counter_enable) + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES; + else + set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT; + + mlxsw_reg_ratr_counter_index_set(payload, counter_index); + mlxsw_reg_ratr_counter_set_type_set(payload, set_type); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -5297,15 +5332,6 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); -enum mlxsw_reg_flow_counter_set_type { - /* No count */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW -- cgit v1.2.3 From a5390278a5eb573b76d2d28ce576b6b62c2200be Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:28 +0200 Subject: mlxsw: spectrum: Add support for setting counters on nexthops Add support for setting counters on nexthops based on dpipe's adjacency table counter status. This patch also adds the ability for getting the counter value, which will be used by the dpipe adjacency table dump implementation in the next patches. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 52 ++++++++++++++++++++-- .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 2 + 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c062b4f666e3..a75064a8ba80 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2052,6 +2052,8 @@ struct mlxsw_sp_nexthop { struct mlxsw_sp_neigh_entry *neigh_entry; struct mlxsw_sp_ipip_entry *ipip_entry; }; + unsigned int counter_index; + bool counter_valid; }; struct mlxsw_sp_nexthop_group { @@ -2068,6 +2070,41 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +static void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct devlink *devlink; + + devlink = priv_to_devlink(mlxsw_sp->core); + if (!devlink_dpipe_table_counter_enabled(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ADJ)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index)) + return; + + nh->counter_valid = true; +} + +static void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + if (!nh->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index); + nh->counter_valid = false; +} + +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter) +{ + if (!nh->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index, + p_counter, NULL); +} + struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router, struct mlxsw_sp_nexthop *nh) { @@ -2396,8 +2433,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; @@ -2406,6 +2443,11 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, true, MLXSW_REG_RATR_TYPE_ETHERNET, adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + if (nh->counter_valid) + mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true); + else + mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } @@ -2440,7 +2482,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: - err = mlxsw_sp_nexthop_mac_update + err = mlxsw_sp_nexthop_update (mlxsw_sp, adj_index, nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: @@ -2857,6 +2899,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); if (!dev) @@ -2883,6 +2926,7 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } @@ -4120,6 +4164,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, nh->nh_grp = nh_grp; memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -4135,6 +4180,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, { mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index d6951d516cf4..a6e86590939f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -120,5 +120,7 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); #define mlxsw_sp_nexthop_for_each(nh, router) \ for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \ nh = mlxsw_sp_nexthop_next(router, nh)) +int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, u64 *p_counter); #endif /* _MLXSW_ROUTER_H_*/ -- cgit v1.2.3 From 190d38a52a73ef8ac05c1931dda730e0f9b79095 Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:29 +0200 Subject: mlxsw: spectrum_dpipe: Add support for adjacency table dump Add support for adjacency table dump. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 238 +++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index ca16f8924c0a..e6755a96b269 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -895,6 +895,243 @@ static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp) return size; } +enum mlxsw_sp_dpipe_table_adj_match { + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX, + MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT, +}; + +enum mlxsw_sp_dpipe_table_adj_action { + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT, + MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *actions) +{ + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int +mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_values, + struct devlink_dpipe_action *actions) +{ struct devlink_dpipe_value *action_value; + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_action *action; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT; + + entry->action_values = action_values; + entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry, + u32 adj_index, u32 adj_hash_index, + unsigned char *ha, + struct mlxsw_sp_rif *rif) +{ + struct devlink_dpipe_value *value; + u32 *p_rif_value; + u32 *p_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX]; + p_index = value->value; + *p_index = adj_index; + + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX]; + p_index = value->value; + *p_index = adj_hash_index; + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC]; + ether_addr_copy(value->value, ha); + + value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT]; + p_rif_value = value->value; + *p_rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; +} + +static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct devlink_dpipe_entry *entry) +{ + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); + unsigned char *ha = mlxsw_sp_nexthop_ha(nh); + u32 adj_hash_index = 0; + u32 adj_index = 0; + int err; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index, + adj_hash_index, ha, rif); + err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp_nexthop *nh; + int entry_index = 0; + int nh_count_max; + int nh_count = 0; + int nh_skip; + int j; + int err; + + rtnl_lock(); + nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + nh_skip = nh_count; + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + if (nh_count < nh_skip) + goto skip; + + mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry); + entry->index = entry_index; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + entry_index++; + j++; +skip: + nh_count++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (nh_count != nh_count_max) + goto start_again; + rtnl_unlock(); + + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT]; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT * + sizeof(match_values[0])); + memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(actions[0])); + memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT * + sizeof(action_values[0])); + + mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions); + err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry, + match_values, matches, + action_values, actions); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + static u64 mlxsw_sp_dpipe_table_adj_size_get(void *priv) { @@ -911,6 +1148,7 @@ mlxsw_sp_dpipe_table_adj_size_get(void *priv) static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, .size_get = mlxsw_sp_dpipe_table_adj_size_get, }; -- cgit v1.2.3 From 427e652aa34d90960f729c0b902c3c4a8a821b2e Mon Sep 17 00:00:00 2001 From: Arkadi Sharshevsky Date: Mon, 25 Sep 2017 10:32:30 +0200 Subject: mlxsw: spectrum_dpipe: Add support for controlling nexthop counters Add support for controlling nexthop counters via dpipe. Signed-off-by: Arkadi Sharshevsky Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 24 ++++++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 +++++------ .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 6 ++++++ 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index e6755a96b269..a056f23d3a0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1132,6 +1132,29 @@ out: return err; } +static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp_nexthop *nh; + u32 adj_hash_index = 0; + u32 adj_index = 0; + + mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) { + if (!mlxsw_sp_nexthop_offload(nh) || + mlxsw_sp_nexthop_group_has_ipip(nh)) + continue; + + mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index); + if (enable) + mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); + else + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + mlxsw_sp_nexthop_update(mlxsw_sp, + adj_index + adj_hash_index, nh); + } + return 0; +} + static u64 mlxsw_sp_dpipe_table_adj_size_get(void *priv) { @@ -1149,6 +1172,7 @@ static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = { .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump, .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update, .size_get = mlxsw_sp_dpipe_table_adj_size_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a75064a8ba80..321f7356073c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2070,8 +2070,8 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; -static void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { struct devlink *devlink; @@ -2086,8 +2086,8 @@ static void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, nh->counter_valid = true; } -static void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { if (!nh->counter_valid) return; @@ -2433,8 +2433,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index a6e86590939f..3d449180b035 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -122,5 +122,11 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); nh = mlxsw_sp_nexthop_next(router, nh)) int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter); +int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); +void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh); #endif /* _MLXSW_ROUTER_H_*/ -- cgit v1.2.3 From f4344e0a48121d07cf8f69415278e84c39dbe9bf Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:31 -0400 Subject: net: dsa: return -ENODEV is there is no slave PHY Instead of returning -EOPNOTSUPP when a slave device has no PHY, directly return -ENODEV as ethtool and phylib do. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bd51ef56ec5b..79c5a0cd9923 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -266,10 +266,10 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return phy_mii_ioctl(p->phy, ifr, cmd); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return phy_mii_ioctl(p->phy, ifr, cmd); } static int dsa_slave_port_attr_set(struct net_device *dev, @@ -429,7 +429,7 @@ dsa_slave_get_link_ksettings(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); if (!p->phy) - return -EOPNOTSUPP; + return -ENODEV; phy_ethtool_ksettings_get(p->phy, cmd); @@ -442,10 +442,10 @@ dsa_slave_set_link_ksettings(struct net_device *dev, { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return phy_ethtool_ksettings_set(p->phy, cmd); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return phy_ethtool_ksettings_set(p->phy, cmd); } static void dsa_slave_get_drvinfo(struct net_device *dev, @@ -481,22 +481,22 @@ static int dsa_slave_nway_reset(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) - return genphy_restart_aneg(p->phy); + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + return genphy_restart_aneg(p->phy); } static u32 dsa_slave_get_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - if (p->phy != NULL) { - genphy_update_link(p->phy); - return p->phy->link; - } + if (!p->phy) + return -ENODEV; - return -EOPNOTSUPP; + genphy_update_link(p->phy); + + return p->phy->link; } static int dsa_slave_get_eeprom_len(struct net_device *dev) -- cgit v1.2.3 From 0115dcd1787d2d1c50719fab98d6bcb0c17931a1 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:32 -0400 Subject: net: dsa: use slave device phydev There is no need to store a phy_device in dsa_slave_priv since net_device already provides one. Simply s/p->phy/dev->phydev/. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 1 - net/dsa/slave.c | 116 ++++++++++++++++++++++++----------------------------- 2 files changed, 53 insertions(+), 64 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 0298a0f6a349..eccc62776283 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -79,7 +79,6 @@ struct dsa_slave_priv { * The phylib phy_device pointer for the PHY connected * to this port. */ - struct phy_device *phy; phy_interface_t phy_interface; int old_link; int old_pause; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 79c5a0cd9923..4ea1c6eb0da8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -96,12 +96,12 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - err = dsa_port_enable(dp, p->phy); + err = dsa_port_enable(dp, dev->phydev); if (err) goto clear_promisc; - if (p->phy) - phy_start(p->phy); + if (dev->phydev) + phy_start(dev->phydev); return 0; @@ -124,10 +124,10 @@ static int dsa_slave_close(struct net_device *dev) struct net_device *master = dsa_master_netdev(p); struct dsa_port *dp = p->dp; - if (p->phy) - phy_stop(p->phy); + if (dev->phydev) + phy_stop(dev->phydev); - dsa_port_disable(dp, p->phy); + dsa_port_disable(dp, dev->phydev); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); @@ -264,12 +264,10 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(p->phy, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); } static int dsa_slave_port_attr_set(struct net_device *dev, @@ -426,12 +424,10 @@ static int dsa_slave_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - phy_ethtool_ksettings_get(p->phy, cmd); + phy_ethtool_ksettings_get(dev->phydev, cmd); return 0; } @@ -440,12 +436,10 @@ static int dsa_slave_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_ksettings_set(p->phy, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } static void dsa_slave_get_drvinfo(struct net_device *dev, @@ -479,24 +473,20 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) static int dsa_slave_nway_reset(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(p->phy); + return genphy_restart_aneg(dev->phydev); } static u32 dsa_slave_get_link(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) + if (!dev->phydev) return -ENODEV; - genphy_update_link(p->phy); + genphy_update_link(dev->phydev); - return p->phy->link; + return dev->phydev->link; } static int dsa_slave_get_eeprom_len(struct net_device *dev) @@ -631,7 +621,7 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!p->phy) + if (!dev->phydev) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -642,12 +632,12 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) return ret; if (e->eee_enabled) { - ret = phy_init_eee(p->phy, 0); + ret = phy_init_eee(dev->phydev, 0); if (ret) return ret; } - return phy_ethtool_set_eee(p->phy, e); + return phy_ethtool_set_eee(dev->phydev, e); } static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) @@ -657,7 +647,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!p->phy) + if (!dev->phydev) return -ENODEV; if (!ds->ops->get_mac_eee) @@ -667,7 +657,7 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) if (ret) return ret; - return phy_ethtool_get_eee(p->phy, e); + return phy_ethtool_get_eee(dev->phydev, e); } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -976,26 +966,26 @@ static void dsa_slave_adjust_link(struct net_device *dev) struct dsa_switch *ds = p->dp->ds; unsigned int status_changed = 0; - if (p->old_link != p->phy->link) { + if (p->old_link != dev->phydev->link) { status_changed = 1; - p->old_link = p->phy->link; + p->old_link = dev->phydev->link; } - if (p->old_duplex != p->phy->duplex) { + if (p->old_duplex != dev->phydev->duplex) { status_changed = 1; - p->old_duplex = p->phy->duplex; + p->old_duplex = dev->phydev->duplex; } - if (p->old_pause != p->phy->pause) { + if (p->old_pause != dev->phydev->pause) { status_changed = 1; - p->old_pause = p->phy->pause; + p->old_pause = dev->phydev->pause; } if (ds->ops->adjust_link && status_changed) - ds->ops->adjust_link(ds, p->dp->index, p->phy); + ds->ops->adjust_link(ds, p->dp->index, dev->phydev); if (status_changed) - phy_print_status(p->phy); + phy_print_status(dev->phydev); } static int dsa_slave_fixed_link_update(struct net_device *dev, @@ -1020,17 +1010,18 @@ static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) struct dsa_slave_priv *p = netdev_priv(slave_dev); struct dsa_switch *ds = p->dp->ds; - p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); - if (!p->phy) { + slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); + if (!slave_dev->phydev) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; } /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) - p->phy_interface = p->phy->interface; - return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); + p->phy_interface = slave_dev->phydev->interface; + + return phy_connect_direct(slave_dev, slave_dev->phydev, + dsa_slave_adjust_link, p->phy_interface); } static int dsa_slave_phy_setup(struct net_device *slave_dev) @@ -1082,22 +1073,23 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } } else { - p->phy = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, - phy_flags, - p->phy_interface); + slave_dev->phydev = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); } of_node_put(phy_dn); } - if (p->phy && phy_is_fixed) - fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); + if (slave_dev->phydev && phy_is_fixed) + fixed_phy_set_link_update(slave_dev->phydev, + dsa_slave_fixed_link_update); /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) { + if (!slave_dev->phydev) { ret = dsa_slave_phy_connect(slave_dev, p->dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", @@ -1108,7 +1100,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) } } - phy_attached_info(p->phy); + phy_attached_info(slave_dev->phydev); return 0; } @@ -1128,12 +1120,12 @@ int dsa_slave_suspend(struct net_device *slave_dev) netif_device_detach(slave_dev); - if (p->phy) { - phy_stop(p->phy); + if (slave_dev->phydev) { + phy_stop(slave_dev->phydev); p->old_pause = -1; p->old_link = -1; p->old_duplex = -1; - phy_suspend(p->phy); + phy_suspend(slave_dev->phydev); } return 0; @@ -1141,13 +1133,11 @@ int dsa_slave_suspend(struct net_device *slave_dev) int dsa_slave_resume(struct net_device *slave_dev) { - struct dsa_slave_priv *p = netdev_priv(slave_dev); - netif_device_attach(slave_dev); - if (p->phy) { - phy_resume(p->phy); - phy_start(p->phy); + if (slave_dev->phydev) { + phy_resume(slave_dev->phydev); + phy_start(slave_dev->phydev); } return 0; @@ -1240,8 +1230,8 @@ void dsa_slave_destroy(struct net_device *slave_dev) port_dn = p->dp->dn; netif_carrier_off(slave_dev); - if (p->phy) { - phy_disconnect(p->phy); + if (slave_dev->phydev) { + phy_disconnect(slave_dev->phydev); if (of_phy_is_fixed_link(port_dn)) of_phy_deregister_fixed_link(port_dn); -- cgit v1.2.3 From 771df31ace8a0264bcc3576d3f02660e3366cd6d Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:33 -0400 Subject: net: dsa: use phy_ethtool_get_link_ksettings Use phy_ethtool_get_link_ksettings now that dsa_slave_get_link_ksettings does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 4ea1c6eb0da8..bb0f64f47ae7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -420,17 +420,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) } /* ethtool operations *******************************************************/ -static int -dsa_slave_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - if (!dev->phydev) - return -ENODEV; - - phy_ethtool_ksettings_get(dev->phydev, cmd); - - return 0; -} static int dsa_slave_set_link_ksettings(struct net_device *dev, @@ -921,8 +910,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, - .get_link_ksettings = dsa_slave_get_link_ksettings, .set_link_ksettings = dsa_slave_set_link_ksettings, + .get_link_ksettings = phy_ethtool_get_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, }; -- cgit v1.2.3 From aa62a8ca85cd05b4af4d7be70ecb735f65b0449a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:34 -0400 Subject: net: dsa: use phy_ethtool_set_link_ksettings Use phy_ethtool_set_link_ksettings now that dsa_slave_set_link_ksettings does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bb0f64f47ae7..d2b632cae468 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -421,16 +421,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* ethtool operations *******************************************************/ -static int -dsa_slave_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - if (!dev->phydev) - return -ENODEV; - - return phy_ethtool_ksettings_set(dev->phydev, cmd); -} - static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { @@ -910,8 +900,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, - .set_link_ksettings = dsa_slave_set_link_ksettings, .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, }; -- cgit v1.2.3 From 69b2c1629649b1e765516c7b452797b9697de9ac Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 26 Sep 2017 17:15:35 -0400 Subject: net: dsa: use phy_ethtool_nway_reset Use phy_ethtool_nway_reset now that dsa_slave_nway_reset does exactly the same. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d2b632cae468..bf8800de13c1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -450,14 +450,6 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) ds->ops->get_regs(ds, p->dp->index, regs, _p); } -static int dsa_slave_nway_reset(struct net_device *dev) -{ - if (!dev->phydev) - return -ENODEV; - - return genphy_restart_aneg(dev->phydev); -} - static u32 dsa_slave_get_link(struct net_device *dev) { if (!dev->phydev) @@ -888,7 +880,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, .get_regs = dsa_slave_get_regs, - .nway_reset = dsa_slave_nway_reset, + .nway_reset = phy_ethtool_nway_reset, .get_link = dsa_slave_get_link, .get_eeprom_len = dsa_slave_get_eeprom_len, .get_eeprom = dsa_slave_get_eeprom, -- cgit v1.2.3 From 2a52a8c6e594cdc562f503492ba89ac7bc0c4074 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:20 +0200 Subject: mlxsw: spectrum_acl: Propagate errors from mlxsw_afa_block_jump/continue Propagate error instead of doing WARN_ON right away. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 14 ++++++++------ .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 6 +++--- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 10 +++++----- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 6 +++++- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 +++- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index ab3ffe7a8eda..bc55d0e76705 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block) } EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block) +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_continue); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id) { - if (WARN_ON(block->finished)) - return; + if (block->finished) + return -EINVAL; mlxsw_afa_set_goto_set(block->cur_set, MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id); block->finished = true; + return 0; } EXPORT_SYMBOL(mlxsw_afa_block_jump); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 501819c790d6..06b0be432b8f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -57,8 +57,8 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block); int mlxsw_afa_block_commit(struct mlxsw_afa_block *block); char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block); u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block); -void mlxsw_afa_block_continue(struct mlxsw_afa_block *block); -void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); +int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); +int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e907ec446a73..9355d914a4c8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -468,9 +468,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, enum mlxsw_afk_element element, const char *key_value, const char *mask_value, unsigned int len); -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id); +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id); int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index eede75fbd585..93dcd315f7d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -378,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei, key_value, mask_value, len); } -void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) +int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei) { - mlxsw_afa_block_continue(rulei->act_block); + return mlxsw_afa_block_continue(rulei->act_block); } -void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, - u16 group_id) +int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei, + u16 group_id) { - mlxsw_afa_block_jump(rulei->act_block, group_id); + return mlxsw_afa_block_jump(rulei->act_block, group_id); } int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 50b40de1fb91..7e8284b46968 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, goto err_rulei_create; } - mlxsw_sp_acl_rulei_act_continue(rulei); + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (WARN_ON(err)) + goto err_rulei_act_continue; + err = mlxsw_sp_acl_rulei_commit(rulei); if (err) goto err_rulei_commit; @@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp, err_rule_insert: err_rulei_commit: +err_rulei_act_continue: mlxsw_sp_acl_rulei_destroy(rulei); err_rulei_create: parman_item_remove(region->parman, parman_prio, parman_item); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 8aace9a06a5d..f1cedccb58cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -84,7 +84,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(ruleset); group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); - mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id); + if (err) + return err; } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; -- cgit v1.2.3 From 3b8e9238a8d194e82f0202a5fb68a63686ebe420 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:21 +0200 Subject: net: sched: introduce helper to identify gact pass action Introduce a helper called is_tcf_gact_pass which could be used to tell if the action is gact pass or not. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 41afe1ce7b16..d979a0d48f9e 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -33,6 +33,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act, return false; } +static inline bool is_tcf_gact_ok(const struct tc_action *a) +{ + return __is_tcf_gact_act(a, TC_ACT_OK, false); +} + static inline bool is_tcf_gact_shot(const struct tc_action *a) { return __is_tcf_gact_act(a, TC_ACT_SHOT, false); -- cgit v1.2.3 From b2925957ec1a9349c6ac42fc5ac95bcf0dd4a6a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Sep 2017 10:58:22 +0200 Subject: mlxsw: spectrum_flower: Offload "ok" termination action If action is "gact_ok", offload it to HW. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index f1cedccb58cc..2f0e57857ea4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - if (is_tcf_gact_shot(a)) { + if (is_tcf_gact_ok(a)) { + err = mlxsw_sp_acl_rulei_act_continue(rulei); + if (err) + return err; + } else if (is_tcf_gact_shot(a)) { err = mlxsw_sp_acl_rulei_act_drop(rulei); if (err) return err; -- cgit v1.2.3 From 79ede4ae2d01b0282bfaaf761308a5ac485c8144 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:35 +0200 Subject: nfp: add helper to get flower cmsg length Add a helper function that returns the length of the cmsg data when given the cmsg skb Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 5 +++++ drivers/net/ethernet/netronome/nfp/flower/metadata.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index a2ec60344236..7a5ccf0cc7c2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -323,6 +323,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb) return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN; } +static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb) +{ + return skb->len - NFP_FLOWER_CMSG_HLEN; +} + struct sk_buff * nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports); void diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 3226ddc55f99..193520ef23f0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -140,7 +140,7 @@ exit_rcu_unlock: void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb) { - unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN; + unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb); struct nfp_fl_stats_frame *stats_frame; unsigned char *msg; int i; -- cgit v1.2.3 From 611aec101ab7c19755e8ea6d480f679aaffed5ad Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:36 +0200 Subject: nfp: compile flower vxlan tunnel metadata match fields Compile ovs-tc flower vxlan metadata match fields for offloading. Only support offload of tunnel data when the VXLAN port specifically matches well known port 4789. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 38 ++++++++++++ drivers/net/ethernet/netronome/nfp/flower/main.h | 2 + drivers/net/ethernet/netronome/nfp/flower/match.c | 60 +++++++++++++++++-- .../net/ethernet/netronome/nfp/flower/offload.c | 70 +++++++++++++++++++--- 4 files changed, 158 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 7a5ccf0cc7c2..af9165b3b652 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -83,6 +83,14 @@ #define NFP_FL_PUSH_VLAN_CFI BIT(12) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN 0x50000000 + +enum nfp_flower_tun_type { + NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_VXLAN = 2, +}; + struct nfp_fl_output { __be16 a_op; __be16 flags; @@ -230,6 +238,36 @@ struct nfp_flower_ipv6 { struct in6_addr ipv6_dst; }; +/* Flow Frame VXLAN --> Tunnel details (4W/16B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | gpe_flags | Reserved | Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VNI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_flower_vxlan { + __be32 ip_src; + __be32 ip_dst; + __be16 tun_flags; + u8 tos; + u8 ttl; + u8 gpe_flags; + u8 reserved[2]; + u8 nxt_proto; + __be32 tun_id; +}; + +#define NFP_FL_TUN_VNI_OFFSET 8 + /* The base header for a control message packet. * Defines an 8-bit version, and an 8-bit type, padded * to a 32-bit word. Rest of the packet is type-specific. diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index c20dd00a1cae..cd695eabce02 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -58,6 +58,8 @@ struct nfp_app; #define NFP_FL_MASK_REUSE_TIME_NS 40000 #define NFP_FL_MASK_ID_LOCATION 1 +#define NFP_FL_VXLAN_PORT 4789 + struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; struct timespec64 *last_used; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index d25b5038c3a2..1fd1bab0611f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type) static int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, - bool mask_version) + bool mask_version, enum nfp_flower_tun_type tun_type) { if (mask_version) { frame->in_port = cpu_to_be32(~0); return 0; } - frame->in_port = cpu_to_be32(cmsg_port); + if (tun_type) + frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + else + frame->in_port = cpu_to_be32(cmsg_port); return 0; } @@ -189,15 +192,53 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, } } +static void +nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, + struct tc_cls_flower_offload *flow, + bool mask_version) +{ + struct fl_flow_key *target = mask_version ? flow->mask : flow->key; + struct flow_dissector_key_ipv4_addrs *vxlan_ips; + struct flow_dissector_key_keyid *vni; + + /* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */ + memset(frame, 0, sizeof(struct nfp_flower_vxlan)); + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID)) { + u32 temp_vni; + + vni = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target); + temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET; + frame->tun_id = cpu_to_be32(temp_vni); + } + + if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + vxlan_ips = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target); + frame->ip_src = vxlan_ips->src; + frame->ip_dst = vxlan_ips->dst; + } +} + int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { + enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; int err; u8 *ext; u8 *msk; + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) + tun_type = NFP_FL_TUNNEL_VXLAN; + memset(nfp_flow->unmasked_data, 0, key_ls->key_size); memset(nfp_flow->mask_data, 0, key_ls->key_size); @@ -216,14 +257,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, /* Populate Exact Port data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, nfp_repr_get_port_id(netdev), - false); + false, tun_type); if (err) return err; /* Populate Mask Port Data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, nfp_repr_get_port_id(netdev), - true); + true, tun_type); if (err) return err; @@ -291,5 +332,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, msk += sizeof(struct nfp_flower_ipv6); } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) { + /* Populate Exact VXLAN Data. */ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext, + flow, false); + /* Populate Mask VXLAN Data. */ + nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk, + flow, true); + ext += sizeof(struct nfp_flower_vxlan); + msk += sizeof(struct nfp_flower_vxlan); + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index a18b4d2b1d3e..637372ba8f55 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -52,8 +52,25 @@ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_IP)) +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + +#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ + (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + static int nfp_flower_xmit_flow(struct net_device *netdev, struct nfp_fl_payload *nfp_flow, u8 mtype) @@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) return -EOPNOTSUPP; + /* If any tun dissector is used then the required set must be used. */ + if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && + (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + return -EOPNOTSUPP; + + key_layer_two = 0; + key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; + key_size = sizeof(struct nfp_flower_meta_one) + + sizeof(struct nfp_flower_in_port) + + sizeof(struct nfp_flower_mac_mpls); + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL; + struct flow_dissector_key_ports *mask_enc_ports = NULL; + struct flow_dissector_key_ports *enc_ports = NULL; struct flow_dissector_key_control *mask_enc_ctl = skb_flow_dissector_target(flow->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL, flow->mask); - /* We are expecting a tunnel. For now we ignore offloading. */ - if (mask_enc_ctl->addr_type) + struct flow_dissector_key_control *enc_ctl = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + flow->key); + if (mask_enc_ctl->addr_type != 0xffff || + enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) return -EOPNOTSUPP; + + /* These fields are already verified as used. */ + mask_ipv4 = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + flow->mask); + if (mask_ipv4->dst != cpu_to_be32(~0)) + return -EOPNOTSUPP; + + mask_enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->mask); + enc_ports = + skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + flow->key); + + if (mask_enc_ports->dst != cpu_to_be16(~0) || + enc_ports->dst != htons(NFP_FL_VXLAN_PORT)) + return -EOPNOTSUPP; + + key_layer |= NFP_FLOWER_LAYER_VXLAN; + key_size += sizeof(struct nfp_flower_vxlan); } if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) { @@ -151,12 +211,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls, FLOW_DISSECTOR_KEY_IP, flow->mask); - key_layer_two = 0; - key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC; - key_size = sizeof(struct nfp_flower_meta_one) + - sizeof(struct nfp_flower_in_port) + - sizeof(struct nfp_flower_mac_mpls); - if (mask_basic && mask_basic->n_proto) { /* Ethernet type is present in the key. */ switch (key_basic->n_proto) { -- cgit v1.2.3 From b27d6a95a70de551df828de2b658efd949a9864e Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:37 +0200 Subject: nfp: compile flower vxlan tunnel set actions Compile set tunnel actions for tc flower. Only support VXLAN and ensure a tunnel destination port of 4789 is used. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 169 ++++++++++++++++++--- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 31 +++- 2 files changed, 179 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index db9750695dc7..38f3835ae176 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "cmsg.h" #include "main.h" @@ -80,14 +81,27 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci); } +static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev, + enum nfp_flower_tun_type tun_type) +{ + if (!out_dev->rtnl_link_ops) + return false; + + if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan")) + return tun_type == NFP_FL_TUNNEL_VXLAN; + + return false; +} + static int nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, struct nfp_fl_payload *nfp_flow, bool last, - struct net_device *in_dev) + struct net_device *in_dev, enum nfp_flower_tun_type tun_type, + int *tun_out_cnt) { size_t act_size = sizeof(struct nfp_fl_output); + u16 tmp_output_op, tmp_flags; struct net_device *out_dev; - u16 tmp_output_op; int ifindex; /* Set action opcode to output action. */ @@ -97,25 +111,114 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, output->a_op = cpu_to_be16(tmp_output_op); - /* Set action output parameters. */ - output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0); - ifindex = tcf_mirred_ifindex(action); out_dev = __dev_get_by_index(dev_net(in_dev), ifindex); if (!out_dev) return -EOPNOTSUPP; - /* Only offload egress ports are on the same device as the ingress - * port. + tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; + + if (tun_type) { + /* Verify the egress netdev matches the tunnel type. */ + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) + return -EOPNOTSUPP; + + if (*tun_out_cnt) + return -EOPNOTSUPP; + (*tun_out_cnt)++; + + output->flags = cpu_to_be16(tmp_flags | + NFP_FL_OUT_FLAGS_USE_TUN); + output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); + } else { + /* Set action output parameters. */ + output->flags = cpu_to_be16(tmp_flags); + + /* Only offload if egress ports are on the same device as the + * ingress port. + */ + if (!switchdev_port_same_parent_id(in_dev, out_dev)) + return -EOPNOTSUPP; + + output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); + if (!output->port) + return -EOPNOTSUPP; + } + nfp_flow->meta.shortcut = output->port; + + return 0; +} + +static bool nfp_fl_supported_tun_port(const struct tc_action *action) +{ + struct ip_tunnel_info *tun = tcf_tunnel_info(action); + + return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT); +} + +static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) +{ + size_t act_size = sizeof(struct nfp_fl_pre_tunnel); + struct nfp_fl_pre_tunnel *pre_tun_act; + u16 tmp_pre_tun_op; + + /* Pre_tunnel action must be first on action list. + * If other actions already exist they need pushed forward. */ - if (!switchdev_port_same_parent_id(in_dev, out_dev)) - return -EOPNOTSUPP; + if (act_len) + memmove(act_data + act_size, act_data, act_len); + + pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data; + + memset(pre_tun_act, 0, act_size); + + tmp_pre_tun_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PRE_TUNNEL); + + pre_tun_act->a_op = cpu_to_be16(tmp_pre_tun_op); - output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); - if (!output->port) + return pre_tun_act; +} + +static int +nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan, + const struct tc_action *action, + struct nfp_fl_pre_tunnel *pre_tun) +{ + struct ip_tunnel_info *vxlan = tcf_tunnel_info(action); + size_t act_size = sizeof(struct nfp_fl_set_vxlan); + u32 tmp_set_vxlan_type_index = 0; + u16 tmp_set_vxlan_op; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; + + if (vxlan->options_len) { + /* Do not support options e.g. vxlan gpe. */ return -EOPNOTSUPP; + } - nfp_flow->meta.shortcut = output->port; + tmp_set_vxlan_op = + FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) | + FIELD_PREP(NFP_FL_ACT_JMP_ID, + NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL); + + set_vxlan->a_op = cpu_to_be16(tmp_set_vxlan_op); + + /* Set tunnel type and pre-tunnel index. */ + tmp_set_vxlan_type_index |= + FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) | + FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx); + + set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index); + + set_vxlan->tun_id = vxlan->key.tun_id; + set_vxlan->tun_flags = vxlan->key.tun_flags; + set_vxlan->ipv4_ttl = vxlan->key.ttl; + set_vxlan->ipv4_tos = vxlan->key.tos; + + /* Complete pre_tunnel action. */ + pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst; return 0; } @@ -123,8 +226,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action, static int nfp_flower_loop_action(const struct tc_action *a, struct nfp_fl_payload *nfp_fl, int *a_len, - struct net_device *netdev) + struct net_device *netdev, + enum nfp_flower_tun_type *tun_type, int *tun_out_cnt) { + struct nfp_fl_pre_tunnel *pre_tun; + struct nfp_fl_set_vxlan *s_vxl; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; struct nfp_fl_output *output; @@ -137,7 +243,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, true, netdev); + err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -147,7 +254,8 @@ nfp_flower_loop_action(const struct tc_action *a, return -EOPNOTSUPP; output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_output(output, a, nfp_fl, false, netdev); + err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type, + tun_out_cnt); if (err) return err; @@ -170,6 +278,29 @@ nfp_flower_loop_action(const struct tc_action *a, nfp_fl_push_vlan(psh_v, a); *a_len += sizeof(struct nfp_fl_push_vlan); + } else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) { + /* Pre-tunnel action is required for tunnel encap. + * This checks for next hop entries on NFP. + * If none, the packet falls back before applying other actions. + */ + if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + + sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ) + return -EOPNOTSUPP; + + *tun_type = NFP_FL_TUNNEL_VXLAN; + pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); + nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len]; + err = nfp_fl_set_vxlan(s_vxl, a, pre_tun); + if (err) + return err; + + *a_len += sizeof(struct nfp_fl_set_vxlan); + } else if (is_tcf_tunnel_release(a)) { + /* Tunnel decap is handled by default so accept action. */ + return 0; } else { /* Currently we do not handle any other actions. */ return -EOPNOTSUPP; @@ -182,18 +313,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow, struct net_device *netdev, struct nfp_fl_payload *nfp_flow) { - int act_len, act_cnt, err; + int act_len, act_cnt, err, tun_out_cnt; + enum nfp_flower_tun_type tun_type; const struct tc_action *a; LIST_HEAD(actions); memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ); nfp_flow->meta.act_len = 0; + tun_type = NFP_FL_TUNNEL_NONE; act_len = 0; act_cnt = 0; + tun_out_cnt = 0; tcf_exts_to_list(flow->exts, &actions); list_for_each_entry(a, &actions, list) { - err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev); + err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev, + &tun_type, &tun_out_cnt); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index af9165b3b652..ff42ce8a1e9c 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -67,10 +67,12 @@ #define NFP_FL_LW_SIZ 2 /* Action opcodes */ -#define NFP_FL_ACTION_OPCODE_OUTPUT 0 -#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 -#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 -#define NFP_FL_ACTION_OPCODE_NUM 32 +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_NUM 32 #define NFP_FL_ACT_JMP_ID GENMASK(15, 8) #define NFP_FL_ACT_LEN_LW GENMASK(7, 0) @@ -85,6 +87,8 @@ /* Tunnel ports */ #define NFP_FL_PORT_TYPE_TUN 0x50000000 +#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) +#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, @@ -123,6 +127,25 @@ struct nfp_flower_meta_one { u16 reserved; }; +struct nfp_fl_pre_tunnel { + __be16 a_op; + __be16 reserved; + __be32 ipv4_dst; + /* reserved for use with IPv6 addresses */ + __be32 extra[3]; +}; + +struct nfp_fl_set_vxlan { + __be16 a_op; + __be16 reserved; + __be64 tun_id; + __be32 tun_type_index; + __be16 tun_flags; + u8 ipv4_ttl; + u8 ipv4_tos; + __be32 extra[2]; +} __packed; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 -- cgit v1.2.3 From fd0dd1ab1e107369c950796bb9b0e8eab6134bf1 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:38 +0200 Subject: nfp: offload flower vxlan endpoint MAC addresses Generate a list of MAC addresses of netdevs that could be used as VXLAN tunnel end points. Give offloaded MACs an index for storage on the NFP in the ranges: 0x100-0x1ff physical port representors 0x200-0x2ff VF port representors 0x300-0x3ff other offloads (e.g. vxlan netdevs, ovs bridges) Assign phys and vf indexes based on unique 8 bit values in the port num. Maintain list of other netdevs to ensure same netdev is not offloaded twice and each gets a unique ID without exhausting the entries. Because the IDs are unique but constant for a netdev, any changes are implemented by overwriting the index on NFP. Signed-off-by: John Hurley Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/Makefile | 3 +- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 7 - drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 9 + drivers/net/ethernet/netronome/nfp/flower/main.c | 13 + drivers/net/ethernet/netronome/nfp/flower/main.h | 18 + drivers/net/ethernet/netronome/nfp/flower/match.c | 7 + .../ethernet/netronome/nfp/flower/tunnel_conf.c | 374 +++++++++++++++++++++ 7 files changed, 423 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 96e579a15cbe..becaacf1554d 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -37,7 +37,8 @@ nfp-objs += \ flower/main.o \ flower/match.o \ flower/metadata.o \ - flower/offload.o + flower/offload.o \ + flower/tunnel_conf.o endif ifeq ($(CONFIG_BPF_SYSCALL),y) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index c3ca05d10fe1..b756006dba6f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -38,17 +38,10 @@ #include #include "main.h" -#include "../nfpcore/nfp_cpp.h" #include "../nfp_net.h" #include "../nfp_net_repr.h" #include "./cmsg.h" -#define nfp_flower_cmsg_warn(app, fmt, args...) \ - do { \ - if (net_ratelimit()) \ - nfp_warn((app)->cpp, fmt, ## args); \ - } while (0) - static struct nfp_flower_cmsg_hdr * nfp_flower_cmsg_get_hdr(struct sk_buff *skb) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index ff42ce8a1e9c..dc248193c996 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -39,6 +39,7 @@ #include #include "../nfp_app.h" +#include "../nfpcore/nfp_cpp.h" #define NFP_FLOWER_LAYER_META BIT(0) #define NFP_FLOWER_LAYER_PORT BIT(1) @@ -90,6 +91,12 @@ #define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4) #define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0) +#define nfp_flower_cmsg_warn(app, fmt, args...) \ + do { \ + if (net_ratelimit()) \ + nfp_warn((app)->cpp, fmt, ## args); \ + } while (0) + enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, NFP_FL_TUNNEL_VXLAN = 2, @@ -310,6 +317,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, @@ -343,6 +351,7 @@ enum nfp_flower_cmsg_port_type { NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0, NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1, NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2, + NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3, }; enum nfp_flower_cmsg_port_vnic_type { diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index 91fe03617106..e46e7c60d491 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -436,6 +436,16 @@ static void nfp_flower_clean(struct nfp_app *app) app->priv = NULL; } +static int nfp_flower_start(struct nfp_app *app) +{ + return nfp_tunnel_config_start(app); +} + +static void nfp_flower_stop(struct nfp_app *app) +{ + nfp_tunnel_config_stop(app); +} + const struct nfp_app_type app_flower = { .id = NFP_APP_FLOWER_NIC, .name = "flower", @@ -453,6 +463,9 @@ const struct nfp_app_type app_flower = { .repr_open = nfp_flower_repr_netdev_open, .repr_stop = nfp_flower_repr_netdev_stop, + .start = nfp_flower_start, + .stop = nfp_flower_stop, + .ctrl_msg_rx = nfp_flower_cmsg_rx, .sriov_enable = nfp_flower_sriov_enable, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index cd695eabce02..9de375acc254 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -84,6 +84,13 @@ struct nfp_fl_stats_id { * @flow_table: Hash table used to store flower rules * @cmsg_work: Workqueue for control messages processing * @cmsg_skbs: List of skbs for control message processing + * @nfp_mac_off_list: List of MAC addresses to offload + * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs + * @nfp_mac_off_lock: Lock for the MAC address list + * @nfp_mac_index_lock: Lock for the MAC index list + * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs + * @nfp_mac_off_count: Number of MACs in address list + * @nfp_tun_mac_nb: Notifier to monitor link state */ struct nfp_flower_priv { struct nfp_app *app; @@ -96,6 +103,13 @@ struct nfp_flower_priv { DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS); struct work_struct cmsg_work; struct sk_buff_head cmsg_skbs; + struct list_head nfp_mac_off_list; + struct list_head nfp_mac_index_list; + struct mutex nfp_mac_off_lock; + struct mutex nfp_mac_index_lock; + struct ida nfp_mac_off_ids; + int nfp_mac_off_count; + struct notifier_block nfp_tun_mac_nb; }; struct nfp_fl_key_ls { @@ -165,4 +179,8 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie); void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); +int nfp_tunnel_config_start(struct nfp_app *app); +void nfp_tunnel_config_stop(struct nfp_app *app); +void nfp_tunnel_write_macs(struct nfp_app *app); + #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 1fd1bab0611f..cb3ff6c126e8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -232,6 +232,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + struct nfp_repr *netdev_repr; int err; u8 *ext; u8 *msk; @@ -341,6 +342,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, flow, true); ext += sizeof(struct nfp_flower_vxlan); msk += sizeof(struct nfp_flower_vxlan); + + /* Configure tunnel end point MAC. */ + if (nfp_netdev_is_nfp_repr(netdev)) { + netdev_repr = netdev_priv(netdev); + nfp_tunnel_write_macs(netdev_repr->app); + } } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c new file mode 100644 index 000000000000..34be85803020 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -0,0 +1,374 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include + +#include "cmsg.h" +#include "main.h" +#include "../nfp_net_repr.h" +#include "../nfp_net.h" + +/** + * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP + * @reserved: reserved for future use + * @count: number of MAC addresses in the message + * @index: index of MAC address in the lookup table + * @addr: interface MAC address + * @addresses: series of MACs to offload + */ +struct nfp_tun_mac_addr { + __be16 reserved; + __be16 count; + struct index_mac_addr { + __be16 index; + u8 addr[ETH_ALEN]; + } addresses[]; +}; + +/** + * struct nfp_tun_mac_offload_entry - list of MACs to offload + * @index: index of MAC address for offloading + * @addr: interface MAC address + * @list: list pointer + */ +struct nfp_tun_mac_offload_entry { + __be16 index; + u8 addr[ETH_ALEN]; + struct list_head list; +}; + +#define NFP_MAX_MAC_INDEX 0xff + +/** + * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id + * @ifindex: netdev ifindex of the device + * @index: index of netdevs mac on NFP + * @list: list pointer + */ +struct nfp_tun_mac_non_nfp_idx { + int ifindex; + u8 index; + struct list_head list; +}; + +static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) +{ + if (!netdev->rtnl_link_ops) + return false; + if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch")) + return true; + if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan")) + return true; + + return false; +} + +static int +nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) +{ + struct sk_buff *skb; + unsigned char *msg; + + skb = nfp_flower_cmsg_alloc(app, plen, mtype); + if (!skb) + return -ENOMEM; + + msg = nfp_flower_cmsg_get_data(skb); + memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb)); + + nfp_ctrl_tx(app->ctrl, skb); + return 0; +} + +void nfp_tunnel_write_macs(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + struct nfp_tun_mac_addr *payload; + struct list_head *ptr, *storage; + int mac_count, err, pay_size; + + mutex_lock(&priv->nfp_mac_off_lock); + if (!priv->nfp_mac_off_count) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + pay_size = sizeof(struct nfp_tun_mac_addr) + + sizeof(struct index_mac_addr) * priv->nfp_mac_off_count; + + payload = kzalloc(pay_size, GFP_KERNEL); + if (!payload) { + mutex_unlock(&priv->nfp_mac_off_lock); + return; + } + + payload->count = cpu_to_be16(priv->nfp_mac_off_count); + + mac_count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + payload->addresses[mac_count].index = entry->index; + ether_addr_copy(payload->addresses[mac_count].addr, + entry->addr); + mac_count++; + } + + err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC, + pay_size, payload); + + kfree(payload); + + if (err) { + mutex_unlock(&priv->nfp_mac_off_lock); + /* Write failed so retain list for future retry. */ + return; + } + + /* If list was successfully offloaded, flush it. */ + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&entry->list); + kfree(entry); + } + + priv->nfp_mac_off_count = 0; + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + int idx; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + idx = entry->index; + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + } + + idx = ida_simple_get(&priv->nfp_mac_off_ids, 0, + NFP_MAX_MAC_INDEX, GFP_KERNEL); + if (idx < 0) { + mutex_unlock(&priv->nfp_mac_index_lock); + return idx; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_mac_index_lock); + return -ENOMEM; + } + entry->ifindex = ifindex; + entry->index = idx; + list_add_tail(&entry->list, &priv->nfp_mac_index_list); + mutex_unlock(&priv->nfp_mac_index_lock); + + return idx; +} + +static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list); + if (entry->ifindex == ifindex) { + ida_simple_remove(&priv->nfp_mac_off_ids, + entry->index); + list_del(&entry->list); + kfree(entry); + break; + } + } + mutex_unlock(&priv->nfp_mac_index_lock); +} + +static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev, + struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_offload_entry *entry; + u16 nfp_mac_idx; + int port = 0; + + /* Check if MAC should be offloaded. */ + if (!is_valid_ether_addr(netdev->dev_addr)) + return; + + if (nfp_netdev_is_nfp_repr(netdev)) + port = nfp_repr_get_port_id(netdev); + else if (!nfp_tun_is_netdev_to_offload(netdev)) + return; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n"); + return; + } + + if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) { + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT; + } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) == + NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) { + port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port); + nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT; + } else { + /* Must assign our own unique 8-bit index. */ + int idx = nfp_tun_get_mac_idx(app, netdev->ifindex); + + if (idx < 0) { + nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n"); + kfree(entry); + return; + } + nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT; + } + + entry->index = cpu_to_be16(nfp_mac_idx); + ether_addr_copy(entry->addr, netdev->dev_addr); + + mutex_lock(&priv->nfp_mac_off_lock); + priv->nfp_mac_off_count++; + list_add_tail(&entry->list, &priv->nfp_mac_off_list); + mutex_unlock(&priv->nfp_mac_off_lock); +} + +static int nfp_tun_mac_event_handler(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct net_device *netdev; + struct nfp_app *app; + + if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + /* If non-nfp netdev then free its offload index. */ + if (nfp_tun_is_netdev_to_offload(netdev)) + nfp_tun_del_mac_idx(app, netdev->ifindex); + } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR || + event == NETDEV_REGISTER) { + app_priv = container_of(nb, struct nfp_flower_priv, + nfp_tun_mac_nb); + app = app_priv->app; + netdev = netdev_notifier_info_to_dev(ptr); + + nfp_tun_add_to_mac_offload_list(netdev, app); + + /* Force a list write to keep NFP up to date. */ + nfp_tunnel_write_macs(app); + } + return NOTIFY_OK; +} + +int nfp_tunnel_config_start(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct net_device *netdev; + int err; + + /* Initialise priv data for MAC offloading. */ + priv->nfp_mac_off_count = 0; + mutex_init(&priv->nfp_mac_off_lock); + INIT_LIST_HEAD(&priv->nfp_mac_off_list); + priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler; + mutex_init(&priv->nfp_mac_index_lock); + INIT_LIST_HEAD(&priv->nfp_mac_index_list); + ida_init(&priv->nfp_mac_off_ids); + + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); + if (err) + goto err_free_mac_ida; + + /* Parse netdevs already registered for MACs that need offloaded. */ + rtnl_lock(); + for_each_netdev(&init_net, netdev) + nfp_tun_add_to_mac_offload_list(netdev, app); + rtnl_unlock(); + + return 0; + +err_free_mac_ida: + ida_destroy(&priv->nfp_mac_off_ids); + return err; +} + +void nfp_tunnel_config_stop(struct nfp_app *app) +{ + struct nfp_tun_mac_offload_entry *mac_entry; + struct nfp_flower_priv *priv = app->priv; + struct nfp_tun_mac_non_nfp_idx *mac_idx; + struct list_head *ptr, *storage; + + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); + + /* Free any memory that may be occupied by MAC list. */ + mutex_lock(&priv->nfp_mac_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) { + mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry, + list); + list_del(&mac_entry->list); + kfree(mac_entry); + } + mutex_unlock(&priv->nfp_mac_off_lock); + + /* Free any memory that may be occupied by MAC index list. */ + mutex_lock(&priv->nfp_mac_index_lock); + list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) { + mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, + list); + list_del(&mac_idx->list); + kfree(mac_idx); + } + mutex_unlock(&priv->nfp_mac_index_lock); + + ida_destroy(&priv->nfp_mac_off_ids); +} -- cgit v1.2.3 From 2d9ad71a8ce67eea9ee38512a215e1893bd5cf87 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:39 +0200 Subject: nfp: offload vxlan IPv4 endpoints of flower rules Maintain a list of IPv4 addresses used as the tunnel destination IP match fields in currently active flower rules. Offload the entire list of NFP_FL_IPV4_ADDRS_MAX (even if some are unused) when new IPs are added or removed. The NFP should only be aware of tunnel end points that are currently used by rules on the device Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 + drivers/net/ethernet/netronome/nfp/flower/main.h | 7 ++ drivers/net/ethernet/netronome/nfp/flower/match.c | 14 ++- .../net/ethernet/netronome/nfp/flower/offload.c | 4 + .../ethernet/netronome/nfp/flower/tunnel_conf.c | 120 +++++++++++++++++++++ 5 files changed, 143 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index dc248193c996..6540bb1ceefb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -318,6 +318,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, NFP_FLOWER_CMSG_TYPE_MAX = 32, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 9de375acc254..53306af6cfe8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -86,8 +86,10 @@ struct nfp_fl_stats_id { * @cmsg_skbs: List of skbs for control message processing * @nfp_mac_off_list: List of MAC addresses to offload * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs + * @nfp_ipv4_off_list: List of IPv4 addresses to offload * @nfp_mac_off_lock: Lock for the MAC address list * @nfp_mac_index_lock: Lock for the MAC index list + * @nfp_ipv4_off_lock: Lock for the IPv4 address list * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs * @nfp_mac_off_count: Number of MACs in address list * @nfp_tun_mac_nb: Notifier to monitor link state @@ -105,8 +107,10 @@ struct nfp_flower_priv { struct sk_buff_head cmsg_skbs; struct list_head nfp_mac_off_list; struct list_head nfp_mac_index_list; + struct list_head nfp_ipv4_off_list; struct mutex nfp_mac_off_lock; struct mutex nfp_mac_index_lock; + struct mutex nfp_ipv4_off_lock; struct ida nfp_mac_off_ids; int nfp_mac_off_count; struct notifier_block nfp_tun_mac_nb; @@ -142,6 +146,7 @@ struct nfp_fl_payload { struct rcu_head rcu; spinlock_t lock; /* lock stats */ struct nfp_fl_stats stats; + __be32 nfp_tun_ipv4_addr; char *unmasked_data; char *mask_data; char *action_data; @@ -182,5 +187,7 @@ void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb); int nfp_tunnel_config_start(struct nfp_app *app); void nfp_tunnel_config_stop(struct nfp_app *app); void nfp_tunnel_write_macs(struct nfp_app *app); +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index cb3ff6c126e8..865a815ab92a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -195,7 +195,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame, static void nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, struct tc_cls_flower_offload *flow, - bool mask_version) + bool mask_version, __be32 *tun_dst) { struct fl_flow_key *target = mask_version ? flow->mask : flow->key; struct flow_dissector_key_ipv4_addrs *vxlan_ips; @@ -223,6 +223,7 @@ nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame, target); frame->ip_src = vxlan_ips->src; frame->ip_dst = vxlan_ips->dst; + *tun_dst = vxlan_ips->dst; } } @@ -232,6 +233,7 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, struct nfp_fl_payload *nfp_flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; + __be32 tun_dst, tun_dst_mask = 0; struct nfp_repr *netdev_repr; int err; u8 *ext; @@ -336,10 +338,10 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) { /* Populate Exact VXLAN Data. */ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext, - flow, false); + flow, false, &tun_dst); /* Populate Mask VXLAN Data. */ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk, - flow, true); + flow, true, &tun_dst_mask); ext += sizeof(struct nfp_flower_vxlan); msk += sizeof(struct nfp_flower_vxlan); @@ -347,6 +349,12 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow, if (nfp_netdev_is_nfp_repr(netdev)) { netdev_repr = netdev_priv(netdev); nfp_tunnel_write_macs(netdev_repr->app); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = tun_dst; + nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 637372ba8f55..3d9537ebdea4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -306,6 +306,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer) if (!flow_pay->action_data) goto err_free_mask; + flow_pay->nfp_tun_ipv4_addr = 0; flow_pay->meta.flags = 0; spin_lock_init(&flow_pay->lock); @@ -415,6 +416,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, if (err) goto err_free_flow; + if (nfp_flow->nfp_tun_ipv4_addr) + nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr); + err = nfp_flower_xmit_flow(netdev, nfp_flow, NFP_FLOWER_CMSG_TYPE_FLOW_DEL); if (err) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 34be85803020..185505140f5e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -32,6 +32,7 @@ */ #include +#include #include #include @@ -40,6 +41,30 @@ #include "../nfp_net_repr.h" #include "../nfp_net.h" +#define NFP_FL_IPV4_ADDRS_MAX 32 + +/** + * struct nfp_tun_ipv4_addr - set the IP address list on the NFP + * @count: number of IPs populated in the array + * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses + */ +struct nfp_tun_ipv4_addr { + __be32 count; + __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX]; +}; + +/** + * struct nfp_ipv4_addr_entry - cached IPv4 addresses + * @ipv4_addr: IP address + * @ref_count: number of rules currently using this IP + * @list: list pointer + */ +struct nfp_ipv4_addr_entry { + __be32 ipv4_addr; + int ref_count; + struct list_head list; +}; + /** * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP * @reserved: reserved for future use @@ -112,6 +137,87 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) return 0; } +static void nfp_tun_write_ipv4_list(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct nfp_tun_ipv4_addr payload; + struct list_head *ptr, *storage; + int count; + + memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr)); + mutex_lock(&priv->nfp_ipv4_off_lock); + count = 0; + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + if (count >= NFP_FL_IPV4_ADDRS_MAX) { + mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n"); + return; + } + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + payload.ipv4_addr[count++] = entry->ipv4_addr; + } + payload.count = cpu_to_be32(count); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS, + sizeof(struct nfp_tun_ipv4_addr), + &payload); +} + +void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count++; + mutex_unlock(&priv->nfp_ipv4_off_lock); + return; + } + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_ipv4_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n"); + return; + } + entry->ipv4_addr = ipv4; + entry->ref_count = 1; + list_add_tail(&entry->list, &priv->nfp_ipv4_off_list); + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + +void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_addr_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + if (entry->ipv4_addr == ipv4) { + entry->ref_count--; + if (!entry->ref_count) { + list_del(&entry->list); + kfree(entry); + } + break; + } + } + mutex_unlock(&priv->nfp_ipv4_off_lock); + + nfp_tun_write_ipv4_list(app); +} + void nfp_tunnel_write_macs(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -324,6 +430,10 @@ int nfp_tunnel_config_start(struct nfp_app *app) INIT_LIST_HEAD(&priv->nfp_mac_index_list); ida_init(&priv->nfp_mac_off_ids); + /* Initialise priv data for IPv4 offloading. */ + mutex_init(&priv->nfp_ipv4_off_lock); + INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); if (err) goto err_free_mac_ida; @@ -346,6 +456,7 @@ void nfp_tunnel_config_stop(struct nfp_app *app) struct nfp_tun_mac_offload_entry *mac_entry; struct nfp_flower_priv *priv = app->priv; struct nfp_tun_mac_non_nfp_idx *mac_idx; + struct nfp_ipv4_addr_entry *ip_entry; struct list_head *ptr, *storage; unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); @@ -371,4 +482,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app) mutex_unlock(&priv->nfp_mac_index_lock); ida_destroy(&priv->nfp_mac_off_ids); + + /* Free any memory that may be occupied by ipv4 list. */ + mutex_lock(&priv->nfp_ipv4_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) { + ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list); + list_del(&ip_entry->list); + kfree(ip_entry); + } + mutex_unlock(&priv->nfp_ipv4_off_lock); } -- cgit v1.2.3 From 8e6a9046b66a7dfb11ae8be226afaaf417649411 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:40 +0200 Subject: nfp: flower vxlan neighbour offload Receive a request when the NFP does not know the next hop for a packet that is to be encapsulated in a VXLAN tunnel. Do a route lookup, determine the next hop entry and update neighbour table on NFP. Monitor the kernel neighbour table for link changes and update NFP with relevant information. Overwrite routes with zero values on the NFP when they expire. Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 6 + drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 2 + drivers/net/ethernet/netronome/nfp/flower/main.h | 7 + .../ethernet/netronome/nfp/flower/tunnel_conf.c | 253 +++++++++++++++++++++ 4 files changed, 268 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index b756006dba6f..862787daaa68 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -181,6 +181,12 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_FLOW_STATS: nfp_flower_rx_flow_stats(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: + nfp_tunnel_request_route(app, skb); + break; + case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: + /* Acks from the NFP that the route is added - ignore. */ + break; default: nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n", type); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 6540bb1ceefb..1dc72a1ed577 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -317,7 +317,9 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2, NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7, NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, + NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 53306af6cfe8..93ad969c3653 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -87,12 +87,15 @@ struct nfp_fl_stats_id { * @nfp_mac_off_list: List of MAC addresses to offload * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs * @nfp_ipv4_off_list: List of IPv4 addresses to offload + * @nfp_neigh_off_list: List of neighbour offloads * @nfp_mac_off_lock: Lock for the MAC address list * @nfp_mac_index_lock: Lock for the MAC index list * @nfp_ipv4_off_lock: Lock for the IPv4 address list + * @nfp_neigh_off_lock: Lock for the neighbour address list * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs * @nfp_mac_off_count: Number of MACs in address list * @nfp_tun_mac_nb: Notifier to monitor link state + * @nfp_tun_neigh_nb: Notifier to monitor neighbour state */ struct nfp_flower_priv { struct nfp_app *app; @@ -108,12 +111,15 @@ struct nfp_flower_priv { struct list_head nfp_mac_off_list; struct list_head nfp_mac_index_list; struct list_head nfp_ipv4_off_list; + struct list_head nfp_neigh_off_list; struct mutex nfp_mac_off_lock; struct mutex nfp_mac_index_lock; struct mutex nfp_ipv4_off_lock; + struct mutex nfp_neigh_off_lock; struct ida nfp_mac_off_ids; int nfp_mac_off_count; struct notifier_block nfp_tun_mac_nb; + struct notifier_block nfp_tun_neigh_nb; }; struct nfp_fl_key_ls { @@ -189,5 +195,6 @@ void nfp_tunnel_config_stop(struct nfp_app *app); void nfp_tunnel_write_macs(struct nfp_app *app); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 185505140f5e..8c6b88a1306b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -41,6 +42,44 @@ #include "../nfp_net_repr.h" #include "../nfp_net.h" +/** + * struct nfp_tun_neigh - neighbour/route entry on the NFP + * @dst_ipv4: destination IPv4 address + * @src_ipv4: source IPv4 address + * @dst_addr: destination MAC address + * @src_addr: source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + __be32 dst_ipv4; + __be32 src_ipv4; + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup + * @ingress_port: ingress port of packet that signalled request + * @ipv4_addr: destination ipv4 address for route + * @reserved: reserved for future use + */ +struct nfp_tun_req_route_ipv4 { + __be32 ingress_port; + __be32 ipv4_addr; + __be32 reserved[2]; +}; + +/** + * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP + * @ipv4_addr: destination of route + * @list: list pointer + */ +struct nfp_ipv4_route_entry { + __be32 ipv4_addr; + struct list_head list; +}; + #define NFP_FL_IPV4_ADDRS_MAX 32 /** @@ -137,6 +176,197 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata) return 0; } +static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + mutex_unlock(&priv->nfp_neigh_off_lock); + return true; + } + } + mutex_unlock(&priv->nfp_neigh_off_lock); + return false; +} + +static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + mutex_unlock(&priv->nfp_neigh_off_lock); + return; + } + } + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + mutex_unlock(&priv->nfp_neigh_off_lock); + nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n"); + return; + } + + entry->ipv4_addr = ipv4_addr; + list_add_tail(&entry->list, &priv->nfp_neigh_off_list); + mutex_unlock(&priv->nfp_neigh_off_lock); +} + +static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr) +{ + struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *entry; + struct list_head *ptr, *storage; + + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + entry = list_entry(ptr, struct nfp_ipv4_route_entry, list); + if (entry->ipv4_addr == ipv4_addr) { + list_del(&entry->list); + kfree(entry); + break; + } + } + mutex_unlock(&priv->nfp_neigh_off_lock); +} + +static void +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + struct flowi4 *flow, struct neighbour *neigh) +{ + struct nfp_tun_neigh payload; + + /* Only offload representor IPv4s for now. */ + if (!nfp_netdev_is_nfp_repr(netdev)) + return; + + memset(&payload, 0, sizeof(struct nfp_tun_neigh)); + payload.dst_ipv4 = flow->daddr; + + /* If entry has expired send dst IP with all other fields 0. */ + if (!(neigh->nud_state & NUD_VALID)) { + nfp_tun_del_route_from_cache(app, payload.dst_ipv4); + /* Trigger ARP to verify invalid neighbour state. */ + neigh_event_send(neigh, NULL); + goto send_msg; + } + + /* Have a valid neighbour so populate rest of entry. */ + payload.src_ipv4 = flow->saddr; + ether_addr_copy(payload.src_addr, netdev->dev_addr); + neigh_ha_snapshot(payload.dst_addr, neigh, netdev); + payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev)); + /* Add destination of new route to NFP cache. */ + nfp_tun_add_route_to_cache(app, payload.dst_ipv4); + +send_msg: + nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, + sizeof(struct nfp_tun_neigh), + (unsigned char *)&payload); +} + +static int +nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct nfp_flower_priv *app_priv; + struct netevent_redirect *redir; + struct flowi4 flow = {}; + struct neighbour *n; + struct nfp_app *app; + struct rtable *rt; + int err; + + switch (event) { + case NETEVENT_REDIRECT: + redir = (struct netevent_redirect *)ptr; + n = redir->neigh; + break; + case NETEVENT_NEIGH_UPDATE: + n = (struct neighbour *)ptr; + break; + default: + return NOTIFY_DONE; + } + + flow.daddr = *(__be32 *)n->primary_key; + + /* Only concerned with route changes for representors. */ + if (!nfp_netdev_is_nfp_repr(n->dev)) + return NOTIFY_DONE; + + app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb); + app = app_priv->app; + + /* Only concerned with changes to routes already added to NFP. */ + if (!nfp_tun_has_route(app, flow.daddr)) + return NOTIFY_DONE; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup to populate flow data. */ + rt = ip_route_output_key(dev_net(n->dev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; +#else + return NOTIFY_DONE; +#endif + + flow.flowi4_proto = IPPROTO_UDP; + nfp_tun_write_neigh(n->dev, app, &flow, n); + + return NOTIFY_OK; +} + +void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_req_route_ipv4 *payload; + struct net_device *netdev; + struct flowi4 flow = {}; + struct neighbour *n; + struct rtable *rt; + int err; + + payload = nfp_flower_cmsg_get_data(skb); + + netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port)); + if (!netdev) + goto route_fail_warning; + + flow.daddr = payload->ipv4_addr; + flow.flowi4_proto = IPPROTO_UDP; + +#if IS_ENABLED(CONFIG_INET) + /* Do a route lookup on same namespace as ingress port. */ + rt = ip_route_output_key(dev_net(netdev), &flow); + err = PTR_ERR_OR_ZERO(rt); + if (err) + goto route_fail_warning; +#else + goto route_fail_warning; +#endif + + /* Get the neighbour entry for the lookup */ + n = dst_neigh_lookup(&rt->dst, &flow.daddr); + ip_rt_put(rt); + if (!n) + goto route_fail_warning; + nfp_tun_write_neigh(n->dev, app, &flow, n); + neigh_release(n); + return; + +route_fail_warning: + nfp_flower_cmsg_warn(app, "Requested route not found.\n"); +} + static void nfp_tun_write_ipv4_list(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; @@ -434,10 +664,19 @@ int nfp_tunnel_config_start(struct nfp_app *app) mutex_init(&priv->nfp_ipv4_off_lock); INIT_LIST_HEAD(&priv->nfp_ipv4_off_list); + /* Initialise priv data for neighbour offloading. */ + mutex_init(&priv->nfp_neigh_off_lock); + INIT_LIST_HEAD(&priv->nfp_neigh_off_list); + priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler; + err = register_netdevice_notifier(&priv->nfp_tun_mac_nb); if (err) goto err_free_mac_ida; + err = register_netevent_notifier(&priv->nfp_tun_neigh_nb); + if (err) + goto err_unreg_mac_nb; + /* Parse netdevs already registered for MACs that need offloaded. */ rtnl_lock(); for_each_netdev(&init_net, netdev) @@ -446,6 +685,8 @@ int nfp_tunnel_config_start(struct nfp_app *app) return 0; +err_unreg_mac_nb: + unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); err_free_mac_ida: ida_destroy(&priv->nfp_mac_off_ids); return err; @@ -455,11 +696,13 @@ void nfp_tunnel_config_stop(struct nfp_app *app) { struct nfp_tun_mac_offload_entry *mac_entry; struct nfp_flower_priv *priv = app->priv; + struct nfp_ipv4_route_entry *route_entry; struct nfp_tun_mac_non_nfp_idx *mac_idx; struct nfp_ipv4_addr_entry *ip_entry; struct list_head *ptr, *storage; unregister_netdevice_notifier(&priv->nfp_tun_mac_nb); + unregister_netevent_notifier(&priv->nfp_tun_neigh_nb); /* Free any memory that may be occupied by MAC list. */ mutex_lock(&priv->nfp_mac_off_lock); @@ -491,4 +734,14 @@ void nfp_tunnel_config_stop(struct nfp_app *app) kfree(ip_entry); } mutex_unlock(&priv->nfp_ipv4_off_lock); + + /* Free any memory that may be occupied by the route list. */ + mutex_lock(&priv->nfp_neigh_off_lock); + list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) { + route_entry = list_entry(ptr, struct nfp_ipv4_route_entry, + list); + list_del(&route_entry->list); + kfree(route_entry); + } + mutex_unlock(&priv->nfp_neigh_off_lock); } -- cgit v1.2.3 From 856f5b135758ad80053a49f7ce9d1dc0166e3006 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Sep 2017 12:23:41 +0200 Subject: nfp: flower vxlan neighbour keep-alive Periodically receive messages containing the destination IPs of tunnels that have recently forwarded traffic. Update the neighbour entries 'used' value for these IPs next hop. This prevents the neighbour entry from expiring on timeout but rather signals an ARP to verify the connection. From an NFP perspective, packets will not fall back mid-flow unless the link is verified to be down. Signed-off-by: John Hurley Reviewed-by: Simon Horman Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/cmsg.c | 3 + drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 1 + drivers/net/ethernet/netronome/nfp/flower/main.h | 1 + .../ethernet/netronome/nfp/flower/tunnel_conf.c | 64 ++++++++++++++++++++++ 4 files changed, 69 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index 862787daaa68..6b71c719deba 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -184,6 +184,9 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) case NFP_FLOWER_CMSG_TYPE_NO_NEIGH: nfp_tunnel_request_route(app, skb); break; + case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS: + nfp_tunnel_keep_alive(app, skb); + break; case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH: /* Acks from the NFP that the route is added - ignore. */ break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 1dc72a1ed577..504ddaa21701 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -319,6 +319,7 @@ enum nfp_flower_cmsg_type_port { NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8, NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10, NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11, + NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13, NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14, NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15, diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 93ad969c3653..12c319a219d8 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -196,5 +196,6 @@ void nfp_tunnel_write_macs(struct nfp_app *app); void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4); void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb); +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 8c6b88a1306b..c495f8f38506 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -36,12 +36,36 @@ #include #include #include +#include #include "cmsg.h" #include "main.h" #include "../nfp_net_repr.h" #include "../nfp_net.h" +#define NFP_FL_MAX_ROUTES 32 + +/** + * struct nfp_tun_active_tuns - periodic message of active tunnels + * @seq: sequence number of the message + * @count: number of tunnels report in message + * @flags: options part of the request + * @ipv4: dest IPv4 address of active route + * @egress_port: port the encapsulated packet egressed + * @extra: reserved for future use + * @tun_info: tunnels that have sent traffic in reported period + */ +struct nfp_tun_active_tuns { + __be32 seq; + __be32 count; + __be32 flags; + struct route_ip_info { + __be32 ipv4; + __be32 egress_port; + __be32 extra[2]; + } tun_info[]; +}; + /** * struct nfp_tun_neigh - neighbour/route entry on the NFP * @dst_ipv4: destination IPv4 address @@ -147,6 +171,46 @@ struct nfp_tun_mac_non_nfp_idx { struct list_head list; }; +void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) +{ + struct nfp_tun_active_tuns *payload; + struct net_device *netdev; + int count, i, pay_len; + struct neighbour *n; + __be32 ipv4_addr; + u32 port; + + payload = nfp_flower_cmsg_get_data(skb); + count = be32_to_cpu(payload->count); + if (count > NFP_FL_MAX_ROUTES) { + nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n"); + return; + } + + pay_len = nfp_flower_cmsg_get_data_len(skb); + if (pay_len != sizeof(struct nfp_tun_active_tuns) + + sizeof(struct route_ip_info) * count) { + nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); + return; + } + + for (i = 0; i < count; i++) { + ipv4_addr = payload->tun_info[i].ipv4; + port = be32_to_cpu(payload->tun_info[i].egress_port); + netdev = nfp_app_repr_get(app, port); + if (!netdev) + continue; + + n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev); + if (!n) + continue; + + /* Update the used timestamp of neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } +} + static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev) { if (!netdev->rtnl_link_ops) -- cgit v1.2.3 From 85e482285bbbd508483cbe08de69c8fe00cdbbfe Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:11 +0200 Subject: fib: notifier: Add VIF add and delete event types In order for an interface to forward packets according to the kernel multicast routing table, it must be configured with a VIF index according to the mroute user API. The VIF index is then used to refer to that interface in the mroute user API, for example, to set the iif and oifs of an MFC entry. In order to allow drivers to be aware and offload multicast routes, they have to be aware of the VIF add and delete notifications. Due to the fact that a specific VIF can be deleted and re-added pointing to another netdevice, and the MFC routes that point to it will forward the matching packets to the new netdevice, a driver willing to offload MFC cache entries must be aware of the VIF add and delete events in addition to MFC routes notifications. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/fib_notifier.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h index 669b9716dc7a..54cd6b839d2f 100644 --- a/include/net/fib_notifier.h +++ b/include/net/fib_notifier.h @@ -20,6 +20,8 @@ enum fib_event_type { FIB_EVENT_RULE_DEL, FIB_EVENT_NH_ADD, FIB_EVENT_NH_DEL, + FIB_EVENT_VIF_ADD, + FIB_EVENT_VIF_DEL, }; struct fib_notifier_ops { -- cgit v1.2.3 From 310ebbba3b7396b00bce08a33f1d2de2c74fa257 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:12 +0200 Subject: ipmr: Add reference count to MFC entries Next commits will introduce MFC notifications through the atomic fib_notification chain, thus allowing modules to be aware of MFC entries. Due to the fact that modules may need to hold a reference to an MFC entry, add reference count to MFC entries to prevent them from being freed while these modules use them. The reference counting is done only on resolved MFC entries currently. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 21 +++++++++++++++++++++ net/ipv4/ipmr.c | 8 +++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d7f63339ef0b..10028f208efb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -109,6 +109,7 @@ struct mfc_cache_cmp_arg { * @wrong_if: number of wrong source interface hits * @lastuse: time of last use of the group (traffic or update) * @ttls: OIF TTL threshold array + * @refcount: reference count for this entry * @list: global entry list * @rcu: used for entry destruction */ @@ -138,6 +139,7 @@ struct mfc_cache { unsigned long wrong_if; unsigned long lastuse; unsigned char ttls[MAXVIFS]; + refcount_t refcount; } res; } mfc_un; struct list_head list; @@ -148,4 +150,23 @@ struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); + +#ifdef CONFIG_IP_MROUTE +void ipmr_cache_free(struct mfc_cache *mfc_cache); +#else +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) +{ +} +#endif + +static inline void ipmr_cache_put(struct mfc_cache *c) +{ + if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + ipmr_cache_free(c); +} +static inline void ipmr_cache_hold(struct mfc_cache *c) +{ + refcount_inc(&c->mfc_un.res.refcount); +} + #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c9b3e6e069ae..86dc5f98c5dd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -652,10 +652,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head) kmem_cache_free(mrt_cachep, c); } -static inline void ipmr_cache_free(struct mfc_cache *c) +void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->rcu, ipmr_cache_free_rcu); } +EXPORT_SYMBOL(ipmr_cache_free); /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. @@ -949,6 +950,7 @@ static struct mfc_cache *ipmr_cache_alloc(void) if (c) { c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + refcount_set(&c->mfc_un.res.refcount, 1); } return c; } @@ -1162,7 +1164,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); return 0; } @@ -1264,7 +1266,7 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { -- cgit v1.2.3 From 4d65b9487831170e699b2fc64a91b839d729bd78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:13 +0200 Subject: ipmr: Add FIB notification access functions Make the ipmr module register as a FIB notifier. To do that, implement both the ipmr_seq_read and ipmr_dump ops. The ipmr_seq_read op returns a sequence counter that is incremented on every notification related operation done by the ipmr. To implement that, add a sequence counter in the netns_ipv4 struct and increment it whenever a new MFC route or VIF are added or deleted. The sequence operations are protected by the RTNL lock. The ipmr_dump iterates the list of MFC routes and the list of VIF entries and sends notifications about them. The entries dump is done under RCU where the VIF dump uses the mrt_lock too, as the vif->dev field can change under RCU. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 15 ++++++ include/net/netns/ipv4.h | 3 ++ net/ipv4/ipmr.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 2 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 10028f208efb..54c5cb82ddcb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE @@ -58,6 +59,14 @@ struct vif_device { int link; /* Physical interface index */ }; +struct vif_entry_notifier_info { + struct fib_notifier_info info; + struct net_device *dev; + vifi_t vif_index; + unsigned short vif_flags; + u32 tb_id; +}; + #define VIFF_STATIC 0x8000 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -146,6 +155,12 @@ struct mfc_cache { struct rcu_head rcu; }; +struct mfc_entry_notifier_info { + struct fib_notifier_info info; + struct mfc_cache *mfc; + u32 tb_id; +}; + struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8387f099115e..abc84d986da4 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -163,6 +163,9 @@ struct netns_ipv4 { struct fib_notifier_ops *notifier_ops; unsigned int fib_seq; /* protected by rtnl_mutex */ + struct fib_notifier_ops *ipmr_notifier_ops; + unsigned int ipmr_seq; /* protected by rtnl_mutex */ + atomic_t rt_genid; }; #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 86dc5f98c5dd..49879c338357 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -264,6 +264,16 @@ static void __net_exit ipmr_rules_exit(struct net *net) fib_rules_unregister(net->ipv4.mr_rules_ops); rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); +} #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) @@ -298,6 +308,16 @@ static void __net_exit ipmr_rules_exit(struct net *net) net->ipv4.mrt = NULL; rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return 0; +} #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, @@ -587,6 +607,43 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } #endif +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call @@ -3050,14 +3107,87 @@ static const struct net_protocol pim_protocol = { }; #endif +static unsigned int ipmr_seq_read(struct net *net) +{ + ASSERT_RTNL(); + + return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); +} + +static int ipmr_dump(struct net *net, struct notifier_block *nb) +{ + struct mr_table *mrt; + int err; + + err = ipmr_rules_dump(net, nb); + if (err) + return err; + + ipmr_for_each_table(mrt, net) { + struct vif_device *v = &mrt->vif_table[0]; + struct mfc_cache *mfc; + int vifi; + + /* Notifiy on table VIF entries */ + read_lock(&mrt_lock); + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { + if (!v->dev) + continue; + + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, + v, vifi, mrt->id); + } + read_unlock(&mrt_lock); + + /* Notify on table MFC entries */ + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + call_ipmr_mfc_entry_notifier(nb, net, + FIB_EVENT_ENTRY_ADD, mfc, + mrt->id); + } + + return 0; +} + +static const struct fib_notifier_ops ipmr_notifier_ops_template = { + .family = RTNL_FAMILY_IPMR, + .fib_seq_read = ipmr_seq_read, + .fib_dump = ipmr_dump, + .owner = THIS_MODULE, +}; + +int __net_init ipmr_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + net->ipv4.ipmr_seq = 0; + + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.ipmr_notifier_ops = ops; + + return 0; +} + +static void __net_exit ipmr_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); + net->ipv4.ipmr_notifier_ops = NULL; +} + /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; + err = ipmr_notifier_init(net); + if (err) + goto ipmr_notifier_fail; + err = ipmr_rules_init(net); if (err < 0) - goto fail; + goto ipmr_rules_fail; #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -3074,7 +3204,9 @@ proc_cache_fail: proc_vif_fail: ipmr_rules_exit(net); #endif -fail: +ipmr_rules_fail: + ipmr_notifier_exit(net); +ipmr_notifier_fail: return err; } @@ -3084,6 +3216,7 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif + ipmr_notifier_exit(net); ipmr_rules_exit(net); } -- cgit v1.2.3 From b362053a7cc0fcc09b92642ba5dd1ca7fddc9004 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:14 +0200 Subject: ipmr: Send FIB notifications on MFC and VIF entries Use the newly introduced notification chain to send events upon VIF and MFC addition and deletion. The MFC notifications are sent only on resolved MFC entries, as unresolved cannot be offloaded. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 49879c338357..ba71bc402336 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -627,6 +627,27 @@ static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, return call_fib_notifier(nb, net, event_type, &info.info); } +static int call_ipmr_vif_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, @@ -644,6 +665,24 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, return call_fib_notifier(nb, net, event_type, &info.info); } +static int call_ipmr_mfc_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call @@ -651,6 +690,7 @@ static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { + struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev; @@ -660,6 +700,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; + if (VIF_EXISTS(mrt, vifi)) + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, + mrt->id); + write_lock_bh(&mrt_lock); dev = v->dev; v->dev = NULL; @@ -909,6 +953,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); return 0; } @@ -1209,6 +1254,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; /* The entries are added/deleted only under RTNL */ @@ -1220,6 +1266,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); @@ -1248,6 +1295,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1297,6 +1346,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1304,6 +1354,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, bool all) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c, *tmp; LIST_HEAD(list); int i; @@ -1322,6 +1373,8 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); } -- cgit v1.2.3 From c7c0bbeae9501a7e42f2fd306d6a6399aca688b6 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:15 +0200 Subject: net: ipmr: Add MFC offload indication Allow drivers, registered to the fib notification chain indicate whether a multicast MFC route is offloaded or not, similarly to unicast routes. The indication of whether a route is offloaded is done using the mfc_flags field on an mfc_cache struct, and the information is sent to the userspace via the RTNetlink interface only. Currently, MFC routes are either offloaded or not, thus there is no need to add per-VIF offload indication. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 ++ net/ipv4/ipmr.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 54c5cb82ddcb..5566580811ce 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -90,9 +90,11 @@ struct mr_table { /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) + * MFC_OFFLOAD - the entry was offloaded to the hardware */ enum { MFC_STATIC = BIT(0), + MFC_OFFLOAD = BIT(1), }; struct mfc_cache_cmp_arg { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ba71bc402336..2a795d2c0502 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2268,6 +2268,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) return -EMSGSIZE; + if (c->mfc_flags & MFC_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; + if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) return -EMSGSIZE; -- cgit v1.2.3 From 478e4c2f0067d57d7c17059caafab026ca32084a Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:16 +0200 Subject: net: mroute: Check if rule is a default rule When the ipmr starts, it adds one default FIB rule that matches all packets and sends them to the DEFAULT (multicast) FIB table. A more complex rule can be added by user to specify that for a specific interface, a packet should be look up at either an arbitrary table or according to the l3mdev of the interface. For drivers willing to offload the ipmr logic into a hardware but don't want to offload all the FIB rules functionality, provide a function that can indicate whether the FIB rule is the default multicast rule, thus only one routing table is needed. This way, a driver can register to the FIB notification chain, get notifications about FIB rules added and trigger some kind of an internal abort mechanism when a non default rule is added by the user. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 7 +++++++ net/ipv4/ipmr.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 5566580811ce..b072a84fbe1c 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); +bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, char __user *optval, unsigned int optlen) @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt) { return 0; } + +static inline bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif struct vif_device { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2a795d2c0502..292a8e80bdfa 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -274,6 +274,12 @@ static unsigned int ipmr_rules_seq_read(struct net *net) { return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); } + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; +} +EXPORT_SYMBOL(ipmr_rule_default); #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) @@ -318,6 +324,12 @@ static unsigned int ipmr_rules_seq_read(struct net *net) { return 0; } + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} +EXPORT_SYMBOL(ipmr_rule_default); #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, -- cgit v1.2.3 From c011ec1bbfd69e091ca8d77e13fc251a07be57dc Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:17 +0200 Subject: mlxsw: spectrum: Add the multicast routing offloading logic Add the multicast router offloading logic, which is in charge of handling the VIF and MFC notifications and translating it to the hardware logic API. The offloading logic has to overcome several obstacles in order to safely comply with the kernel multicast router user API: - It must keep track of the mapping between VIFs to netdevices. The user can add an MFC cache entry pointing to a VIF, delete the VIF and add re-add it with a different netdevice. The offloading logic has to handle this in order to be compatible with the kernel logic. - It must keep track of the mapping between netdevices to spectrum RIFs, as the current hardware implementation assume having a RIF for every port in a multicast router. - It must handle routes pointing to pimreg device to be trapped to the kernel, as the packet should be delivered to userspace. - It must handle routes pointing tunnel VIFs. The current implementation does not support multicast forwarding to tunnels, thus routes that point to a tunnel should be trapped to the kernel. - It must be aware of proxy multicast routes, which include both (*,*) routes and duplicate routes. Currently proxy routes are not offloaded and trigger the abort mechanism: removal of all routes from hardware and triggering the traffic to go through the kernel. The multicast routing offloading logic also updates the counters of the offloaded MFC routes in a periodic work. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 3 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 1014 +++++++++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h | 133 +++ 4 files changed, 1150 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 4b88158173f3..9b29764905f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -17,7 +17,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ - spectrum_ipip.o spectrum_acl_flex_actions.o + spectrum_ipip.o spectrum_acl_flex_actions.o \ + spectrum_mr.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 9355d914a4c8..44c5259e5548 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -153,6 +153,7 @@ struct mlxsw_sp { struct mlxsw_sp_sb *sb; struct mlxsw_sp_bridge *bridge; struct mlxsw_sp_router *router; + struct mlxsw_sp_mr *mr; struct mlxsw_afa *afa; struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c new file mode 100644 index 000000000000..09120259a45d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -0,0 +1,1014 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "spectrum_mr.h" +#include "spectrum_router.h" + +struct mlxsw_sp_mr { + const struct mlxsw_sp_mr_ops *mr_ops; + void *catchall_route_priv; + struct delayed_work stats_update_dw; + struct list_head table_list; +#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */ + unsigned long priv[0]; + /* priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_vif { + struct net_device *dev; + const struct mlxsw_sp_rif *rif; + unsigned long vif_flags; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as one of the egress VIFs + */ + struct list_head route_evif_list; + + /* A list of route_vif_entry structs that point to routes that the VIF + * instance is used as an ingress VIF + */ + struct list_head route_ivif_list; +}; + +struct mlxsw_sp_mr_route_vif_entry { + struct list_head vif_node; + struct list_head route_node; + struct mlxsw_sp_mr_vif *mr_vif; + struct mlxsw_sp_mr_route *mr_route; +}; + +struct mlxsw_sp_mr_table { + struct list_head node; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp *mlxsw_sp; + u32 vr_id; + struct mlxsw_sp_mr_vif vifs[MAXVIFS]; + struct list_head route_list; + struct rhashtable route_ht; + char catchall_route_priv[0]; + /* catchall_route_priv has to be always the last item */ +}; + +struct mlxsw_sp_mr_route { + struct list_head node; + struct rhash_head ht_node; + struct mlxsw_sp_mr_route_key key; + enum mlxsw_sp_mr_route_action route_action; + u16 min_mtu; + struct mfc_cache *mfc4; + void *route_priv; + const struct mlxsw_sp_mr_table *mr_table; + /* A list of route_vif_entry structs that point to the egress VIFs */ + struct list_head evif_list; + /* A route_vif_entry struct that point to the ingress VIF */ + struct mlxsw_sp_mr_route_vif_entry ivif; +}; + +static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = { + .key_len = sizeof(struct mlxsw_sp_mr_route_key), + .key_offset = offsetof(struct mlxsw_sp_mr_route, key), + .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node), + .automatic_shrinking = true, +}; + +static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif) +{ + return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER)); +} + +static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; +} + +static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif) +{ + return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif; +} + +static bool +mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) +{ + vifi_t ivif; + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ivif = mr_route->mfc4->mfc_parent; + return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static int +mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + int valid_evifs; + + valid_evifs = 0; + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + valid_evifs++; + return valid_evifs; +} + +static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mr_route->key.source_mask.addr4 == INADDR_ANY; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } + return false; +} + +static enum mlxsw_sp_mr_route_action +mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* If the ingress port is not regular and resolved, trap the route */ + if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* The kernel does not match a (*,G) route that the ingress interface is + * not one of the egress interfaces, so trap these kind of routes. + */ + if (mlxsw_sp_mr_route_starg(mr_route) && + !mlxsw_sp_mr_route_ivif_in_evifs(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If the route has no valid eVIFs, trap it. */ + if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + + /* If either one of the eVIFs is not regular (VIF of type pimreg or + * tunnel) or one of the VIFs has no matching RIF, trap the packet. + */ + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) || + mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif)) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP; + } + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; +} + +static enum mlxsw_sp_mr_route_prio +mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route) +{ + return mlxsw_sp_mr_route_starg(mr_route) ? + MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG; +} + +static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_key *key, + const struct mfc_cache *mfc) +{ + bool starg = (mfc->mfc_origin == INADDR_ANY); + + memset(key, 0, sizeof(*key)); + key->vrid = mr_table->vr_id; + key->proto = mr_table->proto; + key->group.addr4 = mfc->mfc_mcastgrp; + key->group_mask.addr4 = 0xffffffff; + key->source.addr4 = mfc->mfc_origin; + key->source_mask.addr4 = starg ? 0 : 0xffffffff; +} + +static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + rve = kzalloc(sizeof(*rve), GFP_KERNEL); + if (!rve) + return -ENOMEM; + rve->mr_route = mr_route; + rve->mr_vif = mr_vif; + list_add_tail(&rve->route_node, &mr_route->evif_list); + list_add_tail(&rve->vif_node, &mr_vif->route_evif_list); + return 0; +} + +static void +mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve) +{ + list_del(&rve->route_node); + list_del(&rve->vif_node); + kfree(rve); +} + +static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_vif *mr_vif) +{ + mr_route->ivif.mr_route = mr_route; + mr_route->ivif.mr_vif = mr_vif; + list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list); +} + +static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route) +{ + list_del(&mr_route->ivif.vif_node); +} + +static int +mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + u16 *erif_indices; + u16 irif_index; + u16 erif = 0; + + erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices), + GFP_KERNEL); + if (!erif_indices) + return -ENOMEM; + + list_for_each_entry(rve, &mr_route->evif_list, route_node) { + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + + erif_indices[erif++] = rifi; + } + } + + if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif)) + irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif); + else + irif_index = 0; + + route_info->irif_index = irif_index; + route_info->erif_indices = erif_indices; + route_info->min_mtu = mr_route->min_mtu; + route_info->route_action = mr_route->route_action; + route_info->erif_num = erif; + return 0; +} + +static void +mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info) +{ + kfree(route_info->erif_indices); +} + +static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route, + bool replace) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_info route_info; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + int err; + + err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info); + if (err) + return err; + + if (!replace) { + struct mlxsw_sp_mr_route_params route_params; + + mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_route->route_priv) { + err = -ENOMEM; + goto out; + } + + route_params.key = mr_route->key; + route_params.value = route_info; + route_params.prio = mlxsw_sp_mr_route_prio(mr_route); + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_route->route_priv, + &route_params); + if (err) + kfree(mr_route->route_priv); + } else { + err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv, + &route_info); + } +out: + mlxsw_sp_mr_route_info_destroy(&route_info); + return err; +} + +static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv); + kfree(mr_route->route_priv); +} + +static struct mlxsw_sp_mr_route * +mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + struct mlxsw_sp_mr_route *mr_route; + int err; + int i; + + /* Allocate and init a new route and fill it with parameters */ + mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL); + if (!mr_route) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&mr_route->evif_list); + mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc); + + /* Find min_mtu and link iVIF and eVIFs */ + mr_route->min_mtu = ETH_MAX_MTU; + ipmr_cache_hold(mfc); + mr_route->mfc4 = mfc; + mr_route->mr_table = mr_table; + for (i = 0; i < MAXVIFS; i++) { + if (mfc->mfc_un.res.ttls[i] != 255) { + err = mlxsw_sp_mr_route_evif_link(mr_route, + &mr_table->vifs[i]); + if (err) + goto err; + if (mr_table->vifs[i].dev && + mr_table->vifs[i].dev->mtu < mr_route->min_mtu) + mr_route->min_mtu = mr_table->vifs[i].dev->mtu; + } + } + mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); + if (err) + goto err; + + mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); + return mr_route; +err: + ipmr_cache_put(mfc); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); + return ERR_PTR(err); +} + +static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; + + mlxsw_sp_mr_route_ivif_unlink(mr_route); + ipmr_cache_put(mr_route->mfc4); + list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node) + mlxsw_sp_mr_route_evif_unlink(rve); + kfree(mr_route); +} + +static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + switch (mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, + bool offload) +{ + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (offload) + mr_route->mfc4->mfc_flags |= MFC_OFFLOAD; + else + mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route) +{ + bool offload; + + offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_set(mr_route, offload); +} + +static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route *mr_route) +{ + mlxsw_sp_mr_mfc_offload_set(mr_route, false); + mlxsw_sp_mr_route_erase(mr_table, mr_route); + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_route->node); + mlxsw_sp_mr_route_destroy(mr_table, mr_route); +} + +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace) +{ + struct mlxsw_sp_mr_route *mr_orig_route = NULL; + struct mlxsw_sp_mr_route *mr_route; + int err; + + /* If the route is a (*,*) route, abort, as these kind of routes are + * used for proxy routes. + */ + if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) { + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + return -EINVAL; + } + + /* Create a new route */ + mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc); + if (IS_ERR(mr_route)) + return PTR_ERR(mr_route); + + /* Find any route with a matching key */ + mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht, + &mr_route->key, + mlxsw_sp_mr_route_ht_params); + if (replace) { + /* On replace case, make the route point to the new route_priv. + */ + if (WARN_ON(!mr_orig_route)) { + err = -ENOENT; + goto err_no_orig_route; + } + mr_route->route_priv = mr_orig_route->route_priv; + } else if (mr_orig_route) { + /* On non replace case, if another route with the same key was + * found, abort, as duplicate routes are used for proxy routes. + */ + dev_warn(mr_table->mlxsw_sp->bus_info->dev, + "Offloading proxy routes is not supported.\n"); + err = -EINVAL; + goto err_duplicate_route; + } + + /* Put it in the table data-structures */ + list_add_tail(&mr_route->node, &mr_table->route_list); + err = rhashtable_insert_fast(&mr_table->route_ht, + &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); + if (err) + goto err_rhashtable_insert; + + /* Write the route to the hardware */ + err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace); + if (err) + goto err_mr_route_write; + + /* Destroy the original route */ + if (replace) { + rhashtable_remove_fast(&mr_table->route_ht, + &mr_orig_route->ht_node, + mlxsw_sp_mr_route_ht_params); + list_del(&mr_orig_route->node); + mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route); + } + + mlxsw_sp_mr_mfc_offload_update(mr_route); + return 0; + +err_mr_route_write: + rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node, + mlxsw_sp_mr_route_ht_params); +err_rhashtable_insert: + list_del(&mr_route->node); +err_no_orig_route: +err_duplicate_route: + mlxsw_sp_mr_route4_destroy(mr_table, mr_route); + return err; +} + +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc) +{ + struct mlxsw_sp_mr_route *mr_route; + struct mlxsw_sp_mr_route_key key; + + mlxsw_sp_mr_route4_key(mr_table, &key, mfc); + mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key, + mlxsw_sp_mr_route_ht_params); + if (mr_route) + __mlxsw_sp_mr_route_del(mr_table, mr_route); +} + +/* Should be called after the VIF struct is updated */ +static int +mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 irif_index; + int err; + + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return 0; + + /* rve->mr_vif->rif is guaranteed to be valid at this stage */ + irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv, + irif_index); + if (err) + return err; + + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + /* No need to rollback here because the iRIF change only takes + * place after the action has been updated. + */ + return err; + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; +} + +static void +mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv, + MLXSW_SP_MR_ROUTE_ACTION_TRAP); + rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +/* Should be called after the RIF struct is updated */ +static int +mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 erif_index = 0; + int err; + + /* Update the route action, as the new eVIF can be a tunnel or a pimreg + * device which will require updating the action. + */ + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) { + err = mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + if (err) + return err; + } + + /* Add the eRIF */ + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) { + erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif); + err = mr->mr_ops->route_erif_add(mlxsw_sp, + rve->mr_route->route_priv, + erif_index); + if (err) + goto err_route_erif_add; + } + + /* Update the minimum MTU */ + if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = rve->mr_vif->dev->mtu; + err = mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->min_mtu); + if (err) + goto err_route_min_mtu_update; + } + + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); + return 0; + +err_route_min_mtu_update: + if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, + erif_index); +err_route_erif_add: + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + rve->mr_route->route_action); + return err; +} + +/* Should be called before the RIF struct is updated */ +static void +mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct mlxsw_sp_mr_route_vif_entry *rve) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + enum mlxsw_sp_mr_route_action route_action; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u16 rifi; + + /* If the unresolved RIF was not valid, no need to delete it */ + if (!mlxsw_sp_mr_vif_valid(rve->mr_vif)) + return; + + /* Update the route action: if there is only one valid eVIF in the + * route, set the action to trap as the VIF deletion will lead to zero + * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to + * determine the route action. + */ + if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1) + route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP; + else + route_action = mlxsw_sp_mr_route_action(rve->mr_route); + if (route_action != rve->mr_route->route_action) + mr->mr_ops->route_action_update(mlxsw_sp, + rve->mr_route->route_priv, + route_action); + + /* Delete the erif from the route */ + rifi = mlxsw_sp_rif_index(rve->mr_vif->rif); + mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi); + rve->mr_route->route_action = route_action; + mlxsw_sp_mr_mfc_offload_update(rve->mr_route); +} + +static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_route_vif_entry *irve, *erve; + int err; + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = rif; + mr_vif->vif_flags = vif_flags; + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) { + err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve); + if (err) + goto err_irif_unresolve; + } + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) { + err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve); + if (err) + goto err_erif_unresolve; + } + return 0; + +err_erif_unresolve: + list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list, + vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, erve); +err_irif_unresolve: + list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list, + vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve); + mr_vif->rif = NULL; + return err; +} + +static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, + struct mlxsw_sp_mr_vif *mr_vif) +{ + struct mlxsw_sp_mr_route_vif_entry *rve; + + /* Update all routes where this VIF is used as an unresolved eRIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) + mlxsw_sp_mr_route_evif_unresolve(mr_table, rve); + + /* Update all routes where this VIF is used as an unresolved iRIF */ + list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node) + mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve); + + /* Update the VIF */ + mr_vif->dev = dev; + mr_vif->rif = NULL; +} + +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, const struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return -EINVAL; + if (mr_vif->dev) + return -EEXIST; + return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif); +} + +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) +{ + struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index]; + + if (WARN_ON(vif_index >= MAXVIFS)) + return; + if (WARN_ON(!mr_vif->dev)) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif); +} + +struct mlxsw_sp_mr_vif * +mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, + const struct net_device *dev) +{ + vifi_t vif_index; + + for (vif_index = 0; vif_index < MAXVIFS; vif_index++) + if (mr_table->vifs[vif_index].dev == dev) + return &mr_table->vifs[vif_index]; + return NULL; +} + +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return 0; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return 0; + return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, + mr_vif->vif_flags, rif); +} + +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); +} + +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu) +{ + const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr_route_vif_entry *rve; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_vif *mr_vif; + + if (!rif_dev) + return; + + /* Search for a VIF that use that RIF */ + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + if (!mr_vif) + return; + + /* Update all the routes that uses that VIF as eVIF */ + list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) { + if (mtu < rve->mr_route->min_mtu) { + rve->mr_route->min_mtu = mtu; + mr->mr_ops->route_min_mtu_update(mlxsw_sp, + rve->mr_route->route_priv, + mtu); + } + } +} + +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 vr_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_mr_route_params catchall_route_params = { + .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + .key = { + .vrid = vr_id, + }, + .value = { + .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP, + } + }; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + struct mlxsw_sp_mr_table *mr_table; + int err; + int i; + + mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size, + GFP_KERNEL); + if (!mr_table) + return ERR_PTR(-ENOMEM); + + mr_table->vr_id = vr_id; + mr_table->mlxsw_sp = mlxsw_sp; + mr_table->proto = proto; + INIT_LIST_HEAD(&mr_table->route_list); + + err = rhashtable_init(&mr_table->route_ht, + &mlxsw_sp_mr_route_ht_params); + if (err) + goto err_route_rhashtable_init; + + for (i = 0; i < MAXVIFS; i++) { + INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list); + INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list); + } + + err = mr->mr_ops->route_create(mlxsw_sp, mr->priv, + mr_table->catchall_route_priv, + &catchall_route_params); + if (err) + goto err_ops_route_create; + list_add_tail(&mr_table->node, &mr->table_list); + return mr_table; + +err_ops_route_create: + rhashtable_destroy(&mr_table->route_ht); +err_route_rhashtable_init: + kfree(mr_table); + return ERR_PTR(err); +} + +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + WARN_ON(!mlxsw_sp_mr_table_empty(mr_table)); + list_del(&mr_table->node); + mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, + &mr_table->catchall_route_priv); + rhashtable_destroy(&mr_table->route_ht); + kfree(mr_table); +} + +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table) +{ + struct mlxsw_sp_mr_route *mr_route, *tmp; + int i; + + list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node) + __mlxsw_sp_mr_route_del(mr_table, mr_route); + + for (i = 0; i < MAXVIFS; i++) { + mr_table->vifs[i].dev = NULL; + mr_table->vifs[i].rif = NULL; + } +} + +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table) +{ + int i; + + for (i = 0; i < MAXVIFS; i++) + if (mr_table->vifs[i].dev) + return false; + return list_empty(&mr_table->route_list); +} + +static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_route *mr_route) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + u64 packets, bytes; + + if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return; + + mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, + &bytes); + + switch (mr_route->mr_table->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (mr_route->mfc4->mfc_un.res.pkt != packets) + mr_route->mfc4->mfc_un.res.lastuse = jiffies; + mr_route->mfc4->mfc_un.res.pkt = packets; + mr_route->mfc4->mfc_un.res.bytes = bytes; + break; + case MLXSW_SP_L3_PROTO_IPV6: + /* fall through */ + default: + WARN_ON_ONCE(1); + } +} + +static void mlxsw_sp_mr_stats_update(struct work_struct *work) +{ + struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr, + stats_update_dw.work); + struct mlxsw_sp_mr_table *mr_table; + struct mlxsw_sp_mr_route *mr_route; + unsigned long interval; + + rtnl_lock(); + list_for_each_entry(mr_table, &mr->table_list, node) + list_for_each_entry(mr_route, &mr_table->route_list, node) + mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp, + mr_route); + rtnl_unlock(); + + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); +} + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops) +{ + struct mlxsw_sp_mr *mr; + unsigned long interval; + int err; + + mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL); + if (!mr) + return -ENOMEM; + mr->mr_ops = mr_ops; + mlxsw_sp->mr = mr; + INIT_LIST_HEAD(&mr->table_list); + + err = mr_ops->init(mlxsw_sp, mr->priv); + if (err) + goto err; + + /* Create the delayed work for counter updates */ + INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update); + interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL); + mlxsw_core_schedule_dw(&mr->stats_update_dw, interval); + return 0; +err: + kfree(mr); + return err; +} + +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_mr *mr = mlxsw_sp->mr; + + cancel_delayed_work_sync(&mr->stats_update_dw); + mr->mr_ops->fini(mr->priv); + kfree(mr); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h new file mode 100644 index 000000000000..c851b237d253 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -0,0 +1,133 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_H +#define _MLXSW_SPECTRUM_MCROUTER_H + +#include +#include "spectrum_router.h" +#include "spectrum.h" + +enum mlxsw_sp_mr_route_action { + MLXSW_SP_MR_ROUTE_ACTION_FORWARD, + MLXSW_SP_MR_ROUTE_ACTION_TRAP, +}; + +enum mlxsw_sp_mr_route_prio { + MLXSW_SP_MR_ROUTE_PRIO_SG, + MLXSW_SP_MR_ROUTE_PRIO_STARG, + MLXSW_SP_MR_ROUTE_PRIO_CATCHALL, + __MLXSW_SP_MR_ROUTE_PRIO_MAX +}; + +#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1) + +struct mlxsw_sp_mr_route_key { + int vrid; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr group; + union mlxsw_sp_l3addr group_mask; + union mlxsw_sp_l3addr source; + union mlxsw_sp_l3addr source_mask; +}; + +struct mlxsw_sp_mr_route_info { + enum mlxsw_sp_mr_route_action route_action; + u16 irif_index; + u16 *erif_indices; + size_t erif_num; + u16 min_mtu; +}; + +struct mlxsw_sp_mr_route_params { + struct mlxsw_sp_mr_route_key key; + struct mlxsw_sp_mr_route_info value; + enum mlxsw_sp_mr_route_prio prio; +}; + +struct mlxsw_sp_mr_ops { + int priv_size; + int route_priv_size; + int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv); + int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params); + int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info); + int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u64 *packets, u64 *bytes); + int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + enum mlxsw_sp_mr_route_action route_action); + int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 min_mtu); + int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 irif_index); + int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv, + u16 erif_index); + void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv); + void (*fini)(void *priv); +}; + +struct mlxsw_sp_mr; +struct mlxsw_sp_mr_table; + +int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_mr_ops *mr_ops); +void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc, bool replace); +void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table, + struct mfc_cache *mfc); +int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table, + struct net_device *dev, vifi_t vif_index, + unsigned long vif_flags, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index); +int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif); +void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, + const struct mlxsw_sp_rif *rif, int mtu); +struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto); +void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table); +void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table); +bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table); + +#endif -- cgit v1.2.3 From 0e14c7777acb6d58250cb746685dde0a74d60fe8 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:18 +0200 Subject: mlxsw: spectrum: Add the multicast routing hardware logic Implement the multicast routing hardware API introduced in previous patch for the specific spectrum hardware. The spectrum hardware multicast routes are written using the RMFT2 register and point to an ACL flexible action set. The actions used for multicast routes are: - Counter action, which allows counting bytes and packets on multicast routes. - Multicast route action, which provide RPF check and do the actual packet duplication to a list of RIFs. - Trap action, in the case the route action specified by the called is trap. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/Makefile | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + .../net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 828 +++++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h | 43 ++ 4 files changed, 873 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9b29764905f3..4816504419fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -18,7 +18,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl.o spectrum_flower.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o spectrum_acl_flex_actions.o \ - spectrum_mr.o + spectrum_mr.o spectrum_mr_tcam.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 44c5259e5548..ae67e6046098 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -139,6 +139,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_sb; struct mlxsw_sp_bridge; struct mlxsw_sp_router; +struct mlxsw_sp_mr; struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c new file mode 100644 index 000000000000..cda9e9ad10e3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -0,0 +1,828 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "reg.h" +#include "spectrum.h" +#include "core_acl_flex_actions.h" +#include "spectrum_mr.h" + +struct mlxsw_sp_mr_tcam_region { + struct mlxsw_sp *mlxsw_sp; + enum mlxsw_reg_rtar_key_type rtar_key_type; + struct parman *parman; + struct parman_prio *parman_prios; +}; + +struct mlxsw_sp_mr_tcam { + struct mlxsw_sp_mr_tcam_region ipv4_tcam_region; +}; + +/* This struct maps to one RIGR2 register entry */ +struct mlxsw_sp_mr_erif_sublist { + struct list_head list; + u32 rigr2_kvdl_index; + int num_erifs; + u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS]; + bool synced; +}; + +struct mlxsw_sp_mr_tcam_erif_list { + struct list_head erif_sublists; + u32 kvdl_index; +}; + +static bool +mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MC_ERIF_LIST_ENTRIES); + + return erif_sublist->num_erifs == erif_list_entries; +} + +static void +mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + INIT_LIST_HEAD(&erif_list->erif_sublists); +} + +#define MLXSW_SP_KVDL_RIGR2_SIZE 1 + +static struct mlxsw_sp_mr_erif_sublist * +mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + int err; + + erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL); + if (!erif_sublist) + return ERR_PTR(-ENOMEM); + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE, + &erif_sublist->rigr2_kvdl_index); + if (err) { + kfree(erif_sublist); + return ERR_PTR(err); + } + + list_add_tail(&erif_sublist->list, &erif_list->erif_sublists); + return erif_sublist; +} + +static void +mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_erif_sublist *erif_sublist) +{ + list_del(&erif_sublist->list); + mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index); + kfree(erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + u16 erif_index) +{ + struct mlxsw_sp_mr_erif_sublist *sublist; + + /* If either there is no erif_entry or the last one is full, allocate a + * new one. + */ + if (list_empty(&erif_list->erif_sublists)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + erif_list->kvdl_index = sublist->rigr2_kvdl_index; + } else { + sublist = list_last_entry(&erif_list->erif_sublists, + struct mlxsw_sp_mr_erif_sublist, + list); + sublist->synced = false; + if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) { + sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, + erif_list); + if (IS_ERR(sublist)) + return PTR_ERR(sublist); + } + } + + /* Add the eRIF to the last entry's last index */ + sublist->erif_indices[sublist->num_erifs++] = erif_index; + return 0; +} + +static void +mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp; + + list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists, + list) + mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist); +} + +static int +mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_sp_mr_erif_sublist *curr_sublist; + char rigr2_pl[MLXSW_REG_RIGR2_LEN]; + int err; + int i; + + list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) { + if (curr_sublist->synced) + continue; + + /* If the sublist is not the last one, pack the next index */ + if (list_is_last(&curr_sublist->list, + &erif_list->erif_sublists)) { + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + false, 0); + } else { + struct mlxsw_sp_mr_erif_sublist *next_sublist; + + next_sublist = list_next_entry(curr_sublist, list); + mlxsw_reg_rigr2_pack(rigr2_pl, + curr_sublist->rigr2_kvdl_index, + true, + next_sublist->rigr2_kvdl_index); + } + + /* Pack all the erifs */ + for (i = 0; i < curr_sublist->num_erifs; i++) { + u16 erif_index = curr_sublist->erif_indices[i]; + + mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true, + erif_index); + } + + /* Write the entry */ + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2), + rigr2_pl); + if (err) + /* No need of a rollback here because this + * hardware entry should not be pointed yet. + */ + return err; + curr_sublist->synced = true; + } + return 0; +} + +static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to, + struct mlxsw_sp_mr_tcam_erif_list *from) +{ + list_splice(&from->erif_sublists, &to->erif_sublists); + to->kvdl_index = from->kvdl_index; +} + +struct mlxsw_sp_mr_tcam_route { + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + u32 counter_index; + struct parman_item parman_item; + struct parman_prio *parman_prio; + enum mlxsw_sp_mr_route_action action; + struct mlxsw_sp_mr_route_key key; + u16 irif_index; + u16 min_mtu; +}; + +static struct mlxsw_afa_block * +mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_mr_route_action route_action, + u16 irif_index, u32 counter_index, + u16 min_mtu, + struct mlxsw_sp_mr_tcam_erif_list *erif_list) +{ + struct mlxsw_afa_block *afa_block; + int err; + + afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); + if (IS_ERR(afa_block)) + return afa_block; + + err = mlxsw_afa_block_append_counter(afa_block, counter_index); + if (err) + goto err; + + switch (route_action) { + case MLXSW_SP_MR_ROUTE_ACTION_TRAP: + err = mlxsw_afa_block_append_trap(afa_block, + MLXSW_TRAP_ID_ACL1); + if (err) + goto err; + break; + case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: + /* If we are about to append a multicast router action, commit + * the erif_list. + */ + err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list); + if (err) + goto err; + + err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index, + min_mtu, false, + erif_list->kvdl_index); + if (err) + goto err; + break; + default: + err = -EINVAL; + goto err; + } + + err = mlxsw_afa_block_commit(afa_block); + if (err) + goto err; + return afa_block; +err: + mlxsw_afa_block_destroy(afa_block); + return ERR_PTR(err); +} + +static void +mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block) +{ + mlxsw_afa_block_destroy(afa_block); +} + +static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp, + struct parman_item *parman_item, + struct mlxsw_sp_mr_route_key *key, + struct mlxsw_afa_block *afa_block) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + switch (key->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index, + key->vrid, + MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0, + ntohl(key->group.addr4), + ntohl(key->group_mask.addr4), + ntohl(key->source.addr4), + ntohl(key->source_mask.addr4), + mlxsw_afa_block_first_set(afa_block)); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid, + struct parman_item *parman_item) +{ + char rmft2_pl[MLXSW_REG_RMFT2_LEN]; + + mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid, + 0, 0, 0, 0, 0, 0, NULL); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl); +} + +static int +mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_erif_list *erif_list, + struct mlxsw_sp_mr_route_info *route_info) +{ + int err; + int i; + + for (i = 0; i < route_info->erif_num; i++) { + u16 erif_index = route_info->erif_indices[i]; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list, + erif_index); + if (err) + return err; + } + return 0; +} + +static int +mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route, + enum mlxsw_sp_mr_route_prio prio) +{ + struct parman_prio *parman_prio = NULL; + int err; + + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio]; + err = parman_item_add(mr_tcam->ipv4_tcam_region.parman, + parman_prio, &route->parman_item); + if (err) + return err; + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } + route->parman_prio = parman_prio; + return 0; +} + +static void +mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam, + struct mlxsw_sp_mr_tcam_route *route) +{ + switch (route->key.proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parman_item_remove(mr_tcam->ipv4_tcam_region.parman, + route->parman_prio, &route->parman_item); + break; + case MLXSW_SP_L3_PROTO_IPV6: + default: + WARN_ON_ONCE(1); + } +} + +static int +mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv, + void *route_priv, + struct mlxsw_sp_mr_route_params *route_params) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + int err; + + route->key = route_params->key; + route->irif_index = route_params->value.irif_index; + route->min_mtu = route_params->value.min_mtu; + route->action = route_params->value.route_action; + + /* Create the egress RIFs list */ + mlxsw_sp_mr_erif_list_init(&route->erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list, + &route_params->value); + if (err) + goto err_erif_populate; + + /* Create the flow counter */ + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index); + if (err) + goto err_counter_alloc; + + /* Create the flexible action block */ + route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(route->afa_block)) { + err = PTR_ERR(route->afa_block); + goto err_afa_block_create; + } + + /* Allocate place in the TCAM */ + err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route, + route_params->prio); + if (err) + goto err_parman_item_add; + + /* Write the route to the TCAM */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, route->afa_block); + if (err) + goto err_route_replace; + return 0; + +err_route_replace: + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); +err_parman_item_add: + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); +err_afa_block_create: + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); +err_erif_populate: +err_counter_alloc: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + return err; +} + +static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp, + void *priv, void *route_priv) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid, + &route->parman_item); + mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route); + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); +} + +static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u64 *packets, + u64 *bytes) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index, + packets, bytes); +} + +static int +mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, + enum mlxsw_sp_mr_route_action route_action) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action, + route->irif_index, + route->counter_index, + route->min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->action = route_action; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 min_mtu) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new flexible action block */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route->action, + route->irif_index, + route->counter_index, + min_mtu, + &route->erif_list); + if (IS_ERR(afa_block)) + return PTR_ERR(afa_block); + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err; + + /* Delete the old one */ + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + route->afa_block = afa_block; + route->min_mtu = min_mtu; + return 0; +err: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); + return err; +} + +static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 irif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return -EINVAL; + route->irif_index = irif_index; + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + int err; + + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list, + erif_index); + if (err) + return err; + + /* Commit the action only if the route action is not TRAP */ + if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP) + return mlxsw_sp_mr_erif_list_commit(mlxsw_sp, + &route->erif_list); + return 0; +} + +static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp, + void *route_priv, u16 erif_index) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_erif_sublist *erif_sublist; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + int i; + + /* Create a copy of the original erif_list without the deleted entry */ + mlxsw_sp_mr_erif_list_init(&erif_list); + list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) { + for (i = 0; i < erif_sublist->num_erifs; i++) { + u16 curr_erif = erif_sublist->erif_indices[i]; + + if (curr_erif == erif_index) + continue; + err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list, + curr_erif); + if (err) + goto err_erif_list_add; + } + } + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action, + route->irif_index, + route->counter_index, + route->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_list_add: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +static int +mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv, + struct mlxsw_sp_mr_route_info *route_info) +{ + struct mlxsw_sp_mr_tcam_route *route = route_priv; + struct mlxsw_sp_mr_tcam_erif_list erif_list; + struct mlxsw_afa_block *afa_block; + int err; + + /* Create a new erif_list */ + mlxsw_sp_mr_erif_list_init(&erif_list); + err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info); + if (err) + goto err_erif_populate; + + /* Create the flexible action block pointing to the new erif_list */ + afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, + route_info->route_action, + route_info->irif_index, + route->counter_index, + route_info->min_mtu, + &erif_list); + if (IS_ERR(afa_block)) { + err = PTR_ERR(afa_block); + goto err_afa_block_create; + } + + /* Update the TCAM route entry */ + err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item, + &route->key, afa_block); + if (err) + goto err_route_write; + + mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block); + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list); + route->afa_block = afa_block; + mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list); + route->action = route_info->route_action; + route->irif_index = route_info->irif_index; + route->min_mtu = route_info->min_mtu; + return 0; + +err_route_write: + mlxsw_sp_mr_tcam_afa_block_destroy(afa_block); +err_afa_block_create: +err_erif_populate: + mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list); + return err; +} + +#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16 +#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16 + +static int +mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE, + mr_tcam_region->rtar_key_type, + MLXSW_SP_MR_TCAM_REGION_BASE_COUNT); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void +mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE, + mr_tcam_region->rtar_key_type, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv, + unsigned long new_count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rtar_pl[MLXSW_REG_RTAR_LEN]; + u64 max_tcam_rules; + + max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES); + if (new_count > max_tcam_rules) + return -EINVAL; + mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE, + mr_tcam_region->rtar_key_type, new_count); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl); +} + +static void mlxsw_sp_mr_tcam_region_parman_move(void *priv, + unsigned long from_index, + unsigned long to_index, + unsigned long count) +{ + struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv; + struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp; + char rrcr_pl[MLXSW_REG_RRCR_LEN]; + + mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE, + from_index, count, + mr_tcam_region->rtar_key_type, to_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl); +} + +static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = { + .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT, + .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP, + .resize = mlxsw_sp_mr_tcam_region_parman_resize, + .move = mlxsw_sp_mr_tcam_region_parman_move, + .algo = PARMAN_ALGO_TYPE_LSORT, +}; + +static int +mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_mr_tcam_region *mr_tcam_region, + enum mlxsw_reg_rtar_key_type rtar_key_type) +{ + struct parman_prio *parman_prios; + struct parman *parman; + int err; + int i; + + mr_tcam_region->rtar_key_type = rtar_key_type; + mr_tcam_region->mlxsw_sp = mlxsw_sp; + + err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region); + if (err) + return err; + + parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops, + mr_tcam_region); + if (!parman) { + err = -ENOMEM; + goto err_parman_create; + } + mr_tcam_region->parman = parman; + + parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, + sizeof(*parman_prios), GFP_KERNEL); + if (!parman_prios) + goto err_parman_prios_alloc; + mr_tcam_region->parman_prios = parman_prios; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_init(mr_tcam_region->parman, + &mr_tcam_region->parman_prios[i], i); + return 0; + +err_parman_prios_alloc: + parman_destroy(parman); +err_parman_create: + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); + return err; +} + +static void +mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region) +{ + int i; + + for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) + parman_prio_fini(&mr_tcam_region->parman_prios[i]); + kfree(mr_tcam_region->parman_prios); + parman_destroy(mr_tcam_region->parman); + mlxsw_sp_mr_tcam_region_free(mr_tcam_region); +} + +static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES)) + return -EIO; + + return mlxsw_sp_mr_tcam_region_init(mlxsw_sp, + &mr_tcam->ipv4_tcam_region, + MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST); +} + +static void mlxsw_sp_mr_tcam_fini(void *priv) +{ + struct mlxsw_sp_mr_tcam *mr_tcam = priv; + + mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region); +} + +const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = { + .priv_size = sizeof(struct mlxsw_sp_mr_tcam), + .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route), + .init = mlxsw_sp_mr_tcam_init, + .route_create = mlxsw_sp_mr_tcam_route_create, + .route_update = mlxsw_sp_mr_tcam_route_update, + .route_stats = mlxsw_sp_mr_tcam_route_stats, + .route_action_update = mlxsw_sp_mr_tcam_route_action_update, + .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update, + .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update, + .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add, + .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del, + .route_destroy = mlxsw_sp_mr_tcam_route_destroy, + .fini = mlxsw_sp_mr_tcam_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h new file mode 100644 index 000000000000..f9b59ee25406 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Yotam Gigi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H +#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H + +#include "spectrum.h" +#include "spectrum_mr.h" + +extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops; + +#endif -- cgit v1.2.3 From 7e50d435759accec4e17764a8d5a1ef63b79ffd6 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:19 +0200 Subject: mlxsw: spectrum: router: Squash the default route table to main Currently, the mlxsw Spectrum driver offloads only either the RT_TABLE_MAIN FIB table or the VRF tables, so the RT_TABLE_LOCAL table is squashed to the RT_TABLE_MAIN table to allow local routes to be offloaded too. By default, multicast MFC routes which are not assigned to any user requested table are put in the RT_TABLE_DEFAULT table. Due to the fact that offloading multicast MFC routes support in Spectrum router logic is going to be introduced soon, squash the default table to MAIN too. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 321f7356073c..28c0c84bc966 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -693,8 +693,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, static u32 mlxsw_sp_fix_tb_id(u32 tb_id) { - /* For our purpose, squash main and local table into one */ - if (tb_id == RT_TABLE_LOCAL) + /* For our purpose, squash main, default and local tables into one */ + if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT) tb_id = RT_TABLE_MAIN; return tb_id; } -- cgit v1.2.3 From d42b0965b1d4fe0808a2103a3f7c015515b1112e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:20 +0200 Subject: mlxsw: spectrum_router: Add multicast routes notification handling functionality Add functionality for calling the multicast routing offloading logic upon MFC and VIF add and delete notifications. In addition, call the multicast routing upon RIF addition and deletion events. As the multicast routing offload logic may sleep, the actual calls are done in a deferred work. To ensure the MFC object is not freed in that interval, a reference is held to it. In case of a failure, the abort mechanism is used, which ejects all the routes from the hardware and triggers the traffic to flow through the kernel. Note: At that stage, the FIB notifications are still ignored, and will be enabled in a further patch. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 187 ++++++++++++++++++++- 1 file changed, 185 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 28c0c84bc966..77584422ed08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -65,6 +65,8 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_ipip.h" +#include "spectrum_mr.h" +#include "spectrum_mr_tcam.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -459,6 +461,7 @@ struct mlxsw_sp_vr { unsigned int rif_count; struct mlxsw_sp_fib *fib4; struct mlxsw_sp_fib *fib6; + struct mlxsw_sp_mr_table *mr4_table; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -653,7 +656,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4 || !!vr->fib6; + return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -744,9 +747,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(vr->fib6); goto err_fib6_create; } + vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->mr4_table)) { + err = PTR_ERR(vr->mr4_table); + goto err_mr_table_create; + } vr->tb_id = tb_id; return vr; +err_mr_table_create: + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; @@ -755,6 +767,8 @@ err_fib6_create: static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { + mlxsw_sp_mr_table_destroy(vr->mr4_table); + vr->mr4_table = NULL; mlxsw_sp_fib_destroy(vr->fib6); vr->fib6 = NULL; mlxsw_sp_fib_destroy(vr->fib4); @@ -775,7 +789,8 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { if (!vr->rif_count && list_empty(&vr->fib4->node_list) && - list_empty(&vr->fib6->node_list)) + list_empty(&vr->fib6->node_list) && + mlxsw_sp_mr_table_empty(vr->mr4_table)) mlxsw_sp_vr_destroy(vr); } @@ -4731,6 +4746,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info, + bool replace) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace); +} + +static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp, + struct mfc_entry_notifier_info *men_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc); + mlxsw_sp_vr_put(vr); +} + +static int +mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return 0; + + vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev); + return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev, + ven_info->vif_index, + ven_info->vif_flags, rif); +} + +static void +mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, + struct vif_entry_notifier_info *ven_info) +{ + struct mlxsw_sp_vr *vr; + + if (mlxsw_sp->router->aborted) + return; + + vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id); + if (WARN_ON(!vr)) + return; + + mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index); + mlxsw_sp_vr_put(vr); +} + static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; @@ -4741,6 +4825,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; + /* The multicast router code does not need an abort trap as by default, + * packets that don't match any routes are trapped to the CPU. + */ + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); @@ -4822,6 +4910,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; + + mlxsw_sp_mr_table_flush(vr->mr4_table); mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); /* If virtual router was only used for IPv4, then it's no @@ -4854,6 +4944,8 @@ struct mlxsw_sp_fib_event_work { struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; + struct mfc_entry_notifier_info men_info; + struct vif_entry_notifier_info ven_info; }; struct mlxsw_sp *mlxsw_sp; unsigned long event; @@ -4940,6 +5032,55 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, + replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); + ipmr_cache_put(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: + err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, + &fib_work->ven_info); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_VIF_DEL: + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, + &fib_work->ven_info); + dev_put(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!ipmr_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, struct fib_notifier_info *info) { @@ -4985,6 +5126,30 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, } } +static void +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); + ipmr_cache_hold(fib_work->men_info.mfc); + break; + case FIB_EVENT_VIF_ADD: /* fall through */ + case FIB_EVENT_VIF_DEL: + memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); + dev_hold(fib_work->ven_info.dev); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -5014,6 +5179,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); mlxsw_sp_router_fib6_event(fib_work, info); break; + case RTNL_FAMILY_IPMR: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); + mlxsw_sp_router_fibmr_event(fib_work, info); + break; } mlxsw_core_schedule_work(&fib_work->work); @@ -5227,12 +5396,18 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; + err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif); + if (err) + goto err_mr_rif_add; + mlxsw_sp_rif_counters_alloc(rif); mlxsw_sp->router->rifs[rif_index] = rif; vr->rif_count++; return rif; +err_mr_rif_add: + ops->deconfigure(rif); err_configure: if (fid) mlxsw_sp_fid_put(fid); @@ -5257,6 +5432,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; mlxsw_sp_rif_counters_free(rif); + mlxsw_sp_mr_rif_del(vr->mr4_table, rif); ops->deconfigure(rif); if (fid) /* Loopback RIFs are not associated with a FID. */ @@ -6120,6 +6296,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_lpm_init; + err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops); + if (err) + goto err_mr_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -6141,6 +6321,8 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_mr_fini(mlxsw_sp); +err_mr_init: mlxsw_sp_lpm_fini(mlxsw_sp); err_lpm_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); @@ -6162,6 +6344,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->router->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); -- cgit v1.2.3 From fd890fe98f8b026642d39011d03d22fb4aa66b0f Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:21 +0200 Subject: mlxsw: spectrum: Notify multicast router on RIF MTU changes Due to the fact that multicast routes hold the minimum MTU of all the egress RIFs and trap packets that don't meet it, notify the mulitcast router code on RIF MTU changes. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 77584422ed08..dbd9c196fcfb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5773,6 +5773,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) if (err) goto err_rif_fdb_op; + if (rif->mtu != dev->mtu) { + struct mlxsw_sp_vr *vr; + + /* The RIF is relevant only to its mr_table instance, as unlike + * unicast routing, in multicast routing a RIF cannot be shared + * between several multicast routing tables. + */ + vr = &mlxsw_sp->router->vrs[rif->vr_id]; + mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu); + } + ether_addr_copy(rif->addr, dev->dev_addr); rif->mtu = dev->mtu; -- cgit v1.2.3 From 664375e9567b5eeece8d9ebf85eaf5107cab382d Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:22 +0200 Subject: mlxsw: spectrum: router: Don't ignore IPMR notifications Make the Spectrum router logic not ignore the RTNL_FAMILY_IPMR FIB notifications. Past commits added the IPMR VIF and MFC add/del notifications via the fib_notifier chain. In addition, a code for handling these notifications in the Spectrum router logic was added. Make the Spectrum router logic not ignore these notifications and forward the requests to the Spectrum multicast router offloading logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index dbd9c196fcfb..ef4b86b3aa9b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5159,7 +5159,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, struct mlxsw_sp_router *router; if (!net_eq(info->net, &init_net) || - (info->family != AF_INET && info->family != AF_INET6)) + (info->family != AF_INET && info->family != AF_INET6 && + info->family != RTNL_FAMILY_IPMR)) return NOTIFY_DONE; fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); -- cgit v1.2.3 From 7e7c1afb67074596f6ff96e5276d6084d1648ec1 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Thu, 28 Sep 2017 13:44:39 +0200 Subject: batman-adv: Start new development cycle Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 05cc7637c064..edb2f239d04d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.3" +#define BATADV_SOURCE_VERSION "2017.4" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.2.3 From 825ffe1f7b875127bc03faffec0ecfb05906650a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 23 Aug 2017 21:52:13 +0200 Subject: batman-adv: Remove unnecessary parentheses checkpatch introduced with commit 63b7c73ec86b ("checkpatch: add --strict check for ifs with unnecessary parentheses") an additional test which identifies some unnecessary parentheses. Remove these unnecessary parentheses to avoid the warnings and to unify the coding style slightly more. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_iv_ogm.c | 24 ++++++++++++------------ net/batman-adv/bat_v.c | 2 +- net/batman-adv/bat_v_elp.c | 6 +++--- net/batman-adv/bat_v_ogm.c | 12 ++++++------ net/batman-adv/distributed-arp-table.c | 4 ++-- net/batman-adv/gateway_client.c | 8 ++++---- net/batman-adv/gateway_common.c | 18 +++++++++--------- net/batman-adv/hard-interface.c | 12 ++++++------ net/batman-adv/icmp_socket.c | 4 ++-- net/batman-adv/main.c | 4 ++-- net/batman-adv/multicast.c | 2 +- net/batman-adv/originator.c | 26 +++++++++++++------------- net/batman-adv/routing.c | 6 +++--- net/batman-adv/send.c | 6 +++--- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/sysfs.c | 4 ++-- net/batman-adv/tp_meter.c | 2 +- 17 files changed, 71 insertions(+), 71 deletions(-) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 83ba5483455a..1b659ab652fb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) return; /* the interface gets activated here to avoid race conditions between @@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, * drops as they can't send and receive at the same time. */ tq_iface_penalty = BATADV_TQ_MAX_VALUE; - if (if_outgoing && (if_incoming == if_outgoing) && + if (if_outgoing && if_incoming == if_outgoing && batadv_is_wifi_hardif(if_outgoing)) tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, bat_priv); @@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, ret = BATADV_NEIGH_DUP; } else { set_mark = 0; - if (is_dup && (ret != BATADV_NEIGH_DUP)) + if (is_dup && ret != BATADV_NEIGH_DUP) ret = BATADV_ORIG_DUP; } @@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ - if (!is_single_hop_neigh && (!orig_neigh_router)) { + if (!is_single_hop_neigh && !orig_neigh_router) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out_neigh; @@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; - if (is_bidirect && ((dup_status == BATADV_NO_DUP) || + if (is_bidirect && (dup_status == BATADV_NO_DUP || (sameseq && similar_ttl))) { batadv_iv_ogm_orig_update(bat_priv, orig_node, orig_ifinfo, ethhdr, @@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* OGMs from secondary interfaces should only scheduled once * per interface where it has been received, not multiple times */ - if ((ogm_packet->ttl <= 2) && - (if_incoming != if_outgoing)) { + if (ogm_packet->ttl <= 2 && + if_incoming != if_outgoing) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); goto out_neigh; @@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, if_incoming, if_outgoing); out_neigh: - if ((orig_neigh_node) && (!is_single_hop_neigh)) + if (orig_neigh_node && !is_single_hop_neigh) batadv_orig_node_put(orig_neigh_node); out: if (router_ifinfo) @@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) tmp_gw_factor *= 100 * 100; tmp_gw_factor >>= 18; - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { + if (tmp_gw_factor > max_gw_factor || + (tmp_gw_factor == max_gw_factor && + tq_avg > max_tq)) { if (curr_gw) batadv_gw_node_put(curr_gw); curr_gw = gw_node; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 4e2724c5b33d..93ef1c06227e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -767,7 +767,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) goto next; - if (curr_gw && (bw <= max_bw)) + if (curr_gw && bw <= max_bw) goto next; if (curr_gw) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index bd1064d98e16..1de992c58b35 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && (throughput != SPEED_UNKNOWN)) + if (throughput && throughput != SPEED_UNKNOWN) return throughput * 10; } @@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) goto out; /* we are in the process of shutting this interface down */ - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) goto out; /* the interface was enabled but may not be ready yet */ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 8be61734fc43..c251445a42a0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, * due to the store & forward characteristics of WIFI. * Very low throughput values are the exception. */ - if ((throughput > 10) && - (if_incoming == if_outgoing) && + if (throughput > 10 && + if_incoming == if_outgoing && !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX)) return throughput / 2; @@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, /* drop packets with old seqnos, however accept the first packet after * a host has been rebooted. */ - if ((seq_diff < 0) && !protection_started) + if (seq_diff < 0 && !protection_started) goto out; neigh_node->last_seen = jiffies; @@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, router_throughput = router_ifinfo->bat_v.throughput; neigh_throughput = neigh_ifinfo->bat_v.throughput; - if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && - (router_throughput >= neigh_throughput)) + if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF && + router_throughput >= neigh_throughput) goto out; } @@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, return; /* only unknown & newer OGMs contain TVLVs we are interested in */ - if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT)) + if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT) batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, (unsigned char *)(ogm2 + 1), diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b6cfa78e9381..760c0de72582 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, /* this is an hash collision with the temporary selected node. Choose * the one with the lowest address */ - if ((tmp_max == max) && max_orig_node && - (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) + if (tmp_max == max && max_orig_node && + batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0) goto out; ret = true; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index de9955d5224d..10d521f0b17f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv) } } - if ((curr_gw) && (!next_gw)) { + if (curr_gw && !next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Removing selected gateway - no gateway in range\n"); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL); - } else if ((!curr_gw) && (next_gw)) { + } else if (!curr_gw && next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", next_gw->orig_node->orig, @@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, goto out; } - if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && - (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) + if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && + gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 33940c5c74a8..2c26039c23fc 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff, if (strncasecmp(tmp_ptr, "mbit", 4) == 0) bw_unit_type = BATADV_BW_UNIT_MBIT; - if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || - (bw_unit_type == BATADV_BW_UNIT_MBIT)) + if (strncasecmp(tmp_ptr, "kbit", 4) == 0 || + bw_unit_type == BATADV_BW_UNIT_MBIT) *tmp_ptr = '\0'; } @@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, if (!up_new) up_new = 1; - if ((down_curr == down_new) && (up_curr == up_new)) + if (down_curr == down_new && up_curr == up_new) return count; batadv_gw_reselect(bat_priv); @@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /* only fetch the tvlv value if the handler wasn't called via the * CIFNOTFND flag and if there is data to fetch */ - if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) || - (tvlv_value_len < sizeof(gateway))) { + if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND || + tvlv_value_len < sizeof(gateway)) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } else { gateway_ptr = tvlv_value; gateway.bandwidth_down = gateway_ptr->bandwidth_down; gateway.bandwidth_up = gateway_ptr->bandwidth_up; - if ((gateway.bandwidth_down == 0) || - (gateway.bandwidth_up == 0)) { + if (gateway.bandwidth_down == 0 || + gateway.bandwidth_up == 0) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } @@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); /* restart gateway selection */ - if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) + if (gateway.bandwidth_down != 0 && + atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..d4aa99c060f9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->net_dev == net_dev) @@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->soft_iface != soft_iface) @@ -654,8 +654,8 @@ out: static void batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) return; hard_iface->if_status = BATADV_IF_INACTIVE; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 8ead292886d1..bded31121d12 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, size_t packet_len; int error; - if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0)) + if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0) return -EAGAIN; - if ((!buf) || (count < sizeof(struct batadv_icmp_packet))) + if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; if (!access_ok(VERIFY_WRITE, buf, count)) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fb381fb26a66..033819aefc39 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -544,8 +544,8 @@ batadv_recv_handler_register(u8 packet_type, struct batadv_hard_iface *); curr = batadv_rx_handler[packet_type]; - if ((curr != batadv_recv_unhandled_packet) && - (curr != batadv_recv_unhandled_unicast_packet)) + if (curr != batadv_recv_unhandled_packet && + curr != batadv_recv_unhandled_unicast_packet) return -EBUSY; batadv_rx_handler[packet_type] = recv_handler; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d327670641ac..e553a8770a89 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, bool orig_initialized; if (orig_mcast_enabled && tvlv_value && - (tvlv_value_len >= sizeof(mcast_flags))) + tvlv_value_len >= sizeof(mcast_flags)) mcast_flags = *(u8 *)tvlv_value; spin_lock_bh(&orig->mcast_handler_lock); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..2967b86c13da 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; - if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) || - (if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) + if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) || + if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) { + if (if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n", orig_node->orig, neigh_node->addr, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f10e3ff26f9d..40d9bf3e5bfe 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ - if ((curr_router) && (!neigh_node)) { + if (curr_router && !neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); batadv_tt_global_del_orig(bat_priv, orig_node, -1, "Deleted route towards originator"); /* route added */ - } else if ((!curr_router) && (neigh_node)) { + } else if (!curr_router && neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* add record route information if not full */ if ((icmph->msg_type == BATADV_ECHO_REPLY || icmph->msg_type == BATADV_ECHO_REQUEST) && - (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { + skb->len >= sizeof(struct batadv_icmp_packet_rr)) { if (skb_linearize(skb) < 0) goto free_skb; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 054a65e6eb68..7895323fd2a7 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, #ifdef CONFIG_BATMAN_ADV_BATMAN_V hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); - if ((hardif_neigh) && (ret != NET_XMIT_DROP)) + if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; if (hardif_neigh) @@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list, * we delete only packets belonging to the given interface */ if (hard_iface && - (forw_packet->if_incoming != hard_iface) && - (forw_packet->if_outgoing != hard_iface)) + forw_packet->if_incoming != hard_iface && + forw_packet->if_outgoing != hard_iface) continue; hlist_del(&forw_packet->list); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c2c986746d0b..7c8288245f80 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { /* check ranges */ - if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev))) + if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; dev->mtu = new_mtu; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0ae8b30e4eaa..aa187fd42475 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev, if (hard_iface->if_status == status_tmp) goto out; - if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) + if (hard_iface->soft_iface && + strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0) goto out; if (status_tmp == BATADV_IF_NOT_IN_USE) { diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index bfe8effe9238..4b90033f35a8 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /* send the ack */ r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { + if (unlikely(r < 0) || r == NET_XMIT_DROP) { ret = BATADV_TP_REASON_DST_UNREACHABLE; goto out; } -- cgit v1.2.3 From 48915aed60c9615e5eaa0f1683640635dade788e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 28 Sep 2017 17:16:51 +0200 Subject: batman-adv: Fix "line over 80 characters" checkpatch warning Fixes: 242c1a28eb61 ("net: Remove useless function skb_header_release") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7c8288245f80..3af4b0b29b23 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) int result; /* TODO: We must check if we can release all references to non-payload - * data using __skb_header_release in our skbs to allow skb_cow_header to - * work optimally. This means that those skbs are not allowed to read + * data using __skb_header_release in our skbs to allow skb_cow_header + * to work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer * to write freely in that area. -- cgit v1.2.3 From 8f1975e31d8ed0c021a1993a4d9123dd39c740ea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 25 Sep 2017 09:14:14 -0700 Subject: inetpeer: speed up inetpeer_invalidate_tree() As measured in my prior patch ("sch_netem: faster rb tree removal"), rbtree_postorder_for_each_entry_safe() is nice looking but much slower than using rb_next() directly, except when tree is small enough to fit in CPU caches (then the cost is the same) From: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e7eb590c86ce..6e5626cc366c 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *p, *n; + struct rb_node *p = rb_first(&base->rb_root); - rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) { - inet_putpeer(p); + while (p) { + struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node); + + p = rb_next(p); + rb_erase(&peer->rb_node, &base->rb_root); + inet_putpeer(peer); cond_resched(); } - base->rb_root = RB_ROOT; base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); -- cgit v1.2.3 From 76cf546c2802f6e25113ba481d7e85d0298768c6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:49 -0700 Subject: net_sched: use idr to allocate bpf filter handles Instead of calling cls_bpf_get() in a loop to find a unused handle, just switch to idr API to allocate new handles. Cc: Daniel Borkmann Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_bpf.c | 57 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 36671b0fb125..6c6b21f6ba62 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); struct cls_bpf_head { struct list_head plist; - u32 hgen; + struct idr handle_idr; struct rcu_head rcu; }; @@ -238,6 +239,7 @@ static int cls_bpf_init(struct tcf_proto *tp) return -ENOBUFS; INIT_LIST_HEAD_RCU(&head->plist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; @@ -264,6 +266,9 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) { + struct cls_bpf_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, prog->handle); cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); @@ -287,6 +292,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp) list_for_each_entry_safe(prog, tmp, &head->plist, link) __cls_bpf_delete(tp, prog); + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } @@ -421,27 +427,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, return 0; } -static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, - struct cls_bpf_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; -} - static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -451,6 +436,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct cls_bpf_prog *oldprog = *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; struct cls_bpf_prog *prog; + unsigned long idr_index; int ret; if (tca[TCA_OPTIONS] == NULL) @@ -476,21 +462,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, } } - if (handle == 0) - prog->handle = cls_bpf_grab_new_handle(tp, head); - else + if (handle == 0) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (ret) + goto errout; + prog->handle = idr_index; + } else { + if (!oldprog) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (ret) + goto errout; + } prog->handle = handle; - if (prog->handle == 0) { - ret = -EINVAL; - goto errout; } ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) - goto errout; + goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); if (ret) { + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); __cls_bpf_delete_prog(prog); return ret; } @@ -499,6 +494,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (oldprog) { + idr_replace_ext(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); @@ -509,6 +505,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = prog; return 0; +errout_idr: + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); errout: tcf_exts_destroy(&prog->exts); kfree(prog); -- cgit v1.2.3 From 1d8134fea2eb460698a281f91c03c400f7e546ce Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:50 -0700 Subject: net_sched: use idr to allocate basic filter handles Instead of calling basic_get() in a loop to find a unused handle, just switch to idr API to allocate new handles. Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d89ebafd2239..cfeb6f158566 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -17,13 +17,14 @@ #include #include #include +#include #include #include #include struct basic_head { - u32 hgenerator; struct list_head flist; + struct idr handle_idr; struct rcu_head rcu; }; @@ -78,6 +79,7 @@ static int basic_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } @@ -99,8 +101,10 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); call_rcu(&f->rcu, basic_delete_filter); } + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } @@ -111,6 +115,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -154,6 +159,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; + unsigned long idr_index; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; @@ -179,30 +185,31 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, err = -EINVAL; if (handle) { fnew->handle = handle; - } else if (fold) { - fnew->handle = fold->handle; + if (!fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + } } else { - unsigned int i = 0x80000000; - do { - if (++head->hgenerator == 0x7FFFFFFF) - head->hgenerator = 1; - } while (--i > 0 && basic_get(tp, head->hgenerator)); - - if (i <= 0) { - pr_err("Insufficient number of handles\n"); + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (err) goto errout; - } - - fnew->handle = head->hgenerator; + fnew->handle = idr_index; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); - if (err < 0) + if (err < 0) { + if (!fold) + idr_remove_ext(&head->handle_idr, fnew->handle); goto errout; + } *arg = fnew; if (fold) { + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); call_rcu(&fold->rcu, basic_delete_filter); -- cgit v1.2.3 From e7614370d6f04711c4e4b48f7055e5008fa4ed42 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 25 Sep 2017 10:13:51 -0700 Subject: net_sched: use idr to allocate u32 filter handles Instead of calling u32_lookup_ht() in a loop to find a unused handle, just switch to idr API to allocate new handles. u32 filters are special as the handle could contain a hash table id and a key id, so we need two IDR to allocate each of them. Cc: Chris Mi Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 108 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 41 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 10b8d851fc6b..094d224411a9 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -46,6 +46,7 @@ #include #include #include +#include struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -82,6 +83,7 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; + struct idr handle_idr; struct rcu_head rcu; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. @@ -93,7 +95,7 @@ struct tc_u_common { struct tc_u_hnode __rcu *hlist; struct Qdisc *q; int refcnt; - u32 hgenerator; + struct idr handle_idr; struct hlist_node hnode; struct rcu_head rcu; }; @@ -311,19 +313,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle) return u32_lookup_key(ht, handle); } -static u32 gen_new_htid(struct tc_u_common *tp_c) +static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) { - int i = 0x800; + unsigned long idr_index; + int err; - /* hgenerator only used inside rtnl lock it is safe to increment + /* This is only used inside rtnl lock it is safe to increment * without read _copy_ update semantics */ - do { - if (++tp_c->hgenerator == 0x7FF) - tp_c->hgenerator = 1; - } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); - - return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; + err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index, + 1, 0x7FF, GFP_KERNEL); + if (err) + return 0; + return (u32)(idr_index | 0x800) << 20; } static struct hlist_head *tc_u_common_hash; @@ -366,8 +368,9 @@ static int u32_init(struct tcf_proto *tp) return -ENOBUFS; root_ht->refcnt++; - root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; + root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + idr_init(&root_ht->handle_idr); if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); @@ -377,6 +380,7 @@ static int u32_init(struct tcf_proto *tp) } tp_c->q = tp->q; INIT_HLIST_NODE(&tp_c->hnode); + idr_init(&tp_c->handle_idr); h = tc_u_hash(tp); hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); @@ -565,6 +569,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); + idr_remove_ext(&ht->handle_idr, n->handle); call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } @@ -586,6 +591,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); + idr_destroy(&ht->handle_idr); + idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -633,6 +640,7 @@ static void u32_destroy(struct tcf_proto *tp) kfree_rcu(ht, rcu); } + idr_destroy(&tp_c->handle_idr); kfree(tp_c); } @@ -701,27 +709,21 @@ ret: return ret; } -#define NR_U32_NODE (1<<12) -static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) +static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) { - struct tc_u_knode *n; - unsigned long i; - unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), - GFP_KERNEL); - if (!bitmap) - return handle | 0xFFF; - - for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); - n; - n = rtnl_dereference(n->next)) - set_bit(TC_U32_NODE(n->handle), bitmap); - - i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); - if (i >= NR_U32_NODE) - i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + unsigned long idr_index; + u32 start = htid | 0x800; + u32 max = htid | 0xFFF; + u32 min = htid; + + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + start, max + 1, GFP_KERNEL)) { + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + min + 1, max + 1, GFP_KERNEL)) + return max; + } - kfree(bitmap); - return handle | (i >= NR_U32_NODE ? 0xFFF : i); + return (u32)idr_index; } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -806,6 +808,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, if (pins->handle == n->handle) break; + idr_replace_ext(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } @@ -937,22 +940,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; if (TC_U32_KEY(handle)) return -EINVAL; - if (handle == 0) { - handle = gen_new_htid(tp->data); - if (handle == 0) - return -ENOMEM; - } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; + if (handle == 0) { + handle = gen_new_htid(tp->data, ht); + if (handle == 0) { + kfree(ht); + return -ENOMEM; + } + } else { + err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL, + handle, handle + 1, GFP_KERNEL); + if (err) { + kfree(ht); + return err; + } + } ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + idr_init(&ht->handle_idr); err = u32_replace_hw_hnode(tp, ht, flags); if (err) { + idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); return err; } @@ -986,24 +1000,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) return -EINVAL; handle = htid | TC_U32_NODE(handle); + err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, + handle, handle + 1, + GFP_KERNEL); + if (err) + return err; } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL] == NULL) - return -EINVAL; + if (tb[TCA_U32_SEL] == NULL) { + err = -EINVAL; + goto erridr; + } s = nla_data(tb[TCA_U32_SEL]); n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); - if (n == NULL) - return -ENOBUFS; + if (n == NULL) { + err = -ENOBUFS; + goto erridr; + } #ifdef CONFIG_CLS_U32_PERF size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); if (!n->pf) { - kfree(n); - return -ENOBUFS; + err = -ENOBUFS; + goto errfree; } #endif @@ -1066,9 +1089,12 @@ errhw: errout: tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF +errfree: free_percpu(n->pf); #endif kfree(n); +erridr: + idr_remove_ext(&ht->handle_idr, handle); return err; } -- cgit v1.2.3 From 61f26d92510ea1e30f9b69097b34b6a522daa1d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 07:40:42 +0200 Subject: selftests: rtnetlink.sh: add rudimentary vrf test Acked-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 4b48de565cae..a048f7a5f94c 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -291,6 +291,47 @@ kci_test_ifalias() echo "PASS: set ifalias $namewant for $devdummy" } +kci_test_vrf() +{ + vrfname="test-vrf" + ret=0 + + ip link show type vrf 2>/dev/null + if [ $? -ne 0 ]; then + echo "SKIP: vrf: iproute2 too old" + return 0 + fi + + ip link add "$vrfname" type vrf table 10 + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: can't add vrf interface, skipping test" + return 0 + fi + + ip -br link show type vrf | grep -q "$vrfname" + check_err $? + if [ $ret -ne 0 ];then + echo "FAIL: created vrf device not found" + return 1 + fi + + ip link set dev "$vrfname" up + check_err $? + + ip link set dev "$devdummy" master "$vrfname" + check_err $? + ip link del dev "$vrfname" + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: vrf" + return 1 + fi + + echo "PASS: vrf" +} + kci_test_rtnl() { kci_add_dummy @@ -306,6 +347,7 @@ kci_test_rtnl() kci_test_bridge kci_test_addrlabel kci_test_ifalias + kci_test_vrf kci_del_dummy } -- cgit v1.2.3 From 79110a0426d8179a51bf3cb698a84f6ec98ca60c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:40 +0200 Subject: rtnetlink: add helper to put master and link ifindexes rtnl_fill_ifinfo currently requires caller to hold the rtnl mutex. Unfortunately the function is quite large which makes it harder to see which spots require the lock, which spots assume it and which ones could do without. Add helpers to factor out the ifindex dumping, one can use rcu to avoid rtnl dependency. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a78fd61da0ec..c801212ee40e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1307,6 +1307,31 @@ static u32 rtnl_get_event(unsigned long event) return rtnl_event_type; } +static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device *upper_dev; + int ret = 0; + + rcu_read_lock(); + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + + rcu_read_unlock(); + return ret; +} + +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +{ + int ifindex = dev_get_iflink(dev); + + if (dev->ifindex == ifindex) + return 0; + + return nla_put_u32(skb, IFLA_LINK, ifindex); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, @@ -1316,7 +1341,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *af_spec; struct rtnl_af_ops *af_ops; - struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1345,10 +1369,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || - (upper_dev && - nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_iflink(skb, dev) || + put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || -- cgit v1.2.3 From 250fc3dfdbd3e8b5c751ce7b5a26176ec62ca0f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:41 +0200 Subject: rtnetlink: add helpers to dump vf information similar to earlier patches, split out more parts of this function to better see what is happening and where we assume rtnl is locked. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index c801212ee40e..d504e78cd01f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1211,6 +1211,36 @@ nla_put_vfinfo_failure: return -EMSGSIZE; } +static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, + struct net_device *dev, + u32 ext_filter_mask) +{ + struct nlattr *vfinfo; + int i, num_vfs; + + if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) + return 0; + + num_vfs = dev_num_vf(dev->dev.parent); + if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) + return -EMSGSIZE; + + if (!dev->netdev_ops->ndo_get_vf_config) + return 0; + + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + return -EMSGSIZE; + + for (i = 0; i < num_vfs; i++) { + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + return -EMSGSIZE; + } + + nla_nest_end(skb, vfinfo); + return 0; +} + static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { struct rtnl_link_ifmap map; @@ -1407,27 +1437,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && - nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) + if (rtnl_fill_vf(skb, dev, ext_filter_mask)) goto nla_put_failure; - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && - ext_filter_mask & RTEXT_FILTER_VF) { - int i; - struct nlattr *vfinfo; - int num_vfs = dev_num_vf(dev->dev.parent); - - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); - if (!vfinfo) - goto nla_put_failure; - for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) - goto nla_put_failure; - } - - nla_nest_end(skb, vfinfo); - } - if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; -- cgit v1.2.3 From b1e66b9a67d67d0e73091b04b51e524581c8c887 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:42 +0200 Subject: rtnetlink: add helpers to dump netnsid information Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d504e78cd01f..d524609c587c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1362,6 +1362,23 @@ static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) return nla_put_u32(skb, IFLA_LINK, ifindex); } +static int rtnl_fill_link_netnsid(struct sk_buff *skb, + const struct net_device *dev) +{ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id_alloc(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + return -EMSGSIZE; + } + } + + return 0; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, @@ -1451,17 +1468,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (dev->rtnl_link_ops && - dev->rtnl_link_ops->get_link_net) { - struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); - - if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); - - if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) - goto nla_put_failure; - } - } + if (rtnl_fill_link_netnsid(skb, dev)) + goto nla_put_failure; if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; -- cgit v1.2.3 From 4c82a95e523721637d44e0e8d879fa11fa825eec Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Sep 2017 13:58:43 +0200 Subject: rtnetlink: rtnl_have_link_slave_info doesn't need rtnl it can be switched to rcu. Reviewed-by: David Ahern Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d524609c587c..e6955da0d58d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -522,11 +522,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, static bool rtnl_have_link_slave_info(const struct net_device *dev) { struct net_device *master_dev; + bool ret = false; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (master_dev && master_dev->rtnl_link_ops) - return true; - return false; + ret = true; + rcu_read_unlock(); + return ret; } static int rtnl_link_slave_info_fill(struct sk_buff *skb, -- cgit v1.2.3 From 4d8806fd14e1492cd4fb2021f709b163ea3364ad Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 26 Sep 2017 16:14:09 +0100 Subject: cxgb4: make function ch_flower_stats_cb, fixes warning The function ch_flower_stats_cb is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warnings: symbol 'ch_flower_stats_cb' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index a36bd66d2834..92a311767381 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -366,7 +366,7 @@ err: return ret; } -void ch_flower_stats_cb(unsigned long data) +static void ch_flower_stats_cb(unsigned long data) { struct adapter *adap = (struct adapter *)data; struct ch_tc_flower_entry *flower_entry; -- cgit v1.2.3 From 2b7c6ba945fd3c10ca3e030be402098aff2f89d3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 26 Sep 2017 16:35:13 +0100 Subject: bpf/verifier: improve disassembly of BPF_END instructions print_bpf_insn() was treating all BPF_ALU[64] the same, but BPF_END has a different structure: it has a size in insn->imm (even if it's BPF_X) and uses the BPF_SRC (X or K) to indicate which endianness to use. So it needs different code to print it. Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f849eca36052..e8d7bb8e6b98 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -332,26 +332,40 @@ static const char *const bpf_jmp_string[16] = { [BPF_EXIT >> 4] = "exit", }; +static void print_bpf_end_insn(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + verbose("(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg, + BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le", + insn->imm, insn->dst_reg); +} + static void print_bpf_insn(const struct bpf_verifier_env *env, const struct bpf_insn *insn) { u8 class = BPF_CLASS(insn->code); if (class == BPF_ALU || class == BPF_ALU64) { - if (BPF_SRC(insn->code) == BPF_X) + if (BPF_OP(insn->code) == BPF_END) { + if (class == BPF_ALU64) + verbose("BUG_alu64_%02x\n", insn->code); + else + print_bpf_end_insn(env, insn); + } else if (BPF_SRC(insn->code) == BPF_X) { verbose("(%02x) %sr%d %s %sr%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], class == BPF_ALU ? "(u32) " : "", insn->src_reg); - else + } else { verbose("(%02x) %sr%d %s %s%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", insn->dst_reg, bpf_alu_string[BPF_OP(insn->code) >> 4], class == BPF_ALU ? "(u32) " : "", insn->imm); + } } else if (class == BPF_STX) { if (BPF_MODE(insn->code) == BPF_MEM) verbose("(%02x) *(%s *)(r%d %+d) = r%d\n", -- cgit v1.2.3 From 73c864b38383f4abc9b559025cd663f4a81afa89 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 26 Sep 2017 16:35:29 +0100 Subject: bpf/verifier: improve disassembly of BPF_NEG instructions BPF_NEG takes only one operand, unlike the bulk of BPF_ALU[64] which are compound-assignments. So give it its own format in print_bpf_insn(). Signed-off-by: Edward Cree Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e8d7bb8e6b98..4cf9b72c59a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -351,6 +351,11 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, verbose("BUG_alu64_%02x\n", insn->code); else print_bpf_end_insn(env, insn); + } else if (BPF_OP(insn->code) == BPF_NEG) { + verbose("(%02x) r%d = %s-r%d\n", + insn->code, insn->dst_reg, + class == BPF_ALU ? "(u32) " : "", + insn->dst_reg); } else if (BPF_SRC(insn->code) == BPF_X) { verbose("(%02x) %sr%d %s %sr%d\n", insn->code, class == BPF_ALU ? "(u32) " : "", -- cgit v1.2.3 From 352f58b0d9f26d283b10f4c9f21e8717141c1334 Mon Sep 17 00:00:00 2001 From: Aviad Krawczyk Date: Wed, 27 Sep 2017 01:57:50 +0800 Subject: net-next/hinic: Set Rxq irq to specific cpu for NUMA Set Rxq irq to specific cpu for allocating and receiving the skb from the same node. Signed-off-by: Aviad Krawczyk Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 1d4f712b15a8..e2e5cdc7119c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "hinic_common.h" @@ -171,11 +172,10 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) struct hinic_sge sge; dma_addr_t dma_addr; struct sk_buff *skb; - int i, alloc_more; u16 prod_idx; + int i; free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq); - alloc_more = 0; /* Limit the allocation chunks */ if (free_wqebbs > nic_dev->rx_weight) @@ -185,7 +185,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) skb = rx_alloc_skb(rxq, &dma_addr); if (!skb) { netdev_err(rxq->netdev, "Failed to alloc Rx skb\n"); - alloc_more = 1; goto skb_out; } @@ -195,7 +194,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq) &prod_idx); if (!rq_wqe) { rx_free_skb(rxq, skb, dma_addr); - alloc_more = 1; goto skb_out; } @@ -211,9 +209,7 @@ skb_out: hinic_rq_update(rxq->rq, prod_idx); } - if (alloc_more) - tasklet_schedule(&rxq->rx_task); - + tasklet_schedule(&rxq->rx_task); return i; } @@ -357,7 +353,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) } if (pkts) - tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */ + tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */ u64_stats_update_begin(&rxq->rxq_stats.syncp); rxq->rxq_stats.pkts += pkts; @@ -417,6 +413,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_dev *nic_dev = netdev_priv(rxq->netdev); struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; + struct hinic_qp *qp; + struct cpumask mask; int err; rx_add_napi(rxq); @@ -432,7 +430,9 @@ static int rx_request_irq(struct hinic_rxq *rxq) return err; } - return 0; + qp = container_of(rq, struct hinic_qp, rq); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); + return irq_set_affinity_hint(rq->irq, &mask); } static void rx_free_irq(struct hinic_rxq *rxq) -- cgit v1.2.3 From bbdc9e687fb3c2920961d7716f1c5519ff7bc595 Mon Sep 17 00:00:00 2001 From: Aviad Krawczyk Date: Wed, 27 Sep 2017 02:11:33 +0800 Subject: net-next/hinic: Fix a case of Tx Queue is Stopped forever Fix the following scenario: 1. tx_free_poll is running on cpu X 2. xmit function is running on cpu Y and fails to get sq wqe 3. tx_free_poll frees wqes on cpu X and checks the queue is not stopped 4. xmit function stops the queue after failed to get sq wqe 5. The queue is stopped forever Signed-off-by: Aviad Krawczyk Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index abe3e38cd342..9128858479c4 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -212,10 +212,19 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); if (!sq_wqe) { - tx_unmap_skb(nic_dev, skb, txq->sges); - netif_stop_subqueue(netdev, qp->q_id); + /* Check for the case free_tx_poll is called in another cpu + * and we stopped the subqueue after free_tx_poll check. + */ + sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx); + if (sq_wqe) { + netif_wake_subqueue(nic_dev->netdev, qp->q_id); + goto process_sq_wqe; + } + + tx_unmap_skb(nic_dev, skb, txq->sges); + u64_stats_update_begin(&txq->txq_stats.syncp); txq->txq_stats.tx_busy++; u64_stats_update_end(&txq->txq_stats.syncp); @@ -223,6 +232,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) goto flush_skbs; } +process_sq_wqe: hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges); hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size); -- cgit v1.2.3 From 6ade97da601f8af793f6c7a861af754d0f0b6767 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Sep 2017 23:12:28 +0300 Subject: arp: make arp_hdr_len() return unsigned int Negative ARP header length are not a thing. Constify arguments while I'm at it. Space savings: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3 (-3) function old new delta arpt_do_table 1163 1160 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 3 ++- include/linux/if_arp.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c99dc59d729b..d2e94b8559f0 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2491,7 +2491,8 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; - int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); + int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); + unsigned int alen; if (!slave_do_arp_validate(bond, slave)) { if ((slave_do_arp_validate_only(bond) && is_arp) || diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 3355efc89781..6756fea18b69 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -31,7 +31,7 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) return (struct arphdr *)skb_network_header(skb); } -static inline int arp_hdr_len(struct net_device *dev) +static inline unsigned int arp_hdr_len(const struct net_device *dev) { switch (dev->type) { #if IS_ENABLED(CONFIG_FIREWIRE_NET) -- cgit v1.2.3 From 9ffe79a9c2eec0f30687c2fd8b452bda5c8287b0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:23 +0800 Subject: net: hns3: Support for dynamically assigning tx buffer to TC This patch add support of dynamically assigning tx buffer to TC when the TC is enabled. It will save buffer for rx direction to avoid packet loss. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 64 ++++++++++++++++++---- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 758cf3948131..a81c6cb93ed5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -311,6 +311,7 @@ struct hclge_tc_thrd { struct hclge_priv_buf { struct hclge_waterline wl; /* Waterline for low and high*/ u32 buf_size; /* TC private buffer size */ + u32 tx_buf_size; u32 enable; /* Enable TC private buffer or not */ }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e0685e630afe..eaa3fc355568 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1324,7 +1324,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) return 0; } -static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) +static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) { /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 @@ -1337,10 +1337,13 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) req = (struct hclge_tx_buff_alloc *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); - for (i = 0; i < HCLGE_TC_NUM; i++) + for (i = 0; i < HCLGE_TC_NUM; i++) { + u32 buf_size = hdev->priv_buf[i].tx_buf_size; + req->tx_pkt_buff[i] = cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) | HCLGE_BUF_SIZE_UPDATE_EN_MSK); + } ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -1352,9 +1355,9 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size) return 0; } -static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size) +static int hclge_tx_buffer_alloc(struct hclge_dev *hdev) { - int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size); + int ret = hclge_cmd_alloc_tx_buff(hdev); if (ret) { dev_err(&hdev->pdev->dev, @@ -1433,6 +1436,16 @@ static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev) return rx_priv; } +static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev) +{ + u32 i, total_tx_size = 0; + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) + total_tx_size += hdev->priv_buf[i].tx_buf_size; + + return total_tx_size; +} + static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) { u32 shared_buf_min, shared_buf_tc, shared_std; @@ -1477,18 +1490,43 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) return true; } +static int hclge_tx_buffer_calc(struct hclge_dev *hdev) +{ + u32 i, total_size; + + total_size = hdev->pkt_buf_size; + + /* alloc tx buffer for all enabled tc */ + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + + if (total_size < HCLGE_DEFAULT_TX_BUF) + return -ENOMEM; + + if (hdev->hw_tc_map & BIT(i)) + priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF; + else + priv->tx_buf_size = 0; + + total_size -= priv->tx_buf_size; + } + + return 0; +} + /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev - * @tx_size: the allocated tx buffer for all TCs * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) +int hclge_rx_buffer_calc(struct hclge_dev *hdev) { - u32 rx_all = hdev->pkt_buf_size - tx_size; + u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; struct hclge_priv_buf *priv; int i; + rx_all -= hclge_get_tx_buff_alloced(hdev); + /* When DCB is not supported, rx private * buffer is not allocated. */ @@ -1771,7 +1809,6 @@ static int hclge_common_wl_config(struct hclge_dev *hdev) int hclge_buffer_alloc(struct hclge_dev *hdev) { - u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF; int ret; hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM, @@ -1780,14 +1817,21 @@ int hclge_buffer_alloc(struct hclge_dev *hdev) if (!hdev->priv_buf) return -ENOMEM; - ret = hclge_tx_buffer_alloc(hdev, tx_buf_size); + ret = hclge_tx_buffer_calc(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "could not calc tx buffer size for all TCs %d\n", ret); + return ret; + } + + ret = hclge_tx_buffer_alloc(hdev); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc tx buffers %d\n", ret); return ret; } - ret = hclge_rx_buffer_calc(hdev, tx_buf_size); + ret = hclge_rx_buffer_calc(hdev); if (ret) { dev_err(&hdev->pdev->dev, "could not calc rx priv buffer size for all TCs %d\n", -- cgit v1.2.3 From acf61ecd44feae2a78c13d0d7cb8e386741c5cf0 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:24 +0800 Subject: net: hns3: Add support for dynamically buffer reallocation Current buffer allocation can only happen at init, when doing buffer reallocation after init, care must be taken care of memory which priv_buf points to. This patch fixes it by using a dynamic allocated temporary memory. Because we only do buffer reallocation at init or when setting up the DCB parameter, and priv_buf is only used at buffer allocation process, so it is ok to use a dynamic allocated temporary memory. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 5 + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 150 +++++++++++---------- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 - 3 files changed, 87 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index a81c6cb93ed5..6b6d28eff664 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -322,6 +322,11 @@ struct hclge_shared_buf { u32 buf_size; }; +struct hclge_pkt_buf_alloc { + struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM]; + struct hclge_shared_buf s_buf; +}; + #define HCLGE_RX_COM_WL_EN_B 15 struct hclge_rx_com_wl_buf { __le16 high_wl; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index eaa3fc355568..61632feb8c4e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1324,7 +1324,8 @@ static int hclge_alloc_vport(struct hclge_dev *hdev) return 0; } -static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) +static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { /* TX buffer size is unit by 128 byte */ #define HCLGE_BUF_SIZE_UNIT_SHIFT 7 @@ -1338,7 +1339,7 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0); for (i = 0; i < HCLGE_TC_NUM; i++) { - u32 buf_size = hdev->priv_buf[i].tx_buf_size; + u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size; req->tx_pkt_buff[i] = cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) | @@ -1355,9 +1356,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev) return 0; } -static int hclge_tx_buffer_alloc(struct hclge_dev *hdev) +static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - int ret = hclge_cmd_alloc_tx_buff(hdev); + int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc); if (ret) { dev_err(&hdev->pdev->dev, @@ -1390,13 +1392,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev) } /* Get the number of pfc enabled TCs, which have private buffer */ -static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if ((hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) cnt++; @@ -1406,13 +1409,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev) } /* Get the number of pfc disabled TCs, which have private buffer */ -static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) +static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; int i, cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i)) && priv->enable) @@ -1422,31 +1426,33 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev) return cnt; } -static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev) +static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; u32 rx_priv = 0; int i; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (priv->enable) rx_priv += priv->buf_size; } return rx_priv; } -static u32 hclge_get_tx_buff_alloced(struct hclge_dev *hdev) +static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc) { u32 i, total_tx_size = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) - total_tx_size += hdev->priv_buf[i].tx_buf_size; + total_tx_size += buf_alloc->priv_buf[i].tx_buf_size; return total_tx_size; } -static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) +static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc, + u32 rx_all) { u32 shared_buf_min, shared_buf_tc, shared_std; int tc_num, pfc_enable_num; @@ -1467,30 +1473,31 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) hdev->mps; shared_std = max_t(u32, shared_buf_min, shared_buf_tc); - rx_priv = hclge_get_rx_priv_buff_alloced(hdev); + rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc); if (rx_all <= rx_priv + shared_std) return false; shared_buf = rx_all - rx_priv; - hdev->s_buf.buf_size = shared_buf; - hdev->s_buf.self.high = shared_buf; - hdev->s_buf.self.low = 2 * hdev->mps; + buf_alloc->s_buf.buf_size = shared_buf; + buf_alloc->s_buf.self.high = shared_buf; + buf_alloc->s_buf.self.low = 2 * hdev->mps; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { if ((hdev->hw_tc_map & BIT(i)) && (hdev->tm_info.hw_pfc_map & BIT(i))) { - hdev->s_buf.tc_thrd[i].low = hdev->mps; - hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps; } else { - hdev->s_buf.tc_thrd[i].low = 0; - hdev->s_buf.tc_thrd[i].high = hdev->mps; + buf_alloc->s_buf.tc_thrd[i].low = 0; + buf_alloc->s_buf.tc_thrd[i].high = hdev->mps; } } return true; } -static int hclge_tx_buffer_calc(struct hclge_dev *hdev) +static int hclge_tx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { u32 i, total_size; @@ -1498,7 +1505,7 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev) /* alloc tx buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; if (total_size < HCLGE_DEFAULT_TX_BUF) return -ENOMEM; @@ -1516,22 +1523,24 @@ static int hclge_tx_buffer_calc(struct hclge_dev *hdev) /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev + * @buf_alloc: pointer to buffer calculation data * @return: 0: calculate sucessful, negative: fail */ -int hclge_rx_buffer_calc(struct hclge_dev *hdev) +int hclge_rx_buffer_calc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { u32 rx_all = hdev->pkt_buf_size; int no_pfc_priv_num, pfc_priv_num; struct hclge_priv_buf *priv; int i; - rx_all -= hclge_get_tx_buff_alloced(hdev); + rx_all -= hclge_get_tx_buff_alloced(buf_alloc); /* When DCB is not supported, rx private * buffer is not allocated. */ if (!hnae3_dev_dcb_supported(hdev)) { - if (!hclge_is_rx_buf_ok(hdev, rx_all)) + if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return -ENOMEM; return 0; @@ -1539,7 +1548,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) /* step 1, try to alloc private buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i)) { priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1560,14 +1569,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 2, try to decrease the buffer size of * no pfc TC's private buffer */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; priv->enable = 0; priv->wl.low = 0; @@ -1590,18 +1599,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) } } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 3, try to reduce the number of pfc disabled TCs, * which have private buffer */ /* get the total no pfc enable TC number, which have private buffer */ - no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev); + no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && !(hdev->tm_info.hw_pfc_map & BIT(i))) { @@ -1613,22 +1622,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) no_pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || no_pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; /* step 4, try to reduce the number of pfc enabled TCs * which have private buffer. */ - pfc_priv_num = hclge_get_pfc_priv_num(hdev); + pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc); /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { - priv = &hdev->priv_buf[i]; + priv = &buf_alloc->priv_buf[i]; if (hdev->hw_tc_map & BIT(i) && hdev->tm_info.hw_pfc_map & BIT(i)) { @@ -1640,17 +1649,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev) pfc_priv_num--; } - if (hclge_is_rx_buf_ok(hdev, rx_all) || + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) || pfc_priv_num == 0) break; } - if (hclge_is_rx_buf_ok(hdev, rx_all)) + if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all)) return 0; return -ENOMEM; } -static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) +static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_rx_priv_buff *req; struct hclge_desc desc; @@ -1662,7 +1672,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) /* Alloc private buffer TCs */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - struct hclge_priv_buf *priv = &hdev->priv_buf[i]; + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; req->buf_num[i] = cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S); @@ -1671,7 +1681,7 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) } req->shared_buf = - cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | + cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | (1 << HCLGE_TC0_PRI_BUF_EN_B)); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -1686,7 +1696,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) #define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0) -static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) +static int hclge_rx_priv_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_rx_priv_wl_buf *req; struct hclge_priv_buf *priv; @@ -1706,7 +1717,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT); for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) { - priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j]; + u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j; + + priv = &buf_alloc->priv_buf[idx]; req->tc_wl[j].high = cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S); req->tc_wl[j].high |= @@ -1731,9 +1744,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_thrd_config(struct hclge_dev *hdev) +static int hclge_common_thrd_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *s_buf = &hdev->s_buf; + struct hclge_shared_buf *s_buf = &buf_alloc->s_buf; struct hclge_rx_com_thrd *req; struct hclge_desc desc[2]; struct hclge_tc_thrd *tc; @@ -1777,9 +1791,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev) return 0; } -static int hclge_common_wl_config(struct hclge_dev *hdev) +static int hclge_common_wl_config(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { - struct hclge_shared_buf *buf = &hdev->s_buf; + struct hclge_shared_buf *buf = &buf_alloc->s_buf; struct hclge_rx_com_wl *req; struct hclge_desc desc; int ret; @@ -1809,69 +1824,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev) int hclge_buffer_alloc(struct hclge_dev *hdev) { + struct hclge_pkt_buf_alloc *pkt_buf; int ret; - hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM, - sizeof(struct hclge_priv_buf), - GFP_KERNEL | __GFP_ZERO); - if (!hdev->priv_buf) + pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL); + if (!pkt_buf) return -ENOMEM; - ret = hclge_tx_buffer_calc(hdev); + ret = hclge_tx_buffer_calc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not calc tx buffer size for all TCs %d\n", ret); - return ret; + goto out; } - ret = hclge_tx_buffer_alloc(hdev); + ret = hclge_tx_buffer_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc tx buffers %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_buffer_calc(hdev); + ret = hclge_rx_buffer_calc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not calc rx priv buffer size for all TCs %d\n", ret); - return ret; + goto out; } - ret = hclge_rx_priv_buf_alloc(hdev); + ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n", ret); - return ret; + goto out; } if (hnae3_dev_dcb_supported(hdev)) { - ret = hclge_rx_priv_wl_config(hdev); + ret = hclge_rx_priv_wl_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure rx private waterline %d\n", ret); - return ret; + goto out; } - ret = hclge_common_thrd_config(hdev); + ret = hclge_common_thrd_config(hdev, pkt_buf); if (ret) { dev_err(&hdev->pdev->dev, "could not configure common threshold %d\n", ret); - return ret; + goto out; } } - ret = hclge_common_wl_config(hdev); - if (ret) { + ret = hclge_common_wl_config(hdev, pkt_buf); + if (ret) dev_err(&hdev->pdev->dev, "could not configure common waterline %d\n", ret); - return ret; - } - return 0; +out: + kfree(pkt_buf); + return ret; } static int hclge_init_roce_base_info(struct hclge_vport *vport) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9fcfd9395424..4fc36f04c971 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -463,8 +463,6 @@ struct hclge_dev { u32 pkt_buf_size; /* Total pf buf size for tx/rx */ u32 mps; /* Max packet size */ - struct hclge_priv_buf *priv_buf; - struct hclge_shared_buf s_buf; enum hclge_mta_dmac_sel_type mta_mac_sel_type; bool enable_mta; /* Mutilcast filter enable */ -- cgit v1.2.3 From 9dc2145d910e94d1987fd165ac7643777fcf17c4 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:25 +0800 Subject: net: hns3: Add support for PFC setting in TM module This patch add a pfc_pause_en cmd, and use it to configure PFC option according to fc_mode in hdev->tm_info. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 68 ++++++++++++++++++++-- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 5 ++ 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 73a75d7cc551..0b4b5d9b0798 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx) return hclge_cmd_send(&hdev->hw, &desc, 1); } +static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap, + u8 pfc_bitmap) +{ + struct hclge_desc desc; + struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false); + + pfc->tx_rx_en_bitmap = tx_rx_bitmap; + pfc->pri_en_bitmap = pfc_bitmap; + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) { u8 tc; @@ -969,20 +983,64 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev) return hclge_tm_schd_mode_hw(hdev); } +static int hclge_pfc_setup_hw(struct hclge_dev *hdev) +{ + u8 enable_bitmap = 0; + + if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) + enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK | + HCLGE_RX_MAC_PAUSE_EN_MSK; + + return hclge_pfc_pause_en_cfg(hdev, enable_bitmap, + hdev->tm_info.hw_pfc_map); +} + +static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev) +{ + bool tx_en, rx_en; + + switch (hdev->tm_info.fc_mode) { + case HCLGE_FC_NONE: + tx_en = false; + rx_en = false; + break; + case HCLGE_FC_RX_PAUSE: + tx_en = false; + rx_en = true; + break; + case HCLGE_FC_TX_PAUSE: + tx_en = true; + rx_en = false; + break; + case HCLGE_FC_FULL: + tx_en = true; + rx_en = true; + break; + default: + tx_en = true; + rx_en = true; + } + + return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en); +} + int hclge_pause_setup_hw(struct hclge_dev *hdev) { - bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC; int ret; u8 i; - ret = hclge_mac_pause_en_cfg(hdev, en, en); - if (ret) - return ret; + if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) + return hclge_mac_pause_setup_hw(hdev); - /* Only DCB-supported dev supports qset back pressure setting */ + /* Only DCB-supported dev supports qset back pressure and pfc cmd */ if (!hnae3_dev_dcb_supported(hdev)) return 0; + /* When MAC is GE Mode, hdev does not support pfc setting */ + ret = hclge_pfc_setup_hw(hdev); + if (ret) + dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret); + for (i = 0; i < hdev->tm_info.num_tc; i++) { ret = hclge_tm_qs_bp_cfg(hdev, i); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 85158b0d73fe..8ecd83c50f47 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -94,6 +94,11 @@ struct hclge_bp_to_qs_map_cmd { u32 rsvd1; }; +struct hclge_pfc_en_cmd { + u8 tx_rx_en_bitmap; + u8 pri_en_bitmap; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) -- cgit v1.2.3 From 0a5677d39ef12739c9c10ef6e8e5f4b0805bfe71 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:26 +0800 Subject: net: hns3: Add support for port shaper setting in TM module This patch add a tm_port_shaper cmd and set port shaper to HCLGE_ETHER_MAX_RATE on TM initialization process. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 32 ++++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 +++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 0b4b5d9b0798..f79cebd7c95b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -301,6 +301,34 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, return hclge_cmd_send(&hdev->hw, &desc, 1); } +static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) +{ + struct hclge_port_shapping_cmd *shap_cfg_cmd; + struct hclge_desc desc; + u32 shapping_para = 0; + u8 ir_u, ir_b, ir_s; + int ret; + + ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE, + HCLGE_SHAPER_LVL_PORT, + &ir_b, &ir_u, &ir_s); + if (ret) + return ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); + shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; + + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF); + hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF); + + shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); + + return hclge_cmd_send(&hdev->hw, &desc, 1); +} + static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pri_id, u8 ir_b, u8 ir_u, u8 ir_s, @@ -864,6 +892,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_tm_port_shaper_cfg(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_shaper_cfg(hdev); if (ret) return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 8ecd83c50f47..19a01e41c8b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -99,6 +99,10 @@ struct hclge_pfc_en_cmd { u8 pri_en_bitmap; }; +struct hclge_port_shapping_cmd { + __le32 port_shapping_para; +}; + #define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) -- cgit v1.2.3 From cc9bb43ab394f14096a55ee6101af0e804c05f0f Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:27 +0800 Subject: net: hns3: Add tc-based TM support for sriov enabled port When sriov is enabled and TM is in tc-based mode, vf's TM parameters is not set in TM initialization process. This patch add the tc_based TM support for sriov enabled using the information in vport struct. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 49 ++++++++++++++-------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index f79cebd7c95b..ea94d23a79f7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -388,13 +388,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id) return hclge_cmd_send(&hdev->hw, &desc, 1); } -static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id) +static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode) { struct hclge_desc desc; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false); - if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR) + if (mode == HCLGE_SCH_MODE_DWRR) desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK); else desc.data[1] = 0; @@ -638,17 +638,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u32 i; + u32 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { /* Cfg qs -> pri mapping, one by one mapping */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i); - if (ret) - return ret; - } + for (k = 0; k < hdev->num_alloc_vport; k++) + for (i = 0; i < hdev->tm_info.num_tc; i++) { + ret = hclge_tm_qs_to_pri_map_cfg( + hdev, vport[k].qs_offset + i, i); + if (ret) + return ret; + } } else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) { - int k; /* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */ for (k = 0; k < hdev->num_alloc_vport; k++) for (i = 0; i < HNAE3_MAX_TC; i++) { @@ -797,10 +798,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) { + struct hclge_vport *vport = hdev->vport; struct hclge_pg_info *pg_info; u8 dwrr; int ret; - u32 i; + u32 i, k; for (i = 0; i < hdev->tm_info.num_tc; i++) { pg_info = @@ -811,9 +813,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_weight_cfg( + hdev, vport[k].qs_offset + i, + vport[k].dwrr); + if (ret) + return ret; + } } return 0; @@ -944,7 +950,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport) return ret; for (i = 0; i < kinfo->num_tc; i++) { - ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i); + u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode; + + ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i, + sch_mode); if (ret) return ret; } @@ -956,7 +965,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) { struct hclge_vport *vport = hdev->vport; int ret; - u8 i; + u8 i, k; if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { for (i = 0; i < hdev->tm_info.num_tc; i++) { @@ -964,9 +973,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_qs_schd_mode_cfg(hdev, i); - if (ret) - return ret; + for (k = 0; k < hdev->num_alloc_vport; k++) { + ret = hclge_tm_qs_schd_mode_cfg( + hdev, vport[k].qs_offset + i, + HCLGE_SCH_MODE_DWRR); + if (ret) + return ret; + } } } else { for (i = 0; i < hdev->num_alloc_vport; i++) { -- cgit v1.2.3 From 77f255c1c695c72acb1d1c47d30323a273774ae6 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:28 +0800 Subject: net: hns3: Add some interface for the support of DCB feature This patch add some interface and export some interface from hclge_tm and hclgc_main to support the upcoming DCB feature. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 ++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 48 ++++++++++++++++++++-- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 6 +++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 61632feb8c4e..644f7ff54081 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -30,7 +30,6 @@ #define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f)) #define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f)) -static int hclge_rss_init_hw(struct hclge_dev *hdev); static int hclge_set_mta_filter_mode(struct hclge_dev *hdev, enum hclge_mta_dmac_sel_type mta_mac_sel, bool enable); @@ -2655,7 +2654,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle) return hdev->rss_size_max; } -static int hclge_rss_init_hw(struct hclge_dev *hdev) +int hclge_rss_init_hw(struct hclge_dev *hdev) { const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ; struct hclge_vport *vport = hdev->vport; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 4fc36f04c971..394b58788065 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -515,4 +515,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue) int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex); int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto); + +int hclge_buffer_alloc(struct hclge_dev *hdev); +int hclge_rss_init_hw(struct hclge_dev *hdev); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index ea94d23a79f7..8295684da5ba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -883,10 +883,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_map_cfg(struct hclge_dev *hdev) +int hclge_tm_map_cfg(struct hclge_dev *hdev) { int ret; + ret = hclge_up_to_tc_map(hdev); + if (ret) + return ret; + ret = hclge_tm_pg_to_pri_map(hdev); if (ret) return ret; @@ -994,7 +998,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev) return 0; } -static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev) { int ret; @@ -1092,7 +1096,45 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev) return ret; } - return hclge_up_to_tc_map(hdev); + return 0; +} + +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + u32 i, k; + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + if (prio_tc[i] >= hdev->tm_info.num_tc) + return -EINVAL; + hdev->tm_info.prio_tc[i] = prio_tc[i]; + + for (k = 0; k < hdev->num_alloc_vport; k++) { + kinfo = &vport[k].nic.kinfo; + kinfo->prio_tc[i] = prio_tc[i]; + } + } + return 0; +} + +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) +{ + u8 i, bit_map = 0; + + hdev->tm_info.num_tc = num_tc; + + for (i = 0; i < hdev->tm_info.num_tc; i++) + bit_map |= BIT(i); + + if (!bit_map) { + bit_map = 1; + hdev->tm_info.num_tc = 1; + } + + hdev->hw_tc_map = bit_map; + + hclge_tm_schd_info_init(hdev); } int hclge_tm_init_hw(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 19a01e41c8b0..bf59961918ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -112,4 +112,10 @@ struct hclge_port_shapping_cmd { int hclge_tm_schd_init(struct hclge_dev *hdev); int hclge_pause_setup_hw(struct hclge_dev *hdev); +int hclge_tm_schd_mode_hw(struct hclge_dev *hdev); +int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); +void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); +int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); +int hclge_tm_map_cfg(struct hclge_dev *hdev); +int hclge_tm_init_hw(struct hclge_dev *hdev); #endif -- cgit v1.2.3 From cacde272dd00496c2c1c36606a56b340cd967603 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:29 +0800 Subject: net: hns3: Add hclge_dcb module for the support of DCB feature The hclge_dcb module calls the interface from hclge_main/tm and provide interface for the dcb netlink interface. This patch also update Makefiles required to build the DCB supported code in HNS3 Ethernet driver and update the existing Kconfig file in the hisilicon folder. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/Kconfig | 9 + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 17 ++ .../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 304 +++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h | 21 ++ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 25 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 + 7 files changed, 375 insertions(+), 6 deletions(-) create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 91c7bdb9b43c..9d7cb0387bf7 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -103,4 +103,13 @@ config HNS3_ENET family of SoCs. This module depends upon HNAE3 driver to access the HNAE3 devices and their associated operations. +config HNS3_DCB + bool "Hisilicon HNS3 Data Center Bridge Support" + default n + depends on HNS3 && HNS3_HCLGE && DCB + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver. + + If unsure, say N. + endif # NET_VENDOR_HISILICON diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 1a01cadfe5f3..c677530841cf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -28,6 +28,7 @@ */ #include +#include #include #include #include @@ -131,6 +132,7 @@ struct hnae3_client_ops { int (*init_instance)(struct hnae3_handle *handle); void (*uninit_instance)(struct hnae3_handle *handle, bool reset); void (*link_status_change)(struct hnae3_handle *handle, bool state); + int (*setup_tc)(struct hnae3_handle *handle, u8 tc); }; #define HNAE3_CLIENT_NAME_LENGTH 16 @@ -363,6 +365,20 @@ struct hnae3_ae_ops { u16 vlan, u8 qos, __be16 proto); }; +struct hnae3_dcb_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *); + int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *); + int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *); + + /* DCBX configuration */ + u8 (*getdcbx)(struct hnae3_handle *); + u8 (*setdcbx)(struct hnae3_handle *, u8); + + int (*map_update)(struct hnae3_handle *); +}; + struct hnae3_ae_algo { const struct hnae3_ae_ops *ops; struct list_head node; @@ -394,6 +410,7 @@ struct hnae3_knic_private_info { u16 num_tqps; /* total number of TQPs in this handle */ struct hnae3_queue **tqp; /* array base of all TQPs in this instance */ + const struct hnae3_dcb_ops *dcb_ops; }; struct hnae3_roce_private_info { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index 162e8a42acd0..7023dc878086 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -7,5 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGE) += hclge.o hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o +hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o + obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c new file mode 100644 index 000000000000..1b30a6f966d8 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hclge_main.h" +#include "hclge_tm.h" +#include "hnae3.h" + +#define BW_PERCENT 100 + +static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_SP; + hdev->tm_info.pg_info[0].tc_dwrr[i] = 0; + break; + case IEEE_8021QAZ_TSA_ETS: + hdev->tm_info.tc_info[i].tc_sch_mode = + HCLGE_SCH_MODE_DWRR; + hdev->tm_info.pg_info[0].tc_dwrr[i] = + ets->tc_tx_bw[i]; + break; + default: + /* Hardware only supports SP (strict priority) + * or ETS (enhanced transmission selection) + * algorithms, if we receive some other value + * from dcbnl, then throw an error. + */ + return -EINVAL; + } + } + + return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc); +} + +static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev, + struct ieee_ets *ets) +{ + u32 i; + + memset(ets, 0, sizeof(*ets)); + ets->willing = 1; + ets->ets_cap = hdev->tc_max; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + ets->prio_tc[i] = hdev->tm_info.prio_tc[i]; + ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i]; + + if (hdev->tm_info.tc_info[i].tc_sch_mode == + HCLGE_SCH_MODE_SP) + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT; + else + ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; + } +} + +/* IEEE std */ +static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + hclge_tm_info_to_ieee_ets(hdev, ets); + + return 0; +} + +static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, + u8 *tc, bool *changed) +{ + u32 total_ets_bw = 0; + u8 max_tc = 0; + u8 i; + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (ets->prio_tc[i] >= hdev->tc_max || + i >= hdev->tc_max) + return -EINVAL; + + if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i]) + *changed = true; + + if (ets->prio_tc[i] > max_tc) + max_tc = ets->prio_tc[i]; + + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_SP) + *changed = true; + break; + case IEEE_8021QAZ_TSA_ETS: + if (hdev->tm_info.tc_info[i].tc_sch_mode != + HCLGE_SCH_MODE_DWRR) + *changed = true; + + total_ets_bw += ets->tc_tx_bw[i]; + break; + default: + return -EINVAL; + } + } + + if (total_ets_bw != BW_PERCENT) + return -EINVAL; + + *tc = max_tc + 1; + if (*tc != hdev->tm_info.num_tc) + *changed = true; + + return 0; +} + +static int hclge_map_update(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_tm_map_cfg(hdev); + if (ret) + return ret; + + ret = hclge_tm_schd_mode_hw(hdev); + if (ret) + return ret; + + ret = hclge_pause_setup_hw(hdev); + if (ret) + return ret; + + ret = hclge_buffer_alloc(hdev); + if (ret) + return ret; + + return hclge_rss_init_hw(hdev); +} + +static int hclge_client_setup_tc(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_client *client; + struct hnae3_handle *handle; + int ret; + u32 i; + + for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { + handle = &vport[i].nic; + client = handle->client; + + if (!client || !client->ops || !client->ops->setup_tc) + continue; + + ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc); + if (ret) + return ret; + } + + return 0; +} + +static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + bool map_changed = false; + u8 num_tc = 0; + int ret; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed); + if (ret) + return ret; + + hclge_tm_schd_info_update(hdev, num_tc); + + ret = hclge_ieee_ets_to_tm_info(hdev, ets); + if (ret) + return ret; + + if (map_changed) { + ret = hclge_client_setup_tc(hdev); + if (ret) + return ret; + } + + return hclge_tm_dwrr_cfg(hdev); +} + +static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + memset(pfc, 0, sizeof(*pfc)); + pfc->pfc_cap = hdev->pfc_max; + prio_tc = hdev->tm_info.prio_tc; + pfc_map = hdev->tm_info.hw_pfc_map; + + /* Pfc setting is based on TC */ + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc_map & BIT(i))) + pfc->pfc_en |= BIT(j); + } + } + + return 0; +} + +static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + u8 i, j, pfc_map, *prio_tc; + + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + prio_tc = hdev->tm_info.prio_tc; + pfc_map = 0; + + for (i = 0; i < hdev->tm_info.num_tc; i++) { + for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { + if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) { + pfc_map |= BIT(i); + break; + } + } + } + + if (pfc_map == hdev->tm_info.hw_pfc_map) + return 0; + + hdev->tm_info.hw_pfc_map = pfc_map; + + return hclge_pause_setup_hw(hdev); +} + +/* DCBX configuration */ +static u8 hclge_getdcbx(struct hnae3_handle *h) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + return hdev->dcbx_cap; +} + +static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode) +{ + struct hclge_vport *vport = hclge_get_vport(h); + struct hclge_dev *hdev = vport->back; + + /* No support for LLD_MANAGED modes or CEE */ + if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || + (mode & DCB_CAP_DCBX_VER_CEE) || + !(mode & DCB_CAP_DCBX_HOST)) + return 1; + + hdev->dcbx_cap = mode; + + return 0; +} + +static const struct hnae3_dcb_ops hns3_dcb_ops = { + .ieee_getets = hclge_ieee_getets, + .ieee_setets = hclge_ieee_setets, + .ieee_getpfc = hclge_ieee_getpfc, + .ieee_setpfc = hclge_ieee_setpfc, + .getdcbx = hclge_getdcbx, + .setdcbx = hclge_setdcbx, + .map_update = hclge_map_update, +}; + +void hclge_dcb_ops_set(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = hdev->vport; + struct hnae3_knic_private_info *kinfo; + + /* Hdev does not support DCB or vport is + * not a pf, then dcb_ops is not set. + */ + if (!hnae3_dev_dcb_supported(hdev) || + vport->vport_id != 0) + return; + + kinfo = &vport->nic.kinfo; + kinfo->dcb_ops = &hns3_dcb_ops; + hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h new file mode 100644 index 000000000000..7d808ee96694 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2016~2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __HCLGE_DCB_H__ +#define __HCLGE_DCB_H__ + +#include "hclge_main.h" + +#ifdef CONFIG_HNS3_DCB +void hclge_dcb_ops_set(struct hclge_dev *hdev); +#else +static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {} +#endif + +#endif /* __HCLGE_DCB_H__ */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 644f7ff54081..dd220eab7f53 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -19,6 +19,7 @@ #include #include "hclge_cmd.h" +#include "hclge_dcb.h" #include "hclge_main.h" #include "hclge_mdio.h" #include "hclge_tm.h" @@ -1057,7 +1058,7 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->hw.mac.phy_addr = cfg.phy_addr; hdev->num_desc = cfg.tqp_desc_num; hdev->tm_info.num_pg = 1; - hdev->tm_info.num_tc = cfg.tc_num; + hdev->tc_max = cfg.tc_num; hdev->tm_info.hw_pfc_map = 0; ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); @@ -1066,15 +1067,25 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } - if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) || - (hdev->tm_info.num_tc < 1)) { + if ((hdev->tc_max > HNAE3_MAX_TC) || + (hdev->tc_max < 1)) { dev_warn(&hdev->pdev->dev, "TC num = %d.\n", - hdev->tm_info.num_tc); - hdev->tm_info.num_tc = 1; + hdev->tc_max); + hdev->tc_max = 1; } + /* Dev does not support DCB */ + if (!hnae3_dev_dcb_supported(hdev)) { + hdev->tc_max = 1; + hdev->pfc_max = 0; + } else { + hdev->pfc_max = hdev->tc_max; + } + + hdev->tm_info.num_tc = hdev->tc_max; + /* Currently not support uncontiuous tc */ - for (i = 0; i < cfg.tc_num; i++) + for (i = 0; i < hdev->tm_info.num_tc; i++) hnae_set_bit(hdev->hw_tc_map, i, 1); if (!hdev->num_vmdq_vport && !hdev->num_req_vfs) @@ -4238,6 +4249,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + hclge_dcb_ops_set(hdev); + setup_timer(&hdev->service_timer, hclge_service_timer, (unsigned long)hdev); INIT_WORK(&hdev->service_task, hclge_service_task); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 394b58788065..7c66c00e8a3e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -421,8 +421,11 @@ struct hclge_dev { #define HCLGE_FLAG_TC_BASE_SCH_MODE 1 #define HCLGE_FLAG_VNET_BASE_SCH_MODE 2 u8 tx_sch_mode; + u8 tc_max; + u8 pfc_max; u8 default_up; + u8 dcbx_cap; struct hclge_tm_info tm_info; u16 num_msi; -- cgit v1.2.3 From 986743dbf0a70211bba594b5abee33b6661feaa9 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:30 +0800 Subject: net: hns3: Add dcb netlink interface for the support of DCB feature This patch add dcb netlink interface by calling the interface from hclge_dcb module. This patch also update Makefile in order to build hns3_dcbnl module. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/Makefile | 2 + .../ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c | 106 +++++++++++++++++++++ .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 2 + .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h | 7 ++ 4 files changed, 117 insertions(+) create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile index 7023dc878086..d2b20d01a58c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile @@ -11,3 +11,5 @@ hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o obj-$(CONFIG_HNS3_ENET) += hns3.o hns3-objs = hns3_enet.o hns3_ethtool.o + +hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c new file mode 100644 index 000000000000..9832172bfb08 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016-2017 Hisilicon Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "hnae3.h" +#include "hns3_enet.h" + +static +int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_getets) + return h->kinfo.dcb_ops->ieee_getets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_setets) + return h->kinfo.dcb_ops->ieee_setets(h, ets); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_getpfc) + return h->kinfo.dcb_ops->ieee_getpfc(h, pfc); + + return -EOPNOTSUPP; +} + +static +int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->ieee_setpfc) + return h->kinfo.dcb_ops->ieee_setpfc(h, pfc); + + return -EOPNOTSUPP; +} + +/* DCBX configuration */ +static u8 hns3_dcbnl_getdcbx(struct net_device *ndev) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->getdcbx) + return h->kinfo.dcb_ops->getdcbx(h); + + return 0; +} + +/* return 0 if successful, otherwise fail */ +static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (h->kinfo.dcb_ops->setdcbx) + return h->kinfo.dcb_ops->setdcbx(h, mode); + + return 1; +} + +static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = { + .ieee_getets = hns3_dcbnl_ieee_getets, + .ieee_setets = hns3_dcbnl_ieee_setets, + .ieee_getpfc = hns3_dcbnl_ieee_getpfc, + .ieee_setpfc = hns3_dcbnl_ieee_setpfc, + .getdcbx = hns3_dcbnl_getdcbx, + .setdcbx = hns3_dcbnl_setdcbx, +}; + +/* hclge_dcbnl_setup - DCBNL setup + * @handle: the corresponding vport handle + * Set up DCBNL + */ +void hns3_dcbnl_setup(struct hnae3_handle *handle) +{ + struct net_device *dev = handle->kinfo.netdev; + + if (!handle->kinfo.dcb_ops) + return; + + dev->dcbnl_ops = &hns3_dcbnl_ops; +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 35369e1c8036..11dab26f3543 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2790,6 +2790,8 @@ static int hns3_client_init(struct hnae3_handle *handle) goto out_reg_netdev_fail; } + hns3_dcbnl_setup(handle); + /* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */ netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h index 7e8746189747..481eada73e2d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h @@ -590,4 +590,11 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value) void hns3_ethtool_set_ops(struct net_device *netdev); int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget); + +#ifdef CONFIG_HNS3_DCB +void hns3_dcbnl_setup(struct hnae3_handle *handle); +#else +static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {} +#endif + #endif -- cgit v1.2.3 From 7979a223305016625d211dd051569933c433f81e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:31 +0800 Subject: net: hns3: Setting for fc_mode and dcb enable flag in TM module After the DCB feature is supported, fc_mode and dcb enable flag must be set according to the DCB parameter. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 34 +++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 8295684da5ba..359ee670d1e1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -486,7 +486,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + /* DCB is enabled if we have more than 1 TC */ + if (hdev->tm_info.num_tc > 1) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; } static void hclge_tm_pg_info_init(struct hclge_dev *hdev) @@ -512,6 +516,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) } } +static void hclge_pfc_info_init(struct hclge_dev *hdev) +{ + if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) { + if (hdev->fc_mode_last_time == HCLGE_FC_PFC) + dev_warn(&hdev->pdev->dev, + "DCB is disable, but last mode is FC_PFC\n"); + + hdev->tm_info.fc_mode = hdev->fc_mode_last_time; + } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { + /* fc_mode_last_time record the last fc_mode when + * DCB is enabled, so that fc_mode can be set to + * the correct value when DCB is disabled. + */ + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hdev->tm_info.fc_mode = HCLGE_FC_PFC; + } +} + static int hclge_tm_schd_info_init(struct hclge_dev *hdev) { if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) && @@ -524,8 +546,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev) hclge_tm_vport_info_update(hdev); - hdev->tm_info.fc_mode = HCLGE_FC_NONE; - hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hclge_pfc_info_init(hdev); return 0; } @@ -1158,8 +1179,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev) int hclge_tm_schd_init(struct hclge_dev *hdev) { - int ret = hclge_tm_schd_info_init(hdev); + int ret; + + /* fc_mode is HCLGE_FC_FULL on reset */ + hdev->tm_info.fc_mode = HCLGE_FC_FULL; + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + ret = hclge_tm_schd_info_init(hdev); if (ret) return ret; -- cgit v1.2.3 From 9df8f79a4d2957fa3083e8fda0843d8c010351a7 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 27 Sep 2017 09:45:32 +0800 Subject: net: hns3: Add DCB support when interacting with network stack When using lldptool to configure DCB parameter, hclge_dcb module call the client_ops->setup_tc to tell network stack which queue and priority is using for specific tc. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 102 ++++++++++++++++++--- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 11dab26f3543..4a0890f98b70 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -196,6 +196,32 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector) tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW; } +static int hns3_nic_set_real_num_queue(struct net_device *netdev) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hnae3_handle *h = priv->ae_handle; + struct hnae3_knic_private_info *kinfo = &h->kinfo; + unsigned int queue_size = kinfo->rss_size * kinfo->num_tc; + int ret; + + ret = netif_set_real_num_tx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_tx_queues fail, ret=%d!\n", + ret); + return ret; + } + + ret = netif_set_real_num_rx_queues(netdev, queue_size); + if (ret) { + netdev_err(netdev, + "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); + return ret; + } + + return 0; +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -232,26 +258,13 @@ out_start_err: static int hns3_nic_net_open(struct net_device *netdev) { - struct hns3_nic_priv *priv = netdev_priv(netdev); - struct hnae3_handle *h = priv->ae_handle; int ret; netif_carrier_off(netdev); - ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_tx_queues fail, ret=%d!\n", - ret); - return ret; - } - - ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps); - if (ret) { - netdev_err(netdev, - "netif_set_real_num_rx_queues fail, ret=%d!\n", ret); + ret = hns3_nic_set_real_num_queue(netdev); + if (ret) return ret; - } ret = hns3_nic_net_up(netdev); if (ret) { @@ -2848,10 +2861,69 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup) } } +static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) +{ + struct hnae3_knic_private_info *kinfo = &handle->kinfo; + struct net_device *ndev = kinfo->netdev; + bool if_running = netif_running(ndev); + int ret; + u8 i; + + if (tc > HNAE3_MAX_TC) + return -EINVAL; + + if (!ndev) + return -ENODEV; + + ret = netdev_set_num_tc(ndev, tc); + if (ret) + return ret; + + if (if_running) { + (void)hns3_nic_net_stop(ndev); + msleep(100); + } + + ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ? + kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP; + if (ret) + goto err_out; + + if (tc <= 1) { + netdev_reset_tc(ndev); + goto out; + } + + for (i = 0; i < HNAE3_MAX_TC; i++) { + struct hnae3_tc_info *tc_info = &kinfo->tc_info[i]; + + if (tc_info->enable) + netdev_set_tc_queue(ndev, + tc_info->tc, + tc_info->tqp_count, + tc_info->tqp_offset); + } + + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { + netdev_set_prio_tc_map(ndev, i, + kinfo->prio_tc[i]); + } + +out: + ret = hns3_nic_set_real_num_queue(ndev); + +err_out: + if (if_running) + (void)hns3_nic_net_open(ndev); + + return ret; +} + const struct hnae3_client_ops client_ops = { .init_instance = hns3_client_init, .uninit_instance = hns3_client_uninit, .link_status_change = hns3_link_status_change, + .setup_tc = hns3_client_setup_tc, }; /* hns3_init_module - Driver registration routine -- cgit v1.2.3 From 5af48b59f35cf712793badabe1a574a0d0ce3bd3 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 27 Sep 2017 16:12:44 +0300 Subject: net: bridge: add per-port group_fwd_mask with less restrictions We need to be able to transparently forward most link-local frames via tunnels (e.g. vxlan, qinq). Currently the bridge's group_fwd_mask has a mask which restricts the forwarding of STP and LACP, but we need to be able to forward these over tunnels and control that forwarding on a per-port basis thus add a new per-port group_fwd_mask option which only disallows mac pause frames to be forwarded (they're always dropped anyway). The patch does not change the current default situation - all of the others are still restricted unless configured for forwarding. We have successfully tested this patch with LACP and STP forwarding over VxLAN and qinq tunnels. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_input.c | 1 + net/bridge/br_netlink.c | 14 +++++++++++++- net/bridge/br_private.h | 10 +++++++++- net/bridge/br_sysfs_if.c | 18 ++++++++++++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8d062c58d5cb..ea87bd708ee9 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -325,6 +325,7 @@ enum { IFLA_BRPORT_MCAST_TO_UCAST, IFLA_BRPORT_VLAN_TUNNEL, IFLA_BRPORT_BCAST_FLOOD, + IFLA_BRPORT_GROUP_FWD_MASK, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7637f58c1226..7cb613776b31 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -289,6 +289,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) * * Others reserved for future standardization */ + fwd_mask |= p->group_fwd_mask; switch (dest[5]) { case 0x00: /* Bridge Group Address */ /* If STP is turned off, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3bc890716c89..dea88a255d26 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -152,6 +152,7 @@ static inline size_t br_port_info_size(void) #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + 0; } @@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, p->topology_change_ack) || nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & - BR_VLAN_TUNNEL))) + BR_VLAN_TUNNEL)) || + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask)) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -637,6 +639,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, }; /* Change the state of the port and notify spanning tree */ @@ -773,6 +776,15 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) return err; } #endif + + if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = fwd_mask; + } + br_port_flags_change(p, old_flags ^ p->flags); return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e870cfc85b14..020c709a017f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -36,7 +36,14 @@ /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ -#define BR_GROUPFWD_RESTRICTED 0x0007u +enum { + BR_GROUPFWD_STP = BIT(0), + BR_GROUPFWD_MACPAUSE = BIT(1), + BR_GROUPFWD_LACP = BIT(2), +}; + +#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \ + BR_GROUPFWD_LACP) /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u @@ -268,6 +275,7 @@ struct net_bridge_port { #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif + u16 group_fwd_mask; }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 5d5d413a6cf8..9110d5e56085 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) } static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); +static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%#x\n", p->group_fwd_mask); +} + +static int store_group_fwd_mask(struct net_bridge_port *p, + unsigned long v) +{ + if (v & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = v; + + return 0; +} +static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, + store_group_fwd_mask); + BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); @@ -223,6 +240,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_proxyarp_wifi, &brport_attr_multicast_flood, &brport_attr_broadcast_flood, + &brport_attr_group_fwd_mask, NULL }; -- cgit v1.2.3 From cf5d74b85ef40c202c76d90959db4d850f301b95 Mon Sep 17 00:00:00 2001 From: Hoang Tran Date: Wed, 27 Sep 2017 18:30:58 +0200 Subject: tcp: fix under-evaluated ssthresh in TCP Vegas With the commit 76174004a0f19785 (tcp: do not slow start when cwnd equals ssthresh), the comparison to the reduced cwnd in tcp_vegas_ssthresh() would under-evaluate the ssthresh. Signed-off-by: Hoang Tran Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 218cfcc77650..ee113ff15fd0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd-1); + return min(tp->snd_ssthresh, tp->snd_cwnd); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) -- cgit v1.2.3 From cb4d2b3f03d8eed90be3a194e5b54b734ec4bbe9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:52 -0700 Subject: bpf: Add name, load_time, uid and map_ids to bpf_prog_info The patch adds name and load_time to struct bpf_prog_aux. They are also exported to bpf_prog_info. The bpf_prog's name is passed by userspace during BPF_PROG_LOAD. The kernel only stores the first (BPF_PROG_NAME_LEN - 1) bytes and the name stored in the kernel is always \0 terminated. The kernel will reject name that contains characters other than isalnum() and '_'. It will also reject name that is not null terminated. The existing 'user->uid' of the bpf_prog_aux is also exported to the bpf_prog_info as created_by_uid. The existing 'used_maps' of the bpf_prog_aux is exported to the newly added members 'nr_map_ids' and 'map_ids' of the bpf_prog_info. On the input, nr_map_ids tells how big the userspace's map_ids buffer is. On the output, nr_map_ids tells the exact user_map_cnt and it will only copy up to the userspace's map_ids buffer is allowed. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b672c50f160..33ccc474fb04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -187,6 +187,8 @@ struct bpf_prog_aux { struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; + u64 load_time; /* ns since boottime */ + u8 name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e43491ac4823..bd6348269bf5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -210,6 +212,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +815,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 25d074920a00..45970df3f820 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -312,6 +315,30 @@ int bpf_map_new_fd(struct bpf_map *map) offsetof(union bpf_attr, CMD##_LAST_FIELD) - \ sizeof(attr->CMD##_LAST_FIELD)) != NULL +/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes. + * Return 0 on success and < 0 on error. + */ +static int bpf_obj_name_cpy(char *dst, const char *src) +{ + const char *end = src + BPF_OBJ_NAME_LEN; + + /* Copy all isalnum() and '_' char */ + while (src < end && *src) { + if (!isalnum(*src) && *src != '_') + return -EINVAL; + *dst++ = *src++; + } + + /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */ + if (src == end) + return -EINVAL; + + /* '\0' terminates dst */ + *dst = 0; + + return 0; +} + #define BPF_MAP_CREATE_LAST_FIELD numa_node /* called via syscall */ static int map_create(union bpf_attr *attr) @@ -973,7 +1000,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_flags +#define BPF_PROG_LOAD_LAST_FIELD prog_name static int bpf_prog_load(union bpf_attr *attr) { @@ -1037,6 +1064,11 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_prog; + prog->aux->load_time = ktime_get_boot_ns(); + err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); + if (err) + goto free_prog; + /* run eBPF verifier */ err = bpf_check(&prog, attr); if (err < 0) @@ -1358,8 +1390,25 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.type = prog->type; info.id = prog->aux->id; + info.load_time = prog->aux->load_time; + info.created_by_uid = from_kuid_munged(current_user_ns(), + prog->aux->user->uid); memcpy(info.tag, prog->tag, sizeof(prog->tag)); + memcpy(info.name, prog->aux->name, sizeof(prog->aux->name)); + + ulen = info.nr_map_ids; + info.nr_map_ids = prog->aux->used_map_cnt; + ulen = min_t(u32, info.nr_map_ids, ulen); + if (ulen) { + u32 *user_map_ids = (u32 *)info.map_ids; + u32 i; + + for (i = 0; i < ulen; i++) + if (put_user(prog->aux->used_maps[i]->id, + &user_map_ids[i])) + return -EFAULT; + } if (!capable(CAP_SYS_ADMIN)) { info.jited_prog_len = 0; -- cgit v1.2.3 From ad5b177bd73f5107d97c36f56395c4281fb6f089 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:53 -0700 Subject: bpf: Add map_name to bpf_map_info This patch allows userspace to specify a name for a map during BPF_MAP_CREATE. The map's name can later be exported to user space via BPF_OBJ_GET_INFO_BY_FD. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 7 ++++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 33ccc474fb04..252f4bc9eb25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,6 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; + u8 name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd6348269bf5..6d2137b4cf38 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -190,6 +190,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -829,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 45970df3f820..11a7f82a55d1 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -339,7 +339,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src) return 0; } -#define BPF_MAP_CREATE_LAST_FIELD numa_node +#define BPF_MAP_CREATE_LAST_FIELD map_name /* called via syscall */ static int map_create(union bpf_attr *attr) { @@ -361,6 +361,10 @@ static int map_create(union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + err = bpf_obj_name_cpy(map->name, attr->map_name); + if (err) + goto free_map_nouncharge; + atomic_set(&map->refcnt, 1); atomic_set(&map->usercnt, 1); @@ -1462,6 +1466,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.value_size = map->value_size; info.max_entries = map->max_entries; info.map_flags = map->map_flags; + memcpy(info.name, map->name, sizeof(map->name)); if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) -- cgit v1.2.3 From 88cda1c9da02c8aa31e1d5dcf22e8a35cc8c19f2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:54 -0700 Subject: bpf: libbpf: Provide basic API support to specify BPF obj name This patch extends the libbpf to provide API support to allow specifying BPF object name. In tools/lib/bpf/libbpf, the C symbol of the function and the map is used. Regarding section name, all maps are under the same section named "maps". Hence, section name is not a good choice for map's name. To be consistent with map, bpf_prog also follows and uses its function symbol as the prog's name. This patch adds logic to collect function's symbols in libbpf. There is existing codes to collect the map's symbols and no change is needed. The bpf_load_program_name() and bpf_map_create_name() are added to take the name argument. For the other bpf_map_create_xxx() variants, a name argument is directly added to them. In samples/bpf, bpf_load.c in particular, the symbol is also used as the map's name and the map symbols has already been collected in the existing code. For bpf_prog, bpf_load.c does not collect the function symbol name. We can consider to collect them later if there is a need to continue supporting the bpf_load.c. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/bpf_load.c | 2 + samples/bpf/map_perf_test_user.c | 1 + tools/include/uapi/linux/bpf.h | 10 +++ tools/lib/bpf/bpf.c | 57 +++++++++++---- tools/lib/bpf/bpf.h | 23 ++++-- tools/lib/bpf/libbpf.c | 109 +++++++++++++++++++++------- tools/testing/selftests/bpf/test_verifier.c | 2 +- 7 files changed, 157 insertions(+), 47 deletions(-) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 6aa50098dfb8..18b1c8dd0391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -221,6 +221,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, inner_map_fd, maps[i].def.max_entries, @@ -228,6 +229,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps, numa_node); } else { map_fd[i] = bpf_create_map_node(maps[i].def.type, + maps[i].name, maps[i].def.key_size, maps[i].def.value_size, maps[i].def.max_entries, diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index a0310fc70057..519d9af4b04a 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu) inner_lru_map_fds[cpu] = bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH, + test_map_names[INNER_LRU_HASH_PREALLOC], sizeof(uint32_t), sizeof(long), inner_lru_hash_size, 0, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e43491ac4823..6d2137b4cf38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -175,6 +175,8 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -188,6 +190,7 @@ union bpf_attr { __u32 numa_node; /* numa node (effective only if * BPF_F_NUMA_NODE is set). */ + __u8 map_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -210,6 +213,7 @@ union bpf_attr { __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ __u32 prog_flags; + __u8 prog_name[BPF_OBJ_NAME_LEN]; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -812,6 +816,11 @@ struct bpf_prog_info { __u32 xlated_prog_len; __aligned_u64 jited_prog_insns; __aligned_u64 xlated_prog_insns; + __u64 load_time; /* ns since boottime */ + __u32 created_by_uid; + __u32 nr_map_ids; + __aligned_u64 map_ids; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); struct bpf_map_info { @@ -821,6 +830,7 @@ struct bpf_map_info { __u32 value_size; __u32 max_entries; __u32 map_flags; + __u8 name[BPF_OBJ_NAME_LEN]; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 1d6907d379c9..daf624e4c720 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -46,6 +46,8 @@ # endif #endif +#define min(x, y) ((x) < (y) ? (x) : (y)) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -57,10 +59,11 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node) +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -70,6 +73,8 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -81,14 +86,23 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size, int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - return bpf_create_map_node(map_type, key_size, value_size, + return bpf_create_map_node(map_type, NULL, key_size, value_size, max_entries, map_flags, -1); } -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags) +{ + return bpf_create_map_node(map_type, name, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { + __u32 name_len = name ? strlen(name) : 0; union bpf_attr attr; memset(&attr, '\0', sizeof(attr)); @@ -99,6 +113,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); + if (node >= 0) { attr.map_flags |= BPF_F_NUMA_NODE; attr.numa_node = node; @@ -107,19 +123,24 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, - max_entries, map_flags, -1); + return bpf_create_map_in_map_node(map_type, name, key_size, + inner_map_fd, max_entries, map_flags, + -1); } -int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, size_t log_buf_sz) +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) { int fd; union bpf_attr attr; + __u32 name_len = name ? strlen(name) : 0; bzero(&attr, sizeof(attr)); attr.prog_type = type; @@ -130,6 +151,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_size = 0; attr.log_level = 0; attr.kern_version = kern_version; + memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1)); fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (fd >= 0 || !log_buf || !log_buf_sz) @@ -143,6 +165,15 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); } +int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + return bpf_load_program_name(type, NULL, insns, insns_cnt, license, + kern_version, log_buf, log_buf_sz); +} + int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, int strict_alignment, const char *license, __u32 kern_version, diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index b8ea5843c39e..118d00535a0d 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,19 +24,28 @@ #include #include -int bpf_create_map_node(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags, - int node); +int bpf_create_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags, int node); +int bpf_create_map_name(enum bpf_map_type map_type, const char *name, + int key_size, int value_size, int max_entries, + __u32 map_flags); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); -int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, +int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags); +int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, + int key_size, int inner_map_fd, int max_entries, + __u32 map_flags); /* Recommend log buffer size */ #define BPF_LOG_BUF_SIZE 65536 +int bpf_load_program_name(enum bpf_prog_type type, const char *name, + const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 35f6dfcdc565..4f402dcdf372 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -171,6 +171,7 @@ int libbpf_strerror(int err, char *buf, size_t size) struct bpf_program { /* Index in elf obj file, for relocation use. */ int idx; + char *name; char *section_name; struct bpf_insn *insns; size_t insns_cnt; @@ -283,6 +284,7 @@ static void bpf_program__exit(struct bpf_program *prog) prog->clear_priv = NULL; bpf_program__unload(prog); + zfree(&prog->name); zfree(&prog->section_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -293,26 +295,27 @@ static void bpf_program__exit(struct bpf_program *prog) } static int -bpf_program__init(void *data, size_t size, char *name, int idx, - struct bpf_program *prog) +bpf_program__init(void *data, size_t size, char *section_name, int idx, + struct bpf_program *prog) { if (size < sizeof(struct bpf_insn)) { - pr_warning("corrupted section '%s'\n", name); + pr_warning("corrupted section '%s'\n", section_name); return -EINVAL; } bzero(prog, sizeof(*prog)); - prog->section_name = strdup(name); + prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog %s\n", - name); + pr_warning("failed to alloc name for prog under section %s\n", + section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for %s\n", name); + pr_warning("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / sizeof(struct bpf_insn); @@ -331,12 +334,12 @@ errout: static int bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, - char *name, int idx) + char *section_name, int idx) { struct bpf_program prog, *progs; int nr_progs, err; - err = bpf_program__init(data, size, name, idx, &prog); + err = bpf_program__init(data, size, section_name, idx, &prog); if (err) return err; @@ -350,8 +353,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). */ - pr_warning("failed to alloc a new program '%s'\n", - name); + pr_warning("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -364,6 +367,54 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, return 0; } +static int +bpf_object__init_prog_names(struct bpf_object *obj) +{ + Elf_Data *symbols = obj->efile.symbols; + struct bpf_program *prog; + size_t pi, si; + + for (pi = 0; pi < obj->nr_programs; pi++) { + char *name = NULL; + + prog = &obj->programs[pi]; + + for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; + si++) { + GElf_Sym sym; + + if (!gelf_getsym(symbols, si, &sym)) + continue; + if (sym.st_shndx != prog->idx) + continue; + + name = elf_strptr(obj->efile.elf, + obj->efile.strtabidx, + sym.st_name); + if (!name) { + pr_warning("failed to get sym name string for prog %s\n", + prog->section_name); + return -LIBBPF_ERRNO__LIBELF; + } + } + + if (!name) { + pr_warning("failed to find sym for prog %s\n", + prog->section_name); + return -EINVAL; + } + + prog->name = strdup(name); + if (!prog->name) { + pr_warning("failed to allocate memory for prog sym %s\n", + name); + return -ENOMEM; + } + } + + return 0; +} + static struct bpf_object *bpf_object__new(const char *path, void *obj_buf, size_t obj_buf_sz) @@ -766,8 +817,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (obj->efile.maps_shndx >= 0) + if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj); + if (err) + goto out; + } + err = bpf_object__init_prog_names(obj); out: return err; } @@ -870,11 +925,12 @@ bpf_object__create_maps(struct bpf_object *obj) struct bpf_map_def *def = &obj->maps[i].def; int *pfd = &obj->maps[i].fd; - *pfd = bpf_create_map(def->type, - def->key_size, - def->value_size, - def->max_entries, - 0); + *pfd = bpf_create_map_name(def->type, + obj->maps[i].name, + def->key_size, + def->value_size, + def->max_entries, + 0); if (*pfd < 0) { size_t j; int err = *pfd; @@ -982,7 +1038,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, struct bpf_insn *insns, +load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns, int insns_cnt, char *license, u32 kern_version, int *pfd) { int ret; @@ -995,8 +1051,8 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (!log_buf) pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); - ret = bpf_load_program(type, insns, insns_cnt, license, - kern_version, log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_load_program_name(type, name, insns, insns_cnt, license, + kern_version, log_buf, BPF_LOG_BUF_SIZE); if (ret >= 0) { *pfd = ret; @@ -1021,9 +1077,9 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns, if (type != BPF_PROG_TYPE_KPROBE) { int fd; - fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, - insns_cnt, license, kern_version, - NULL, 0); + fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name, + insns, insns_cnt, license, + kern_version, NULL, 0); if (fd >= 0) { close(fd); ret = -LIBBPF_ERRNO__PROGTYPE; @@ -1067,8 +1123,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + err = load_program(prog->type, prog->name, prog->insns, + prog->insns_cnt, license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1096,7 +1152,8 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, result.new_insn_ptr, + err = load_program(prog->type, prog->name, + result.new_insn_ptr, result.new_insn_cnt, license, kern_version, &fd); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a0426147523d..290d5056c165 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6939,7 +6939,7 @@ static int create_map_in_map(void) return inner_map_fd; } - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, + outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, sizeof(int), inner_map_fd, 1, 0); if (outer_map_fd < 0) printf("Failed to create array of maps '%s'!\n", -- cgit v1.2.3 From 6e525d06674a21468b4e728ef880770a0ca5c60d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:55 -0700 Subject: bpf: Swap the order of checking prog_info and map_info This patch swaps the checking order. It now checks the map_info first and then prog_info. It is a prep work for adding test to the newly added fields (the map_ids of prog_info field in particular). Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_progs.c | 58 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 11ee25cea227..31ae27dc8d04 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -316,6 +316,36 @@ static void test_bpf_obj_id(void) error_cnt++; assert(!err); + /* Insert a magic value to the map */ + map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); + assert(map_fds[i] >= 0); + err = bpf_map_update_elem(map_fds[i], &array_key, + &array_magic_value, 0); + assert(!err); + + /* Check getting map info */ + info_len = sizeof(struct bpf_map_info) * 2; + bzero(&map_infos[i], info_len); + err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], + &info_len); + if (CHECK(err || + map_infos[i].type != BPF_MAP_TYPE_ARRAY || + map_infos[i].key_size != sizeof(__u32) || + map_infos[i].value_size != sizeof(__u64) || + map_infos[i].max_entries != 1 || + map_infos[i].map_flags != 0 || + info_len != sizeof(struct bpf_map_info), + "get-map-info(fd)", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", + err, errno, + map_infos[i].type, BPF_MAP_TYPE_ARRAY, + info_len, sizeof(struct bpf_map_info), + map_infos[i].key_size, + map_infos[i].value_size, + map_infos[i].max_entries, + map_infos[i].map_flags)) + goto done; + /* Check getting prog info */ info_len = sizeof(struct bpf_prog_info) * 2; bzero(&prog_infos[i], info_len); @@ -347,34 +377,6 @@ static void test_bpf_obj_id(void) !!memcmp(xlated_insns, zeros, sizeof(zeros)))) goto done; - map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - assert(map_fds[i] >= 0); - err = bpf_map_update_elem(map_fds[i], &array_key, - &array_magic_value, 0); - assert(!err); - - /* Check getting map info */ - info_len = sizeof(struct bpf_map_info) * 2; - bzero(&map_infos[i], info_len); - err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i], - &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags)) - goto done; } /* Check bpf_prog_get_next_id() */ -- cgit v1.2.3 From 3a8ad560a9674235d1dd2e174434f540924e249f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:56 -0700 Subject: bpf: Test new fields in bpf_attr and bpf_{prog, map}_info This patch tests newly added fields of the bpf_attr, bpf_prog_info and bpf_map_info. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/testing/selftests/bpf/test_progs.c | 143 ++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 31ae27dc8d04..69427531408d 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include typedef __u16 __sum16; @@ -19,6 +20,8 @@ typedef __u16 __sum16; #include #include #include +#include +#include #include #include @@ -273,16 +276,26 @@ static void test_bpf_obj_id(void) const int nr_iters = 2; const char *file = "./test_obj_id.o"; const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + const char *expected_prog_name = "test_obj_id"; + const char *expected_map_name = "test_map_id"; + const __u64 nsec_per_sec = 1000000000; struct bpf_object *objs[nr_iters]; int prog_fds[nr_iters], map_fds[nr_iters]; /* +1 to test for the info_len returned by kernel */ struct bpf_prog_info prog_infos[nr_iters + 1]; struct bpf_map_info map_infos[nr_iters + 1]; + /* Each prog only uses one map. +1 to test nr_map_ids + * returned by kernel. + */ + __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; + struct timespec real_time_ts, boot_time_ts; int sysctl_fd, jit_enabled = 0, err = 0; __u64 array_value; + uid_t my_uid = getuid(); + time_t now, load_time; sysctl_fd = open(jit_sysctl, 0, O_RDONLY); if (sysctl_fd != -1) { @@ -307,6 +320,7 @@ static void test_bpf_obj_id(void) /* Check bpf_obj_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { + now = time(NULL); err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail @@ -334,16 +348,18 @@ static void test_bpf_obj_id(void) map_infos[i].value_size != sizeof(__u64) || map_infos[i].max_entries != 1 || map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info), + info_len != sizeof(struct bpf_map_info) || + strcmp((char *)map_infos[i].name, expected_map_name), "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n", + "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", err, errno, map_infos[i].type, BPF_MAP_TYPE_ARRAY, info_len, sizeof(struct bpf_map_info), map_infos[i].key_size, map_infos[i].value_size, map_infos[i].max_entries, - map_infos[i].map_flags)) + map_infos[i].map_flags, + map_infos[i].name, expected_map_name)) goto done; /* Check getting prog info */ @@ -355,8 +371,16 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len = sizeof(jited_insns); prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns); prog_infos[i].xlated_prog_len = sizeof(xlated_insns); + prog_infos[i].map_ids = ptr_to_u64(map_ids + i); + prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); + assert(!err); + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); + assert(!err); err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); + load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + + (prog_infos[i].load_time / nsec_per_sec); if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || @@ -364,9 +388,14 @@ static void test_bpf_obj_id(void) (jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))) || !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, zeros, sizeof(zeros)), + !memcmp(xlated_insns, zeros, sizeof(zeros)) || + load_time < now - 60 || load_time > now + 60 || + prog_infos[i].created_by_uid != my_uid || + prog_infos[i].nr_map_ids != 1 || + *(int *)prog_infos[i].map_ids != map_infos[i].id || + strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), @@ -374,9 +403,13 @@ static void test_bpf_obj_id(void) prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len, !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)))) + !!memcmp(xlated_insns, zeros, sizeof(zeros)), + load_time, now, + prog_infos[i].created_by_uid, my_uid, + prog_infos[i].nr_map_ids, 1, + *(int *)prog_infos[i].map_ids, map_infos[i].id, + prog_infos[i].name, expected_prog_name)) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -384,6 +417,7 @@ static void test_bpf_obj_id(void) next_id = 0; while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; + __u32 saved_map_id; int prog_fd; info_len = sizeof(prog_info); @@ -406,16 +440,33 @@ static void test_bpf_obj_id(void) nr_id_found++; + /* Negative test: + * prog_info.nr_map_ids = 1 + * prog_info.map_ids = NULL + */ + prog_info.nr_map_ids = 1; + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (CHECK(!err || errno != EFAULT, + "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", + err, errno, EFAULT)) + break; + bzero(&prog_info, sizeof(prog_info)); + info_len = sizeof(prog_info); + + saved_map_id = *(int *)(prog_infos[i].map_ids); + prog_info.map_ids = prog_infos[i].map_ids; + prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len), + memcmp(&prog_info, &prog_infos[i], info_len) || + *(int *)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%lu) memcmp %d\n", + "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len)); - + memcmp(&prog_info, &prog_infos[i], info_len), + *(int *)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -497,6 +548,75 @@ static void test_pkt_md_access(void) bpf_object__close(obj); } +static void test_obj_name(void) +{ + struct { + const char *name; + int success; + int expected_errno; + } tests[] = { + { "", 1, 0 }, + { "_123456789ABCDE", 1, 0 }, + { "_123456789ABCDEF", 0, EINVAL }, + { "_123456789ABCD\n", 0, EINVAL }, + }; + struct bpf_insn prog[] = { + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 duration = 0; + int i; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + size_t name_len = strlen(tests[i].name) + 1; + union bpf_attr attr; + size_t ncopy; + int fd; + + /* test different attr.prog_name during BPF_PROG_LOAD */ + ncopy = name_len < sizeof(attr.prog_name) ? + name_len : sizeof(attr.prog_name); + bzero(&attr, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SCHED_CLS; + attr.insn_cnt = 2; + attr.insns = ptr_to_u64(prog); + attr.license = ptr_to_u64(""); + memcpy(attr.prog_name, tests[i].name, ncopy); + + fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-prog-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + + /* test different attr.map_name during BPF_MAP_CREATE */ + ncopy = name_len < sizeof(attr.map_name) ? + name_len : sizeof(attr.map_name); + bzero(&attr, sizeof(attr)); + attr.map_type = BPF_MAP_TYPE_ARRAY; + attr.key_size = 4; + attr.value_size = 4; + attr.max_entries = 1; + attr.map_flags = 0; + memcpy(attr.map_name, tests[i].name, ncopy); + fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); + CHECK((tests[i].success && fd < 0) || + (!tests[i].success && fd != -1) || + (!tests[i].success && errno != tests[i].expected_errno), + "check-bpf-map-name", + "fd %d(%d) errno %d(%d)\n", + fd, tests[i].success, errno, tests[i].expected_errno); + + if (fd != -1) + close(fd); + } +} + int main(void) { struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; @@ -509,6 +629,7 @@ int main(void) test_tcp_estats(); test_bpf_obj_id(); test_pkt_md_access(); + test_obj_name(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; -- cgit v1.2.3 From c7c3e5913bf18eda3cf38932bebdce48351baac9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Sep 2017 19:08:00 -0700 Subject: net: ipv4: remove fib_weight fib_weight in fib_info is set but not used. Remove it and the helpers for setting it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 3 --- net/ipv4/fib_semantics.c | 9 --------- 2 files changed, 12 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 1a7f7e424320..f80524396c06 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -122,9 +122,6 @@ struct fib_info { #define fib_rtt fib_metrics->metrics[RTAX_RTT-1] #define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1] int fib_nhs; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_weight; -#endif struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 57a5d48acee8..be0874620ecc 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi) atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); } endfor_nexthops(fi); } - -static inline void fib_add_weight(struct fib_info *fi, - const struct fib_nh *nh) -{ - fi->fib_weight += nh->nh_weight; -} - #else /* CONFIG_IP_ROUTE_MULTIPATH */ #define fib_rebalance(fi) do { } while (0) -#define fib_add_weight(fi, nh) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); - fib_add_weight(fi, nexthop_nh); } endfor_nexthops(fi) fib_rebalance(fi); -- cgit v1.2.3 From fa8fefaa678ea390b873195d19c09930da84a4bb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 27 Sep 2017 20:41:59 -0700 Subject: net: ipv4: remove fib_info arg to fib_check_nh fib_check_nh does not use the fib_info arg; remove t. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index be0874620ecc..467b3c15395f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -766,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) * | * |-> {local prefix} (terminal node) */ -static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh, struct netlink_ext_ack *extack) +static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, + struct netlink_ext_ack *extack) { int err = 0; struct net *net; @@ -1250,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, int linkdown = 0; change_nexthops(fi) { - err = fib_check_nh(cfg, fi, nexthop_nh, extack); + err = fib_check_nh(cfg, nexthop_nh, extack); if (err != 0) goto failure; if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) -- cgit v1.2.3 From 2b634bb0686e43a6338fe779fbabd72b6b928fdc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:10:14 -0400 Subject: i40e/i40evf: rename bytes_per_int to bytes_per_usec This value is not calculating bytes_per_int, which would actually just be bytes/ITR_COUNTDOWN_START, but rather it's calculating bytes/usecs. Rename the variable for clarity so that future developers understand what the value is actually calculating. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++++++------ drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f426762bd83a..d9fdf69bbc6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -960,14 +960,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -993,18 +993,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index c32c62462c84..37e1de886d48 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -358,14 +358,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) { enum i40e_latency_range new_latency_range = rc->latency_range; u32 new_itr = rc->itr; - int bytes_per_int; + int bytes_per_usec; unsigned int usecs, estimated_usecs; if (rc->total_packets == 0 || !rc->itr) return false; usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_int = rc->total_bytes / usecs; + bytes_per_usec = rc->total_bytes / usecs; /* The calculations in this algorithm depend on interrupts actually * firing at the ITR rate. This may not happen if the packet rate is @@ -391,18 +391,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) */ switch (new_latency_range) { case I40E_LOWEST_LATENCY: - if (bytes_per_int > 10) + if (bytes_per_usec > 10) new_latency_range = I40E_LOW_LATENCY; break; case I40E_LOW_LATENCY: - if (bytes_per_int > 20) + if (bytes_per_usec > 20) new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_int <= 10) + else if (bytes_per_usec <= 10) new_latency_range = I40E_LOWEST_LATENCY; break; case I40E_BULK_LATENCY: default: - if (bytes_per_int <= 20) + if (bytes_per_usec <= 20) new_latency_range = I40E_LOW_LATENCY; break; } -- cgit v1.2.3 From 16badf758b25bd00528246ab9af938296b9d368d Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:15 -0400 Subject: i40e: Fix unqualified module message while bringing link up In current driver, when ifconfig ethx up is done, the link state doesn't transition to UP inside i40e_open(). It changes after AQ command response is handled in i40e_handle_link_event(). When pf->hw.phy.link_info.link_info is DOWN inside i40e_open(), The state is transient and invalid. So log message gets printed based on incorrect info (i.e link_info and an_info). This commit removes check for unqualified module inside i40e_up_complete(). The existing check in i40e_handle_link_event() logs the error message based on correct link state information. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6498da8806cb..b235a27232a8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5470,15 +5470,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi) i40e_print_link_message(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - } else if (vsi->netdev) { - i40e_print_link_message(vsi, false); - /* need to check for qualified module here*/ - if ((pf->hw.phy.link_info.link_info & - I40E_AQ_MEDIA_AVAILABLE) && - (!(pf->hw.phy.link_info.an_info & - I40E_AQ_QUALIFIED_MODULE))) - netdev_err(vsi->netdev, - "the driver failed to link because an unqualified module was detected."); } /* replay FDIR SB filters */ -- cgit v1.2.3 From 9a03449d3ea0f6b497ff3a3bf6203a5e72c7e6be Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:16 -0400 Subject: i40e: Fix link down message when interface is brought up i40e_print_link_message() is intended to compare new link state with current link state and print log message only if the new state is different from current state. However in current driver the new state does not get updated when link is going down because of the if condition. When an interface is brought down, vsi->state is set to I40E_VSI_DOWN in i40e_vsi_close() and later i40e_print_link_message() does not get invoked in i40e_link_event due to if condition. Hence link down message doesn't appear when link is going down. The down state is seen later during i40e_open() and old state gets printed. The actual link state doesn't get updated in i40e_close() or i40e_open() but when i40e_handle_link_event is called inside i40e_clean_adminq_subtask. This change allows i40e_print_link_message() to be called when interface is going down and keeps the state information updated. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b235a27232a8..2e8fe6186b38 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6420,8 +6420,7 @@ static void i40e_link_event(struct i40e_pf *pf) new_link == netif_carrier_ok(vsi->netdev))) return; - if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - i40e_print_link_message(vsi, new_link); + i40e_print_link_message(vsi, new_link); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. -- cgit v1.2.3 From 3fded4663b07f8fa99b9424ca3d5c46b79f6b27e Mon Sep 17 00:00:00 2001 From: Sudheer Mogilappagari Date: Fri, 14 Jul 2017 09:10:18 -0400 Subject: i40e: simplify member variable accesses This commit replaces usage of vsi->back in i40e_print_link_message() (which is actually a PF pointer) with temp variable. Signed-off-by: Sudheer Mogilappagari Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2e8fe6186b38..3c650917b54f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5346,13 +5346,14 @@ out: void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) { enum i40e_aq_link_speed new_speed; + struct i40e_pf *pf = vsi->back; char *speed = "Unknown"; char *fc = "Unknown"; char *fec = ""; char *req_fec = ""; char *an = ""; - new_speed = vsi->back->hw.phy.link_info.link_speed; + new_speed = pf->hw.phy.link_info.link_speed; if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed)) return; @@ -5366,13 +5367,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) /* Warn user if link speed on NPAR enabled partition is not at * least 10GB */ - if (vsi->back->hw.func_caps.npar_enable && - (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || - vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) + if (pf->hw.func_caps.npar_enable && + (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB || + pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB)) netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n"); - switch (vsi->back->hw.phy.link_info.link_speed) { + switch (pf->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: speed = "40 G"; break; @@ -5395,7 +5396,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - switch (vsi->back->hw.fc.current_mode) { + switch (pf->hw.fc.current_mode) { case I40E_FC_FULL: fc = "RX/TX"; break; @@ -5410,18 +5411,18 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) break; } - if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { + if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) { req_fec = ", Requested FEC: None"; fec = ", FEC: None"; an = ", Autoneg: False"; - if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) + if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED) an = ", Autoneg: True"; - if (vsi->back->hw.phy.link_info.fec_info & + if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) fec = ", FEC: CL74 FC-FEC/BASE-R"; - else if (vsi->back->hw.phy.link_info.fec_info & + else if (pf->hw.phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) fec = ", FEC: CL108 RS-FEC"; -- cgit v1.2.3 From e04ea00217904fc3f6fddac0b74e74e5ac488fda Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Fri, 14 Jul 2017 09:10:19 -0400 Subject: i40e: relax warning message in case of version mismatch Fortville and Fort Park devices are often on different firmware release schedules. This change relaxes the minor version warning message, so it is only displayed for older FW warning version for old firmware Fortville 3 or earlier. Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3c650917b54f..a887087d08cd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11374,8 +11374,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) dev_info(&pdev->dev, "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); - else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR || - hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1)) + else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); -- cgit v1.2.3 From 0dc8692e914ac49931d69b5217d5fe0171fc026e Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Fri, 14 Jul 2017 09:27:00 -0400 Subject: i40e: fix for flow director counters not wrapping as expected An errata with GLQF_PCNT causes it to not wrap as expected. This can cause an error in flow director statistics. This patch resets affected counters just after reading. Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a887087d08cd..638f5bad0bd7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -599,6 +599,20 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg, *stat = (u32)((new_data + BIT_ULL(32)) - *offset); } +/** + * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat + * @hw: ptr to the hardware info + * @reg: the hw reg to read and clear + * @stat: ptr to the stat + **/ +static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat) +{ + u32 new_data = rd32(hw, reg); + + wr32(hw, reg, 1); /* must write a nonzero value to clear register */ + *stat += new_data; +} + /** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated @@ -1040,18 +1054,15 @@ static void i40e_update_pf_stats(struct i40e_pf *pf) &osd->rx_jabber, &nsd->rx_jabber); /* FDIR stats */ - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_match, &nsd->fd_atr_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_sb_match, &nsd->fd_sb_match); - i40e_stat_update32(hw, - I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)), - pf->stat_offsets_loaded, - &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)), + &nsd->fd_sb_match); + i40e_stat_update_and_clear32(hw, + I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)), + &nsd->fd_atr_tunnel_match); val = rd32(hw, I40E_PRTPM_EEE_STAT); nsd->tx_lpi_status = -- cgit v1.2.3 From 905770fa3e6f30b393829ba1c238554e7f238aee Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 14 Jul 2017 09:27:01 -0400 Subject: i40evf: lower message level We see this message regularly on VF reset or unload (which invokes a reset). It's essentially meaningless unless it's happening constantly. To prevent consternation, lower the log level to debug so it's not seen under normal circumstance. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 85876f4fb1fb..2bb0fe00361f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -52,7 +52,7 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter, err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); if (err) - dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", + dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n", op, i40evf_stat_str(hw, err), i40evf_aq_str(hw, hw->aq.asq_last_status)); return err; -- cgit v1.2.3 From c17401a1dd210a5f22ab1ec7c7366037c158a14c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:02 -0400 Subject: i40e: use separate state bit for miscellaneous IRQ setup We currently (mis)use the __I40E_RECOVERY_PENDING bit to determine when we should actually request a new IRQ in i40e_setup_misc_vector(). This led to a design mistake where we open-coded the re-setup of the miscellaneous vector in i40e_resume() instead of using the function provided. If we did not open-code this and instead tried to use the i40e_setup_misc_vector() function, it would lead to never reallocating the IRQ. This would lead to a second i40e_suspend() call failing to free the vector due to a NULL pointer dereference. A future patch is going to re-work how the i40e_suspend() and i40e_resume() flows work to clear all IRQ vectors, which would require us to use i40e_setup_misc_vector() directly. Since during this time the __I40E_RECOVERY_PENDING bit is set, we'll never re-allocate the vector. Rather than leaving the open-coded setup in i40e_resume() lets just fix the problem properly in i40e_setup_misc_vector(). Introduce a new state bit which indicates when the IRQ has been assigned, which will be set when i40e_setup_misc_vector is first called. This ultimately resolves the issue of re-requesting the vector, without overloading the __I40E_RECOVERY_PENDING state. This ensures that the suspend/resume cycle can use the setup function instead of open-coding the re-request during resume. Additionally, since the only callers of i40e_stop_misc_vector also want to free it, move this code directly into the function to avoid duplication. Due to the new functionality, rename it to i40e_free_misc_vector(). This lets us drop the extra calls to free and re-enable the vector during i40e_suspend() and i40e_resume(). We don't need to call i40e_setup_misc_Vector() in i40e_resume() because it gets called by the i40e_rebuild() call. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 39 +++++++++++------------------ 2 files changed, 15 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d0c1bf5441d8..b7a539cdca00 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -136,6 +136,7 @@ enum i40e_state_t { __I40E_MDD_EVENT_PENDING, __I40E_VFLR_EVENT_PENDING, __I40E_RESET_RECOVERY_PENDING, + __I40E_MISC_IRQ_REQUESTED, __I40E_RESET_INTR_RECEIVED, __I40E_REINIT_REQUESTED, __I40E_PF_RESET_REQUESTED, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 638f5bad0bd7..3ea4f8b942c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3604,14 +3604,20 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi) } /** - * i40e_stop_misc_vector - Stop the vector that handles non-queue events + * i40e_free_misc_vector - Free the vector that handles non-queue events * @pf: board private structure **/ -static void i40e_stop_misc_vector(struct i40e_pf *pf) +static void i40e_free_misc_vector(struct i40e_pf *pf) { /* Disable ICR 0 */ wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0); i40e_flush(&pf->hw); + + if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); + } } /** @@ -4466,11 +4472,7 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); @@ -8365,13 +8367,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) struct i40e_hw *hw = &pf->hw; int err = 0; - /* Only request the irq if this is the first time through, and - * not when we're rebuilding after a Reset - */ - if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) { + /* Only request the IRQ once, the first time through. */ + if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) { err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0, pf->int_name, pf); if (err) { + clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state); dev_info(&pf->pdev->dev, "request_irq for %s failed: %d\n", pf->int_name, err); @@ -12069,11 +12070,8 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - i40e_stop_misc_vector(pf); - if (pf->msix_entries) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } + i40e_free_misc_vector(pf); + retval = pci_save_state(pdev); if (retval) return retval; @@ -12113,15 +12111,6 @@ static int i40e_resume(struct pci_dev *pdev) /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { clear_bit(__I40E_DOWN, pf->state); - if (pf->msix_entries) { - err = request_irq(pf->msix_entries[0].vector, - i40e_intr, 0, pf->int_name, pf); - if (err) { - dev_err(&pf->pdev->dev, - "request_irq for %s failed: %d\n", - pf->int_name, err); - } - } i40e_reset_and_rebuild(pf, false, false); } -- cgit v1.2.3 From 0e5d3da400558b7d30586a2cc1afe02276445636 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:03 -0400 Subject: i40e: use newer generic PM support instead of legacy PM callbacks Stop using the old legacy PM support, since we now have stable support for the newer generic PM callbacks. This has several advantages. First, we no longer have to manage our own pci_save_state() and power changes, as it's preferred to have the PCI stack do this. Second, these routines get called for both hibernate and suspend to ram, so we can have the driver properly handle all the suspend/resume flows that it needs to. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 54 +++++++++-------------------- 1 file changed, 17 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3ea4f8b942c3..c82360437024 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12050,14 +12050,14 @@ static void i40e_shutdown(struct pci_dev *pdev) #ifdef CONFIG_PM /** - * i40e_suspend - PCI callback for moving to D3 - * @pdev: PCI device information struct + * i40e_suspend - PM callback for moving to D3 + * @dev: generic device information structure **/ -static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) +static int i40e_suspend(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - int retval = 0; set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); @@ -12072,41 +12072,17 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) i40e_free_misc_vector(pf); - retval = pci_save_state(pdev); - if (retval) - return retval; - - pci_wake_from_d3(pdev, pf->wol_en); - pci_set_power_state(pdev, PCI_D3hot); - - return retval; + return 0; } /** - * i40e_resume - PCI callback for waking up from D3 - * @pdev: PCI device information struct + * i40e_resume - PM callback for waking up from D3 + * @dev: generic device information structure **/ -static int i40e_resume(struct pci_dev *pdev) +static int i40e_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* pci_restore_state() clears dev->state_saves, so - * call pci_save_state() again to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - /* no wakeup events while running */ - pci_wake_from_d3(pdev, false); /* handling the reset will rebuild the device state */ if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { @@ -12117,22 +12093,26 @@ static int i40e_resume(struct pci_dev *pdev) return 0; } -#endif +#endif /* CONFIG_PM */ + static const struct pci_error_handlers i40e_err_handler = { .error_detected = i40e_pci_error_detected, .slot_reset = i40e_pci_error_slot_reset, .resume = i40e_pci_error_resume, }; +static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume); + static struct pci_driver i40e_driver = { .name = i40e_driver_name, .id_table = i40e_pci_tbl, .probe = i40e_probe, .remove = i40e_remove, #ifdef CONFIG_PM - .suspend = i40e_suspend, - .resume = i40e_resume, -#endif + .driver = { + .pm = &i40e_pm_ops, + }, +#endif /* CONFIG_PM */ .shutdown = i40e_shutdown, .err_handler = &i40e_err_handler, .sriov_configure = i40e_pci_sriov_configure, -- cgit v1.2.3 From 401586c2b9bb16147f3dcc64d3596013625e2c44 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:04 -0400 Subject: i40e: don't clear suspended state until we finish resuming When handling suspend and resume callbacks we want to make sure that (a) we don't suspend again if we're already suspended and (b) we don't resume again if we're already resuming. Lets make sure we test_and_set the __I40E_SUSPENDED bit in i40e_suspend which ensures that a suspend call when already suspended will exit early. Additionally, if __I40E_SUSPENDED is not set when we begin resuming, exit early as well. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c82360437024..494cafde6b26 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12059,7 +12059,10 @@ static int i40e_suspend(struct device *dev) struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; - set_bit(__I40E_SUSPENDED, pf->state); + /* If we're already suspended, then there is nothing to do */ + if (test_and_set_bit(__I40E_SUSPENDED, pf->state)) + return 0; + set_bit(__I40E_DOWN, pf->state); if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) @@ -12084,11 +12087,15 @@ static int i40e_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); - /* handling the reset will rebuild the device state */ - if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) { - clear_bit(__I40E_DOWN, pf->state); - i40e_reset_and_rebuild(pf, false, false); - } + /* If we're not suspended, then there is nothing to do */ + if (!test_bit(__I40E_SUSPENDED, pf->state)) + return 0; + + clear_bit(__I40E_DOWN, pf->state); + i40e_reset_and_rebuild(pf, false, false); + + /* Clear suspended state last after everything is recovered */ + clear_bit(__I40E_SUSPENDED, pf->state); return 0; } -- cgit v1.2.3 From 5c499228803a77bd4e878c7119fbd40a1dc6d773 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:05 -0400 Subject: i40e: prevent service task from running while we're suspended Although the service task does check the suspended status before running, it might already be part way through running when we go to suspend. Lets ensure that the service task is stopped and will not be restarted again until we finish resuming. This ensures that service task code does not cause strange interactions with the suspend/resume handlers. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 494cafde6b26..368373459ad5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12065,6 +12065,10 @@ static int i40e_suspend(struct device *dev) set_bit(__I40E_DOWN, pf->state); + /* Ensure service task will not be running */ + del_timer_sync(&pf->service_timer); + cancel_work_sync(&pf->service_task); + if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE)) i40e_enable_mc_magic_wake(pf); @@ -12097,6 +12101,10 @@ static int i40e_resume(struct device *dev) /* Clear suspended state last after everything is recovered */ clear_bit(__I40E_SUSPENDED, pf->state); + /* Restart the service task */ + mod_timer(&pf->service_timer, + round_jiffies(jiffies + pf->service_timer_period)); + return 0; } -- cgit v1.2.3 From b980c0634fe56928a45cc3c0f688d96e36705403 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 14 Jul 2017 09:27:06 -0400 Subject: i40e: shutdown all IRQs and disable MSI-X when suspended On some platforms with a large number of CPUs, we will allocate many IRQ vectors. When hibernating, the system will attempt to migrate all of the vectors back to CPU0 when shutting down all the other CPUs. It is possible that we have so many vectors that it cannot re-assign them to CPU0. This is even more likely if we have many devices installed in one platform. The end result is failure to hibernate, as it is not possible to shutdown the CPUs. We can avoid this by disabling MSI-X and clearing our interrupt scheme when the device is suspended. A more ideal solution would be some method for the stack to properly handle this for all drivers, rather than on a case-by-case basis for each driver to fix itself. However, until this more ideal solution exists, we can do our part and shutdown our IRQs during suspend, which should allow systems with a large number of CPUs to safely suspend or hibernate. It may be worth investigating if we should shut down even further when we suspend as it may make the path cleaner, but this was the minimum fix for the hibernation issue mentioned here. Testing-hints: This affects systems with a large number of CPUs, and with multiple devices enabled. Without this change, those platforms are unable to hibernate at all. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 68 ++++++++++++++++++++++++- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 2 +- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 368373459ad5..8a44793d5390 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8354,6 +8354,57 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return 0; } +#ifdef CONFIG_PM +/** + * i40e_restore_interrupt_scheme - Restore the interrupt scheme + * @pf: private board data structure + * + * Restore the interrupt scheme that was cleared when we suspended the + * device. This should be called during resume to re-allocate the q_vectors + * and reacquire IRQs. + */ +static int i40e_restore_interrupt_scheme(struct i40e_pf *pf) +{ + int err, i; + + /* We cleared the MSI and MSI-X flags when disabling the old interrupt + * scheme. We need to re-enabled them here in order to attempt to + * re-acquire the MSI or MSI-X vectors + */ + pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED); + + err = i40e_init_interrupt_scheme(pf); + if (err) + return err; + + /* Now that we've re-acquired IRQs, we need to remap the vectors and + * rings together again. + */ + for (i = 0; i < pf->num_alloc_vsi; i++) { + if (pf->vsi[i]) { + err = i40e_vsi_alloc_q_vectors(pf->vsi[i]); + if (err) + goto err_unwind; + i40e_vsi_map_rings_to_vectors(pf->vsi[i]); + } + } + + err = i40e_setup_misc_vector(pf); + if (err) + goto err_unwind; + + return 0; + +err_unwind: + while (i--) { + if (pf->vsi[i]) + i40e_vsi_free_q_vectors(pf->vsi[i]); + } + + return err; +} +#endif /* CONFIG_PM */ + /** * i40e_setup_misc_vector - Setup the misc vector to handle non queue events * @pf: board private structure @@ -12077,7 +12128,12 @@ static int i40e_suspend(struct device *dev) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); - i40e_free_misc_vector(pf); + /* Clear the interrupt scheme and release our IRQs so that the system + * can safely hibernate even when there are a large number of CPUs. + * Otherwise hibernation might fail when mapping all the vectors back + * to CPU0. + */ + i40e_clear_interrupt_scheme(pf); return 0; } @@ -12090,11 +12146,21 @@ static int i40e_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); struct i40e_pf *pf = pci_get_drvdata(pdev); + int err; /* If we're not suspended, then there is nothing to do */ if (!test_bit(__I40E_SUSPENDED, pf->state)) return 0; + /* We cleared the interrupt scheme when we suspended, so we need to + * restore it now to resume device functionality. + */ + err = i40e_restore_interrupt_scheme(pf); + if (err) { + dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n", + err); + } + clear_bit(__I40E_DOWN, pf->state); i40e_reset_and_rebuild(pf, false, false); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index c243f9da95ae..80ade6510279 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -46,7 +46,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 3 #define DRV_VERSION_MINOR 0 -#define DRV_VERSION_BUILD 0 +#define DRV_VERSION_BUILD 1 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ -- cgit v1.2.3 From c97fc9b6a798f4253c176231ba0aceda6b59b058 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Fri, 14 Jul 2017 09:27:07 -0400 Subject: i40evf: fix ring to vector mapping The current implementation for mapping queues to vectors is broken because it attempts to map each Tx and Rx ring to its own vector, however we use combined queues so we should actually be mapping the Tx/Rx rings together on one vector. Also in the current implementation, in the case where we have more queues than vectors, we attempt to group the queues together into 'chunks' and map each 'chunk' of queues to a vector. Chunking them together would be more ideal if, and only if, we only had RSS because of the way the hashing algorithm works but in the case of a future patch that enables VF ADq, round robin assignment is better and still works with RSS. This patch resolves both those issues and simplifies the code needed to accomplish this. Instead of treating the case where we have more queues than vectors as special, if we notice our vector index is greater than vectors, reset the vector index to zero and continue mapping. This should ensure that in both cases, whether we have enough vectors for each queue or not, the queues get appropriately mapped. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 48 ++++++------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 80ade6510279..69ef6c1d5364 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -432,52 +432,24 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) **/ static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) { + int rings_remaining = adapter->num_active_queues; + int ridx = 0, vidx = 0; int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_active_queues; - int txr_remaining = adapter->num_active_queues; - int i, j; - int rqpv, tqpv; int err = 0; q_vectors = adapter->num_msix_vectors - NONQ_VECS; - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors >= (rxr_remaining * 2)) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - i40evf_map_vector_to_rxq(adapter, v_start, rxr_idx); + for (; ridx < rings_remaining; ridx++) { + i40evf_map_vector_to_rxq(adapter, vidx, ridx); + i40evf_map_vector_to_txq(adapter, vidx, ridx); - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - i40evf_map_vector_to_txq(adapter, v_start, txr_idx); - goto out; - } - - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - * Re-adjusting *qpv takes care of the remainder. - */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - i40evf_map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - i40evf_map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } + /* In the case where we have more queues than vectors, continue + * round-robin on vectors until all queues are mapped. + */ + if (++vidx >= q_vectors) + vidx = 0; } -out: adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; return err; -- cgit v1.2.3 From a3f5aa907340b5d7b54223ddbaa90410f168864d Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Fri, 14 Jul 2017 09:27:08 -0400 Subject: i40e: Enable VF to negotiate number of allocated queues Currently the PF allocates a default number of queues for each VF and cannot be changed. This patch enables the VF to request a different number of queues allocated to it. This patch also adds a new virtchnl op and capability flag to facilitate this negotiation. After the PF receives a request message, it will set a requested number of queues for that VF. Then when the VF resets, its VSI will get a new number of queues allocated to it. This is a best effort request and since we only allocate a guaranteed default number, if the VF tries to ask for more than the guaranteed number, there may not be enough in HW to accommodate it unless other queues for other VFs are freed. It should also be noted decreasing the number queues allocated to a VF to below the default will NOT enable the allocation of more than 32 VFs per PF and will not free queues guaranteed to each VF by default. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 75 ++++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 + include/linux/avf/virtchnl.h | 20 ++++++ 4 files changed, 97 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b7a539cdca00..439c63cb2a0c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -77,6 +77,7 @@ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1) #define I40E_DEFAULT_QUEUES_PER_VF 4 +#define I40E_MAX_VF_QUEUES 16 #define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ #define i40e_pf_get_max_q_per_tc(pf) \ (((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4d1e670f490e..a75396c157d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -815,6 +815,14 @@ static void i40e_free_vf_res(struct i40e_vf *vf) */ clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); + /* It's possible the VF had requeuested more queues than the default so + * do the accounting here when we're about to free them. + */ + if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) { + pf->queues_left += vf->num_queue_pairs - + I40E_DEFAULT_QUEUES_PER_VF; + } + /* free vsi & disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]); @@ -868,12 +876,27 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) int total_queue_pairs = 0; int ret; + if (vf->num_req_queues && + vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) + pf->num_vf_qps = vf->num_req_queues; + else + pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; + /* allocate hw vsi context & associated resources */ ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* We account for each VF to get a default number of queue pairs. If + * the VF has now requested more, we need to account for that to make + * certain we never request more queues than we actually have left in + * HW. + */ + if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) + pf->queues_left -= + total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF; + if (vf->trusted) set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); else @@ -1579,6 +1602,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1986,6 +2012,52 @@ error_param: aq_ret); } +/** + * i40e_vc_request_queues_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * @msglen: msg length + * + * VFs get a default number of queues but can use this message to request a + * different number. Will respond with either the number requested or the + * maximum we can support. + **/ +static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen) +{ + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + int req_pairs = vfres->num_queue_pairs; + int cur_pairs = vf->num_queue_pairs; + struct i40e_pf *pf = vf->pf; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + return -EINVAL; + + if (req_pairs <= 0) { + dev_err(&pf->pdev->dev, + "VF %d tried to request %d queues. Ignoring.\n", + vf->vf_id, req_pairs); + } else if (req_pairs > I40E_MAX_VF_QUEUES) { + dev_err(&pf->pdev->dev, + "VF %d tried to request more than %d queues.\n", + vf->vf_id, + I40E_MAX_VF_QUEUES); + vfres->num_queue_pairs = I40E_MAX_VF_QUEUES; + } else if (req_pairs - cur_pairs > pf->queues_left) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but only %d left.\n", + vf->vf_id, + req_pairs - cur_pairs, + pf->queues_left); + vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else { + vf->num_req_queues = req_pairs; + } + + return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, + (u8 *)vfres, sizeof(vfres)); +} + /** * i40e_vc_get_stats_msg * @vf: pointer to the VF info @@ -2708,6 +2780,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen); break; + case VIRTCHNL_OP_REQUEST_QUEUES: + ret = i40e_vc_request_queues_msg(vf, msg, msglen); + break; case VIRTCHNL_OP_UNKNOWN: default: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 1f4b0c504368..5111d05d5f2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -97,6 +97,7 @@ struct i40e_vf { u16 lan_vsi_id; /* ID as used by firmware */ u8 num_queue_pairs; /* num of qps assigned to VF vsis */ + u8 num_req_queues; /* num of requested qps */ u64 num_mdd_events; /* num of mdd events detected */ /* num of continuous malformed or invalid msgs detected */ u64 num_invalid_msgs; diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2b038442c352..60e5d90cb18a 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -135,6 +135,7 @@ enum virtchnl_ops { VIRTCHNL_OP_SET_RSS_HENA = 26, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, + VIRTCHNL_OP_REQUEST_QUEUES = 29, }; /* This macro is used to generate a compilation error if a structure @@ -235,6 +236,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 #define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040 #define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 @@ -325,6 +327,21 @@ struct virtchnl_vsi_queue_config_info { struct virtchnl_queue_pair_info qpair[1]; }; +/* VIRTCHNL_OP_REQUEST_QUEUES + * VF sends this message to request the PF to allocate additional queues to + * this VF. Each VF gets a guaranteed number of queues on init but asking for + * additional queues must be negotiated. This is a best effort request as it + * is possible the PF does not have enough queues left to support the request. + * If the PF cannot support the number requested it will respond with the + * maximum number it is able to support; otherwise it will respond with the + * number requested. + */ + +/* VF resource request */ +struct virtchnl_vf_res_request { + u16 num_queue_pairs; +}; + VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); /* VIRTCHNL_OP_CONFIG_IRQ_MAP @@ -691,6 +708,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: break; + case VIRTCHNL_OP_REQUEST_QUEUES: + valid_len = sizeof(struct virtchnl_vf_res_request); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 22b96551f213d7e7d743442c923c266a10306b9b Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Fri, 14 Jul 2017 09:27:09 -0400 Subject: i40e: refactor FW version checking The i40e driver now supports two different devices with two different firmware versions. So be smart about how we handle these. Move the FW version macros to the appropriate header file, and add a convenience macro that checks the version based on the device. Then use this macro to check whether or not the driver can use the new link info API. Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 10 +++++++++- drivers/net/ethernet/intel/i40e/i40e_common.c | 6 ++++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 10 +++++++++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5d5f422cbae5..e2a9ec80a623 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 111426ba5fbc..7346d8850c8e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1593,8 +1593,10 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw, status = I40E_ERR_UNKNOWN_PHY; if (report_init) { - hw->phy.phy_types = le32_to_cpu(abilities->phy_type); - hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32); + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) + status = i40e_aq_get_link_info(hw, true, NULL, NULL); } return status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8a44793d5390..47f71d7c3ae0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11434,7 +11434,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_nvm_version_str(hw)); if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR) + hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) dev_info(&pdev->dev, "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 83e63e55c4b4..f9f48d1900b0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -34,7 +34,15 @@ */ #define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR 0x0005 +#define I40E_FW_API_VERSION_MINOR_X722 0x0005 +#define I40E_FW_API_VERSION_MINOR_X710 0x0007 + +#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \ + I40E_FW_API_VERSION_MINOR_X710 : \ + I40E_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 struct i40e_aq_desc { __le16 flags; -- cgit v1.2.3 From 1f372c7bfb23286d2bf4ce0423ab488e86b74bb2 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 25 Sep 2017 22:01:36 +0100 Subject: net: ipv6: send NS for DAD when link operationally up The NS for DAD are sent on admin up as long as a valid qdisc is found. A race condition exists by which these packets will not egress the interface if the operational state of the lower device is not yet up. The solution is to delay DAD until the link is operationally up according to RFC2863. Rather than only doing this, follow the existing code checks by deferring IPv6 device initialization altogether. The fix allows DAD on devices like tunnels that are controlled by userspace control plane. The fix has no impact on regular deployments, but means that there is no IPv6 connectivity until the port has been opened in the case of port-based network access control, which should be desirable. Signed-off-by: Mike Manning Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 13c3b697f8c0..f553f72d0bee 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_policy = 0, }; -/* Check if a valid qdisc is available */ -static inline bool addrconf_qdisc_ok(const struct net_device *dev) +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool addrconf_link_ready(const struct net_device *dev) { - return !qdisc_tx_is_noop(dev); + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->token = in6addr_any; - if (netif_running(dev) && addrconf_qdisc_ok(dev)) + if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -3403,7 +3403,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, /* restore routes for permanent addresses */ addrconf_permanent_addr(dev); - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); @@ -3418,7 +3418,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } } else if (event == NETDEV_CHANGE) { - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is still not ready. */ break; } -- cgit v1.2.3 From 84e14fe353de7624872e582887712079ba0b2d56 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Tue, 26 Sep 2017 21:32:42 -0700 Subject: net-ipv6: add support for sockopt(SOL_IPV6, IPV6_FREEBIND) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far we've been relying on sockopt(SOL_IP, IP_FREEBIND) being usable even on IPv6 sockets. However, it turns out it is perfectly reasonable to want to set freebind on an AF_INET6 SOCK_RAW socket - but there is no way to set any SOL_IP socket option on such a socket (they're all blindly errored out). One use case for this is to allow spoofing src ip on a raw socket via sendmsg cmsg. Tested: built, and booted # python >>> import socket >>> SOL_IP = socket.SOL_IP >>> SOL_IPV6 = socket.IPPROTO_IPV6 >>> IP_FREEBIND = 15 >>> IPV6_FREEBIND = 78 >>> s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM, 0) >>> s.getsockopt(SOL_IP, IP_FREEBIND) 0 >>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND) 0 >>> s.setsockopt(SOL_IPV6, IPV6_FREEBIND, 1) >>> s.getsockopt(SOL_IP, IP_FREEBIND) 1 >>> s.getsockopt(SOL_IPV6, IPV6_FREEBIND) 1 Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/uapi/linux/in6.h | 1 + net/ipv6/ipv6_sockglue.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 46444f8fbee4..4f8f3eb0699f 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -284,6 +284,7 @@ struct in6_flowlabel_req { #define IPV6_TRANSPARENT 75 #define IPV6_UNICAST_IF 76 #define IPV6_RECVFRAGSIZE 77 +#define IPV6_FREEBIND 78 /* * Multicast Routing: diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a5e466d4e093..b9404feabd78 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_FREEBIND: + if (optlen < sizeof(int)) + goto e_inval; + /* we also don't have a separate freebind bit for IPV6 */ + inet_sk(sk)->freebind = valbool; + retv = 0; + break; + case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; @@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = inet_sk(sk)->transparent; break; + case IPV6_FREEBIND: + val = inet_sk(sk)->freebind; + break; + case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; -- cgit v1.2.3 From 45c1fd61d5cead2ae23880e0677b8660ab9006a4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 27 Sep 2017 22:45:13 +0100 Subject: mkiss: remove redundant check on len being zero The check on len is redundant as it is always greater than 1, so just remove it and make the printk less complex. Detected by CoverityScan, CID#1226729 ("Logically dead code") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index aec6c26563cf..54bf8e6e4a09 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -477,7 +477,8 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len) cmd = 0; } ax->crcauto = (cmd ? 0 : 1); - printk(KERN_INFO "mkiss: %s: crc mode %s %d\n", ax->dev->name, (len) ? "set to" : "is", cmd); + printk(KERN_INFO "mkiss: %s: crc mode set to %d\n", + ax->dev->name, cmd); } spin_unlock_bh(&ax->buflock); netif_start_queue(dev); -- cgit v1.2.3 From 706cc9f51d9a22528af18d4b3ffbd17b30a1d3b0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 30 Sep 2017 09:24:28 +0200 Subject: batman-adv: Add argument names for function ptr definitions checkpatch started to report unnamed arguments in function pointer definitions. Add the corresponding names to these definitions to avoid this warning. Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 033819aefc39..4daed7ad46f2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -73,8 +73,8 @@ * list traversals just rcu-locked */ struct list_head batadv_hardif_list; -static int (*batadv_rx_handler[256])(struct sk_buff *, - struct batadv_hard_iface *); +static int (*batadv_rx_handler[256])(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -540,8 +540,8 @@ batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { - int (*curr)(struct sk_buff *, - struct batadv_hard_iface *); + int (*curr)(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); curr = batadv_rx_handler[packet_type]; if (curr != batadv_recv_unhandled_packet && -- cgit v1.2.3 From 21a2774ef5d4fde772fbcb9d0eabb76f585e5af5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 28 Sep 2017 11:19:06 -0700 Subject: Revert "net: dsa: bcm_sf2: Defer port enabling to calling port_enable" This reverts commit e85ec74ace29 ("net: dsa: bcm_sf2: Defer port enabling to calling port_enable") because this now makes an unbind followed by a bind to fail connecting to the ingrated PHY. What this patch missed is that we need the PHY to be enabled with bcm_sf2_gphy_enable_set() before probing it on the MDIO bus. This is correctly done in the ops->setup() function, but by the time ops->port_enable() runs, this is too late. Upon unbind we would power down the PHY, and so when we would bind again, the PHY would be left powered off. Fixes: e85ec74ace29 ("net: dsa: bcm_sf2: Defer port enabling to calling port_enable") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 898d5642b516..7aecc98d0a18 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -754,11 +754,14 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; - /* Disable unused ports and configure IMP port */ + /* Enable all valid ports and disable those unused */ for (port = 0; port < priv->hw_params.num_ports; port++) { - if (dsa_is_cpu_port(ds, port)) + /* IMP port receives special treatment */ + if ((1 << port) & ds->enabled_port_mask) + bcm_sf2_port_setup(ds, port, NULL); + else if (dsa_is_cpu_port(ds, port)) bcm_sf2_imp_setup(ds, port); - else if (!((1 << port) & ds->enabled_port_mask)) + else bcm_sf2_port_disable(ds, port, NULL); } -- cgit v1.2.3 From e876a8a7e9dd89dc88c12ca2e81beb478dbe9897 Mon Sep 17 00:00:00 2001 From: Mick Tarsel Date: Thu, 28 Sep 2017 13:53:18 -0700 Subject: ibmvnic: Set state UP State is initially reported as UNKNOWN. Before register call netif_carrier_off(). Once the device is opened, call netif_carrier_on() in order to set the state to UP. Signed-off-by: Mick Tarsel Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index cb8182f4fdfa..4bc14a901571 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -927,6 +927,7 @@ static int ibmvnic_open(struct net_device *netdev) } rc = __ibmvnic_open(netdev); + netif_carrier_on(netdev); mutex_unlock(&adapter->reset_lock); return rc; @@ -3899,6 +3900,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (rc) goto ibmvnic_init_fail; + netif_carrier_off(netdev); rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); -- cgit v1.2.3 From ef739d8aabaa26dd99f5661e38a86f4d85d8dfed Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 14:34:22 +0100 Subject: net: ipmr: make function ipmr_notifier_init static The function ipmr_notifier_init is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: warning: symbol 'ipmr_notifier_init' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 292a8e80bdfa..a844738b38bd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3224,7 +3224,7 @@ static const struct fib_notifier_ops ipmr_notifier_ops_template = { .owner = THIS_MODULE, }; -int __net_init ipmr_notifier_init(struct net *net) +static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; -- cgit v1.2.3 From b1c49d14200dae47544f7ea6ee9040ded01f8425 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 15:01:16 +0100 Subject: net_sched: remove redundant assignment to ret The assignment of -EINVAL to variable ret is redundant as it is being overwritten on the following error exit paths or to the return value from the following call to basic_set_parms. Fix this up by removing it. Cleans up clang warning message: net/sched/cls_basic.c:185:2: warning: Value stored to 'err' is never read Fixes: 1d8134fea2eb ("net_sched: use idr to allocate basic filter handles") Signed-off-by: Colin Ian King Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_basic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index cfeb6f158566..700b345b07f9 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -182,7 +182,6 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = -EINVAL; if (handle) { fnew->handle = handle; if (!fold) { -- cgit v1.2.3 From 721e08dad17e226ef68819d0a23dc53c25fe8ea5 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 29 Sep 2017 10:52:17 -0700 Subject: bpf: Fix compiler warning on info.map_ids for 32bit platform This patch uses u64_to_user_ptr() to cast info.map_ids to a userspace ptr. It also tags the user_map_ids with '__user' for sparse check. Fixes: cb4d2b3f03d8 ("bpf: Add name, load_time, uid and map_ids to bpf_prog_info") Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 11a7f82a55d1..b927da66f653 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1405,7 +1405,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, info.nr_map_ids = prog->aux->used_map_cnt; ulen = min_t(u32, info.nr_map_ids, ulen); if (ulen) { - u32 *user_map_ids = (u32 *)info.map_ids; + u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids); u32 i; for (i = 0; i < ulen; i++) -- cgit v1.2.3 From 09af87d18f6ba05588e6316c47fdacf06e28cce8 Mon Sep 17 00:00:00 2001 From: Simon Xiao Date: Fri, 29 Sep 2017 11:39:46 -0700 Subject: hv_netvsc: report stop_queue and wake_queue Report the numbers of events for stop_queue and wake_queue in ethtool stats. Example: ethtool -S eth0 NIC statistics: ... stop_queue: 7 wake_queue: 7 ... Signed-off-by: Simon Xiao Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc.c | 12 ++++++++++-- drivers/net/hyperv/netvsc_drv.c | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5176be76ca7d..6f550e15a41c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -686,6 +686,8 @@ struct netvsc_ethtool_stats { unsigned long tx_busy; unsigned long tx_send_full; unsigned long rx_comp_busy; + unsigned long stop_queue; + unsigned long wake_queue; }; struct netvsc_vf_pcpu_stats { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index b0d323e24978..6e5194916bbe 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -609,6 +609,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, { struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id; struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *ndev_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; u16 q_idx = 0; int queue_sends; @@ -643,8 +644,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || - queue_sends < 1)) + queue_sends < 1)) { netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); + ndev_ctx->eth_stats.wake_queue++; + } } static void netvsc_send_completion(struct netvsc_device *net_device, @@ -749,6 +752,7 @@ static inline int netvsc_send_pkt( &net_device->chan_table[packet->q_idx]; struct vmbus_channel *out_channel = nvchan->channel; struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *ndev_ctx = netdev_priv(ndev); struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; @@ -789,12 +793,16 @@ static inline int netvsc_send_pkt( if (ret == 0) { atomic_inc_return(&nvchan->queue_sends); - if (ring_avail < RING_AVAIL_PERCENT_LOWATER) + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; + } } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); + ndev_ctx->eth_stats.stop_queue++; if (atomic_read(&nvchan->queue_sends) < 1) { netif_tx_wake_queue(txq); + ndev_ctx->eth_stats.wake_queue++; ret = -ENOSPC; } } else { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index e9d54c9ee78c..f300ae61c6c6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1126,6 +1126,8 @@ static const struct { { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, { "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) }, { "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) }, + { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) }, + { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) }, }, vf_stats[] = { { "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) }, { "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) }, -- cgit v1.2.3 From 075cfdd659cb1e86f948f11ba577f27706f0756e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 29 Sep 2017 20:51:23 +0100 Subject: net: hns3: fix null pointer dereference before null check pointer ndev is being dereferenced with the call to netif_running before it is being null checked. Re-order the code to only dereference ndev after it has been null checked. Detected by CoverityScan, CID#1457206 ("Dereference before null check") Fixes: 9df8f79a4d29 ("net: hns3: Add DCB support when interacting with network stack") Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 4a0890f98b70..c31506514e5d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2865,7 +2865,7 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; struct net_device *ndev = kinfo->netdev; - bool if_running = netif_running(ndev); + bool if_running; int ret; u8 i; @@ -2875,6 +2875,8 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc) if (!ndev) return -ENODEV; + if_running = netif_running(ndev); + ret = netdev_set_num_tc(ndev, tc); if (ret) return ret; -- cgit v1.2.3 From 3775b1b7f0c330e59c434d1852d7762ae0a9c164 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:15 -0400 Subject: net: dsa: add master helper to look up slaves The DSA tagging code does not need to know about the DSA architecture, it only needs to return the slave device corresponding to the source port index (and eventually the source device index for cascade-capable switches) parsed from the frame received on the master device. For this purpose, provide an inline dsa_master_get_slave helper which validates the device and port indexes and look up the slave device. This makes the tagging rcv functions more concise and robust, and also makes dsa_get_cpu_port obsolete. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa_priv.h | 24 +++++++++++++++++++----- net/dsa/tag_brcm.c | 9 ++------- net/dsa/tag_dsa.c | 18 ++---------------- net/dsa/tag_edsa.c | 18 ++---------------- net/dsa/tag_ksz.c | 9 +++------ net/dsa/tag_lan9303.c | 20 ++------------------ net/dsa/tag_mtk.c | 16 +++------------- net/dsa/tag_qca.c | 17 +++-------------- net/dsa/tag_trailer.c | 9 +++------ 9 files changed, 39 insertions(+), 101 deletions(-) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index eccc62776283..d429505dc4e7 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -113,6 +113,25 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], int dsa_master_ethtool_setup(struct net_device *dev); void dsa_master_ethtool_restore(struct net_device *dev); +static inline struct net_device *dsa_master_get_slave(struct net_device *dev, + int device, int port) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + + if (device < 0 || device >= DSA_MAX_SWITCHES) + return NULL; + + ds = dst->ds[device]; + if (!ds) + return NULL; + + if (port < 0 || port >= ds->num_ports) + return NULL; + + return ds->ports[port].netdev; +} + /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); @@ -182,9 +201,4 @@ static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) return p->dp->cpu_dp->netdev; } -static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst) -{ - return dst->cpu_dp; -} - #endif diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index dbb016434ace..8e4bdb9d9ae3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -92,9 +92,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; int source_port; u8 *brcm_tag; @@ -117,8 +114,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - /* Validate port against switch setup, either the port is totally */ - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; /* Remove Broadcom tag and update checksum */ @@ -129,8 +126,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index fbf9ca954773..c77218f173d1 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *dsa_header; int source_device; int source_port; @@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. - */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -153,8 +141,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 76367ba1b2e2..0b83cbe0c9e8 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *edsa_header; int source_device; int source_port; @@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = edsa_header[0] & 0x1f; source_port = (edsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. - */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_get_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -172,8 +160,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 010ca0a336c4..b241c990cde0 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *tag; int source_port; tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; source_port = tag[0] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 0b9826105e42..4f211e56c81f 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -71,17 +71,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { u16 *lan9303_tag; - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; unsigned int source_port; - ds = dst->ds[0]; - - if (unlikely(!ds)) { - dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n"); - return NULL; - } - if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { dev_warn_ratelimited(&dev->dev, "Dropping packet, cannot pull\n"); @@ -103,16 +94,12 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, source_port = ntohs(lan9303_tag[1]) & 0x3; - if (source_port >= ds->num_ports) { + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; } - if (!ds->ports[source_port].netdev) { - dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n"); - return NULL; - } - /* remove the special VLAN tag between the MAC addresses * and the current ethertype field. */ @@ -120,9 +107,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); - /* forward the packet to the dedicated interface */ - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index ec8ee5f43255..968586c5d40e 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; int port; __be16 *phdr, hdr; @@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - MTK_HDR_LEN, 2 * ETH_ALEN); - /* This protocol doesn't support cascading multiple - * switches so it's safe to assume the switch is first - * in the tree. - */ - ds = dst->ds[0]; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_get_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 1d4c70711c0f..8d33d9ebf910 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -65,9 +65,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds; u8 ver; int port; __be16 *phdr, hdr; @@ -92,20 +89,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, ETH_HLEN - QCA_HDR_LEN); - /* This protocol doesn't support cascading multiple switches so it's - * safe to assume the switch is first in the tree - */ - ds = cpu_dp->ds; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - /* Update skb & forward the frame accordingly */ - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_get_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d2fd4923aa3e..61668be267f5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *trailer; int source_port; @@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; source_port = trailer[1] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_get_slave(dev, 0, source_port); + if (!skb->dev) return NULL; pskb_trim_rcsum(skb, skb->len - 4); - skb->dev = ds->ports[source_port].netdev; - return skb; } -- cgit v1.2.3 From 7ec764eef934409c4e15539440c31bca0b8de005 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:16 -0400 Subject: net: dsa: use cpu_dp in master code Make it clear that the master device is linked to a CPU port by using "cpu_dp" for the dsa_port variable in master.c instead of "port", then use a "port" variable to describe the port index, as usually seen in other places of DSA core. This will make the future patch touching dsa_ptr more readable. There is no functional changes. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/master.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 5e5147ec5a44..ef15d35f1574 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -17,9 +17,10 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, uint64_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; int count = 0; if (ops && ops->get_sset_count && ops->get_ethtool_stats) { @@ -28,15 +29,15 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, } if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, port->index, data + count); + ds->ops->get_ethtool_stats(ds, port, data + count); } static int dsa_master_get_sset_count(struct net_device *dev, int sset) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; int count = 0; if (ops && ops->get_sset_count) @@ -52,16 +53,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; - const struct ethtool_ops *ops = port->orig_ethtool_ops; + struct dsa_port *cpu_dp = dst->cpu_dp; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; int len = ETH_GSTRING_LEN; int mcount = 0, count; unsigned int i; uint8_t pfx[4]; uint8_t *ndata; - snprintf(pfx, sizeof(pfx), "p%.2d", port->index); + snprintf(pfx, sizeof(pfx), "p%.2d", port); /* We do not want to be NULL-terminated, since this is a prefix */ pfx[sizeof(pfx) - 1] = '_'; @@ -76,7 +78,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->ops->get_strings(ds, port->index, ndata); + ds->ops->get_strings(ds, port, ndata); count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), @@ -89,17 +91,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; - struct dsa_switch *ds = port->ds; + struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; - port->orig_ethtool_ops = dev->ethtool_ops; - if (port->orig_ethtool_ops) - memcpy(ops, port->orig_ethtool_ops, sizeof(*ops)); + cpu_dp->orig_ethtool_ops = dev->ethtool_ops; + if (cpu_dp->orig_ethtool_ops) + memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); ops->get_sset_count = dsa_master_get_sset_count; ops->get_ethtool_stats = dsa_master_get_ethtool_stats; @@ -113,8 +115,8 @@ int dsa_master_ethtool_setup(struct net_device *dev) void dsa_master_ethtool_restore(struct net_device *dev) { struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *port = dst->cpu_dp; + struct dsa_port *cpu_dp = dst->cpu_dp; - dev->ethtool_ops = port->orig_ethtool_ops; - port->orig_ethtool_ops = NULL; + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; } -- cgit v1.2.3 From 62fc95876298987c35e9fb10badd467f4787aae7 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:17 -0400 Subject: net: dsa: use temporary dsa_device_ops variable When resolving the DSA tagging protocol used by a CPU switch, use a temporary "tag_ops" variable to store the dsa_device_ops instead of using directly dst->tag_ops. This will make the future patches moving this pointer around easier to read. There is no functional changes. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 8 +++++--- net/dsa/legacy.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dcccaebde708..6a10c5c1639f 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -485,6 +485,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -514,13 +515,14 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, ds->cpu_port_mask |= BIT(index); tag_protocol = ds->ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) { + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); ds->cpu_port_mask &= ~BIT(index); - return PTR_ERR(dst->tag_ops); + return PTR_ERR(tag_ops); } + dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; return 0; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index ae505d8e4417..8e849013f69d 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -144,13 +144,15 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, * switch. */ if (dst->cpu_dp->ds == ds) { + const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; tag_protocol = ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) - return PTR_ERR(dst->tag_ops); + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) + return PTR_ERR(tag_ops); + dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; } -- cgit v1.2.3 From 152402483ed75b167d5628d414e876ffa7a6d4c4 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:18 -0400 Subject: net: dsa: add tagging ops to port The DSA tagging protocol operations are specific to each CPU port, thus the dsa_device_ops pointer belongs to the dsa_port structure. >From now on assign a slave's xmit copy from its CPU port tagging operations. This will ease the future support for multiple CPU ports. Also keep the tag_ops at the beginning of the dsa_port structure so that we ensure copies for hot path are in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 3 +++ net/dsa/dsa2.c | 1 + net/dsa/dsa_priv.h | 2 +- net/dsa/legacy.c | 1 + net/dsa/slave.c | 3 +-- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 8dee216a5a9b..4d1df2f086e8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -175,6 +175,9 @@ struct dsa_mall_tc_entry { struct dsa_port { + /* CPU port tagging operations used by master or slave devices */ + const struct dsa_device_ops *tag_ops; + struct dsa_switch *ds; unsigned int index; const char *name; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 6a10c5c1639f..9eac4726dc0c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -522,6 +522,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, return PTR_ERR(tag_ops); } + dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d429505dc4e7..9397291bb3aa 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info { }; struct dsa_slave_priv { - /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ + /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 8e849013f69d..4d374541815a 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -152,6 +152,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, if (IS_ERR(tag_ops)) return PTR_ERR(tag_ops); + dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; dst->rcv = dst->tag_ops->rcv; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index bf8800de13c1..4b634db05cee 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1117,7 +1117,6 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port, const char *name) { struct dsa_switch *ds = port->ds; - struct dsa_switch_tree *dst = ds->dst; struct net_device *master; struct net_device *slave_dev; struct dsa_slave_priv *p; @@ -1162,7 +1161,7 @@ int dsa_slave_create(struct dsa_port *port, const char *name) } p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - p->xmit = dst->tag_ops->xmit; + p->xmit = cpu_dp->tag_ops->xmit; p->old_pause = -1; p->old_link = -1; -- cgit v1.2.3 From 3e41f93b358a8800194b87995ad076fc50919719 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:19 -0400 Subject: net: dsa: prepare master receive hot path In preparation to make DSA master devices point to their corresponding CPU port instead of the whole tree, add copies of dst and rcv in the dsa_port structure so that we keep fast access in the receive hot path. Also keep the copies at the beginning of the dsa_port structure in order to ensure they are available in cacheline 1. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/net/dsa.h | 5 +++++ net/dsa/dsa2.c | 4 ++++ net/dsa/legacy.c | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 4d1df2f086e8..6bda01fa5747 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -178,6 +178,11 @@ struct dsa_port { /* CPU port tagging operations used by master or slave devices */ const struct dsa_device_ops *tag_ops; + /* Copies for faster access in master receive hot path */ + struct dsa_switch_tree *dst; + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt); + struct dsa_switch *ds; unsigned int index; const char *name; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9eac4726dc0c..b71e3bb478e4 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -524,7 +524,11 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; + + /* Make a few copies for faster access in master receive hot path */ + dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; dst->rcv = dst->tag_ops->rcv; + dst->cpu_dp->dst = dst; return 0; } diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 4d374541815a..96c7e3f8b8bb 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -154,7 +154,11 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, dst->cpu_dp->tag_ops = tag_ops; dst->tag_ops = tag_ops; + + /* Few copies for faster access in master receive hot path */ + dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; dst->rcv = dst->tag_ops->rcv; + dst->cpu_dp->dst = dst; } memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); -- cgit v1.2.3 From 2f657a600409f1961d67642fe384a9d4be71d36a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:20 -0400 Subject: net: dsa: change dsa_ptr for a dsa_port With DSA, a master net device (CPU facing interface) has a dsa_ptr pointer to which hangs a dsa_switch_tree. This is not correct because a master interface is wired to a dedicated switch port, and because we can theoretically have several master interfaces pointing to several CPU ports of the same switch fabric. Change the master interface's dsa_ptr for the CPU dsa_port pointer. This is a step towards supporting multiple CPU ports. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- net/dsa/dsa.c | 6 +++--- net/dsa/dsa2.c | 2 +- net/dsa/dsa_priv.h | 3 ++- net/dsa/legacy.c | 2 +- net/dsa/master.c | 15 +++++---------- 6 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..e1d6ef130611 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -55,7 +55,7 @@ struct netpoll_info; struct device; struct phy_device; -struct dsa_switch_tree; +struct dsa_port; /* 802.11 specific */ struct wireless_dev; @@ -1752,7 +1752,7 @@ struct net_device { struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; + struct dsa_port *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 81c852e32821..51ca2a524a27 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -160,12 +160,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { - struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; struct pcpu_sw_netstats *s; struct dsa_slave_priv *p; - if (unlikely(dst == NULL)) { + if (unlikely(!cpu_dp)) { kfree_skb(skb); return 0; } @@ -174,7 +174,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = dst->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev, pt); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index b71e3bb478e4..62302558f38c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -438,7 +438,7 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) * sent to the tag format's receive function. */ wmb(); - dst->cpu_dp->netdev->dsa_ptr = dst; + dst->cpu_dp->netdev->dsa_ptr = dst->cpu_dp; err = dsa_master_ethtool_setup(dst->cpu_dp->netdev); if (err) diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 9397291bb3aa..2850077cc9cc 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -116,7 +116,8 @@ void dsa_master_ethtool_restore(struct net_device *dev); static inline struct net_device *dsa_master_get_slave(struct net_device *dev, int device, int port) { - struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_switch *ds; if (device < 0 || device >= DSA_MAX_SWITCHES) diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 96c7e3f8b8bb..71917505a5cc 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -607,7 +607,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, * sent to the tag format's receive function. */ wmb(); - dev->dsa_ptr = dst; + dev->dsa_ptr = dst->cpu_dp; return dsa_master_ethtool_setup(dst->cpu_dp->netdev); } diff --git a/net/dsa/master.c b/net/dsa/master.c index ef15d35f1574..5f3f57e372e0 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -16,8 +16,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; @@ -34,8 +33,7 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, static int dsa_master_get_sset_count(struct net_device *dev, int sset) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int count = 0; @@ -52,8 +50,7 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset) static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; @@ -90,8 +87,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, int dsa_master_ethtool_setup(struct net_device *dev) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; @@ -114,8 +110,7 @@ int dsa_master_ethtool_setup(struct net_device *dev) void dsa_master_ethtool_restore(struct net_device *dev) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dst->cpu_dp; + struct dsa_port *cpu_dp = dev->dsa_ptr; dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; -- cgit v1.2.3 From aa193d9b1d7ea6893ce24a9d141f676950563987 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:21 -0400 Subject: net: dsa: remove tag ops from the switch tree Now that the dsa_ptr is a dsa_port instance, there is no need to keep the tag operations in the dsa_switch_tree structure. Remove it. Signed-off-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/net/dsa.h | 11 ----------- net/dsa/dsa2.c | 2 -- net/dsa/legacy.c | 2 -- 3 files changed, 15 deletions(-) diff --git a/include/net/dsa.h b/include/net/dsa.h index 6bda01fa5747..10dceccd9ce8 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -130,11 +130,6 @@ struct dsa_switch_tree { */ struct dsa_platform_data *pd; - /* Copy of tag_ops->rcv for faster access in hot path */ - struct sk_buff * (*rcv)(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt); - /* * The switch port to which the CPU is attached. */ @@ -144,12 +139,6 @@ struct dsa_switch_tree { * Data for the individual switch chips. */ struct dsa_switch *ds[DSA_MAX_SWITCHES]; - - /* - * Tagging protocol operations for adding and removing an - * encapsulation tag. - */ - const struct dsa_device_ops *tag_ops; }; /* TC matchall action types, only mirroring for now */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 62302558f38c..54ed054777bd 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -523,11 +523,9 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, } dst->cpu_dp->tag_ops = tag_ops; - dst->tag_ops = tag_ops; /* Make a few copies for faster access in master receive hot path */ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->rcv = dst->tag_ops->rcv; dst->cpu_dp->dst = dst; return 0; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 71917505a5cc..19ff6e0a21dc 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -153,11 +153,9 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, return PTR_ERR(tag_ops); dst->cpu_dp->tag_ops = tag_ops; - dst->tag_ops = tag_ops; /* Few copies for faster access in master receive hot path */ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; - dst->rcv = dst->tag_ops->rcv; dst->cpu_dp->dst = dst; } -- cgit v1.2.3 From e1cfcbe82b4534bd0f99fef92a6d33843fd85e0e Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:40 +0800 Subject: ipv4: Namespaceify tcp_fastopen knob Different namespace application might require enable TCP Fast Open feature independently of the host. This patch series continues making more of the TCP Fast Open related sysctl knobs be per net-namespace. Reported-by: Luca BRUNO Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 1 - net/ipv4/af_inet.c | 7 ++++--- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_fastopen.c | 11 +++++------ net/ipv4/tcp_ipv4.c | 2 ++ 7 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index abc84d986da4..16420ccaef15 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -128,6 +128,7 @@ struct netns_ipv4 { int sysctl_tcp_timestamps; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; + int sysctl_tcp_fastopen; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index 770b608c8439..9e414a99034f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); /* sysctl variables for tcp */ -extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e31108e5ef79..ddd126d120ac 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; - int err; + int err, tcp_fastopen; lock_sock(sk); @@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && - (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0d3c038d7b04..e31e853cf486 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,13 +400,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen", - .data = &sysctl_tcp_fastopen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_fastopen_key", .mode = 0600, @@ -1085,6 +1078,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fastopen", + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5091402720ab..dac56c4ad357 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index e3c33220c418..31b08ec38cb8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,8 +9,6 @@ #include #include -int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; - struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); @@ -279,21 +277,22 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc) { - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); - if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + if (!((tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return NULL; } - if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ @@ -347,7 +346,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, return false; } - if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { cookie->len = -1; return true; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d9416b5162bc..88409b13c9d2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2472,6 +2472,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; + return 0; fail: tcp_sk_exit(net); -- cgit v1.2.3 From dd000598a39b6937fcefdf143720ec9fb5250e72 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:41 +0800 Subject: ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'publish' logic is not necessary after commit dfea2aa65424 ("tcp: Do not call tcp_fastopen_reset_cipher from interrupt context"), because in tcp_fastopen_cookie_gen,it wouldn't call tcp_fastopen_init_key_once. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 5 ----- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 4 ++-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 9e414a99034f..d9376e2458e9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1555,7 +1555,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(bool publish); +void tcp_fastopen_init_key_once(void); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ddd126d120ac..e73ce79d7176 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog) (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(); } err = inet_csk_listen_start(sk, backlog); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e31e853cf486..f6324ead0e19 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -282,11 +282,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } - /* Generate a dummy secret but don't publish it. This - * is needed so we don't regenerate a new key on the - * first invocation of tcp_fastopen_cookie_gen - */ - tcp_fastopen_init_key_once(false); tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dac56c4ad357..4e395452d69f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(); fastopen_queue_tune(sk, val); } else { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 31b08ec38cb8..8c8f0f0af59d 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -13,7 +13,7 @@ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); -void tcp_fastopen_init_key_once(bool publish) +void tcp_fastopen_init_key_once(void) { static u8 key[TCP_FASTOPEN_KEY_LENGTH]; @@ -23,7 +23,7 @@ void tcp_fastopen_init_key_once(bool publish) * All call sites of tcp_fastopen_cookie_gen also check * for a valid cookie, so this is an acceptable risk. */ - if (net_get_random_once(key, sizeof(key)) && publish) + if (net_get_random_once(key, sizeof(key))) tcp_fastopen_reset_cipher(key, sizeof(key)); } -- cgit v1.2.3 From 437138485656c41e32b8c63c0987cfa0348be0e6 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:42 +0800 Subject: ipv4: Namespaceify tcp_fastopen_key knob Different namespace application might require different tcp_fastopen_key independently of the host. David Miller pointed out there is a leak without releasing the context of tcp_fastopen_key during netns teardown. So add the release action in exit_batch path. Tested: 1. Container namespace: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 2817fff2-f803cf97-eadfd1f3-78c0992b cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: 1e5dd82a8c492ca9 2. Host: # cat /proc/sys/net/ipv4/tcp_fastopen_key: 107d7c5f-68eb2ac7-02fb06e6-ed341702 cookie key in tcp syn packets: Fast Open Cookie Kind: TCP Fast Open Cookie (34) Length: 10 Fast Open Cookie: e213c02bf0afbc8a Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 4 +++ include/net/tcp.h | 6 ++--- net/ipv4/af_inet.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 21 ++++++++------- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 64 +++++++++++++++++++++++++++++++--------------- net/ipv4/tcp_ipv4.c | 6 +++++ 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 16420ccaef15..7bb9603ff66c 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -36,6 +36,8 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +struct tcp_fastopen_context; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -129,6 +131,8 @@ struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; + struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; + spinlock_t tcp_fastopen_ctx_lock; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/include/net/tcp.h b/include/net/tcp.h index d9376e2458e9..6d25d8305054 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1549,13 +1549,13 @@ struct tcp_fastopen_request { }; void tcp_free_fastopen_req(struct tcp_sock *tp); -extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; -int tcp_fastopen_reset_cipher(void *key, unsigned int len); +void tcp_fastopen_ctx_destroy(struct net *net); +int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc); -void tcp_fastopen_init_key_once(void); +void tcp_fastopen_init_key_once(struct net *net); bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e73ce79d7176..43a1bbed7a42 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog) (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); - tcp_fastopen_init_key_once(); + tcp_fastopen_init_key_once(sock_net(sk)); } err = inet_csk_listen_start(sk, backlog); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index f6324ead0e19..20e19fe78dbd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } -static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, +static int proc_tcp_fastopen_key(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen); struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; @@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, return -ENOMEM; rcu_read_lock(); - ctxt = rcu_dereference(tcp_fastopen_ctx); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctxt) memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); else @@ -282,7 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + tcp_fastopen_reset_cipher(net, user_key, TCP_FASTOPEN_KEY_LENGTH); } bad_key: @@ -395,12 +397,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen_key", - .mode = 0600, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), - .proc_handler = proc_tcp_fastopen_key, - }, { .procname = "tcp_fastopen_blackhole_timeout_sec", .data = &sysctl_tcp_fastopen_blackhole_timeout, @@ -1080,6 +1076,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4e395452d69f..23225c98d287 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - tcp_fastopen_init_key_once(); + tcp_fastopen_init_key_once(net); fastopen_queue_tune(sk, val); } else { diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8c8f0f0af59d..4eae44ac3cb0 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -9,13 +9,18 @@ #include #include -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); - -void tcp_fastopen_init_key_once(void) +void tcp_fastopen_init_key_once(struct net *net) { - static u8 key[TCP_FASTOPEN_KEY_LENGTH]; + u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct tcp_fastopen_context *ctxt; + + rcu_read_lock(); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctxt) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); /* tcp_fastopen_reset_cipher publishes the new context * atomically, so we allow this race happening here. @@ -23,8 +28,8 @@ void tcp_fastopen_init_key_once(void) * All call sites of tcp_fastopen_cookie_gen also check * for a valid cookie, so this is an acceptable risk. */ - if (net_get_random_once(key, sizeof(key))) - tcp_fastopen_reset_cipher(key, sizeof(key)); + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(net, key, sizeof(key)); } static void tcp_fastopen_ctx_free(struct rcu_head *head) @@ -35,7 +40,22 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head) kfree(ctx); } -int tcp_fastopen_reset_cipher(void *key, unsigned int len) +void tcp_fastopen_ctx_destroy(struct net *net) +{ + struct tcp_fastopen_context *ctxt; + + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); + + ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + + if (ctxt) + call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); +} + +int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len) { int err; struct tcp_fastopen_context *ctx, *octx; @@ -59,26 +79,27 @@ error: kfree(ctx); } memcpy(ctx->key, key, len); - spin_lock(&tcp_fastopen_ctx_lock); + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); - octx = rcu_dereference_protected(tcp_fastopen_ctx, - lockdep_is_held(&tcp_fastopen_ctx_lock)); - rcu_assign_pointer(tcp_fastopen_ctx, ctx); - spin_unlock(&tcp_fastopen_ctx_lock); + octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); return err; } -static bool __tcp_fastopen_cookie_gen(const void *path, +static bool __tcp_fastopen_cookie_gen(struct net *net, + const void *path, struct tcp_fastopen_cookie *foc) { struct tcp_fastopen_context *ctx; bool ok = false; rcu_read_lock(); - ctx = rcu_dereference(tcp_fastopen_ctx); + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctx) { crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; @@ -94,7 +115,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path, * * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. */ -static bool tcp_fastopen_cookie_gen(struct request_sock *req, +static bool tcp_fastopen_cookie_gen(struct net *net, + struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) { @@ -102,7 +124,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct iphdr *iph = ip_hdr(syn); __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - return __tcp_fastopen_cookie_gen(path, foc); + return __tcp_fastopen_cookie_gen(net, path, foc); } #if IS_ENABLED(CONFIG_IPV6) @@ -110,13 +132,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct ipv6hdr *ip6h = ipv6_hdr(syn); struct tcp_fastopen_cookie tmp; - if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { + if (__tcp_fastopen_cookie_gen(net, &ip6h->saddr, &tmp)) { struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - return __tcp_fastopen_cookie_gen(buf, foc); + return __tcp_fastopen_cookie_gen(net, buf, foc); } } #endif @@ -296,7 +318,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ - tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + tcp_fastopen_cookie_gen(sock_net(sk), req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 88409b13c9d2..49c74c0d0d21 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2473,6 +2473,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_timestamps = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; + spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); return 0; fail: @@ -2483,7 +2484,12 @@ fail: static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { + struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + + list_for_each_entry(net, net_exit_list, exit_list) + tcp_fastopen_ctx_destroy(net); } static struct pernet_operations __net_initdata tcp_sk_ops = { -- cgit v1.2.3 From 3733be14a32bae288b61ed28341e593baba983af Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Wed, 27 Sep 2017 11:35:43 +0800 Subject: ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob Different namespace application might require different time period in second to disable Fastopen on active TCP sockets. Tested: Simulate following similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] And then print netstat of TCPFastOpenBlackhole, the counter increased as expected when the firewall blackhole issue is detected and active TFO is disabled. # cat /proc/net/netstat | awk '{print $91}' TCPFastOpenBlackhole 1 Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 +++ net/ipv4/sysctl_net_ipv4.c | 20 +++++++++++--------- net/ipv4/tcp_fastopen.c | 30 +++++++++++------------------- net/ipv4/tcp_ipv4.c | 2 ++ 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7bb9603ff66c..2c4222a5d102 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -133,6 +133,9 @@ struct netns_ipv4 { int sysctl_tcp_fastopen; struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; spinlock_t tcp_fastopen_ctx_lock; + unsigned int sysctl_tcp_fastopen_blackhole_timeout; + atomic_t tfo_active_disable_times; + unsigned long tfo_active_disable_stamp; #ifdef CONFIG_NET_L3_MASTER_DEV int sysctl_udp_l3mdev_accept; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 20e19fe78dbd..cac8dd309f39 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen_blackhole_timeout); int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - tcp_fastopen_active_timeout_reset(); + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return ret; } @@ -397,14 +399,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, @@ -1083,6 +1077,14 @@ static struct ctl_table ipv4_net_table[] = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 4eae44ac3cb0..de470e7e586f 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -422,25 +422,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect); * TFO connection with data exchanges. */ -/* Default to 1hr */ -unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; -static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); -static unsigned long tfo_active_disable_stamp __read_mostly; - /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ void tcp_fastopen_active_disable(struct sock *sk) { - atomic_inc(&tfo_active_disable_times); - tfo_active_disable_stamp = jiffies; - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); -} + struct net *net = sock_net(sk); -/* Reset tfo_active_disable_times to 0 */ -void tcp_fastopen_active_timeout_reset(void) -{ - atomic_set(&tfo_active_disable_times, 0); + atomic_inc(&net->ipv4.tfo_active_disable_times); + net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Calculate timeout for tfo active disable @@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - int tfo_da_times = atomic_read(&tfo_active_disable_times); - int multiplier; + unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int multiplier; if (!tfo_da_times) return false; /* Limit timout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; - if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + timeout = multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) return true; /* Mark check bit so we can check for successful active TFO @@ -495,10 +487,10 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk) } } } else if (tp->syn_fastopen_ch && - atomic_read(&tfo_active_disable_times)) { + atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) - tcp_fastopen_active_timeout_reset(); + atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 49c74c0d0d21..ad3b5bbaf942 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2474,6 +2474,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return 0; fail: -- cgit v1.2.3 From b80ccfe9bbcac70e66fdfaef73f0988a27f9a68c Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Tue, 26 Sep 2017 20:37:22 -0700 Subject: net-ipv6: remove unused IP6_ECN_clear() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is unused, and furthermore it is buggy since it suffers from the same issue that requires IP6_ECN_set_ce() to take a pointer to the skb so that it may (in case of CHECKSUM_COMPLETE) update skb->csum Instead of fixing it, let's just outright remove it. Tested: builds, and 'git grep IP6_ECN_clear' comes up empty Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index dce2d586d9ce..f5ff16d72fe6 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -133,11 +133,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) return 1; } -static inline void IP6_ECN_clear(struct ipv6hdr *iph) -{ - *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20); -} - static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) { dscp &= ~INET_ECN_MASK; -- cgit v1.2.3 From 9c86b846ce02f7e35d7234cf090b80553eba5389 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:15 +0200 Subject: bcm63xx_enet: correct clock usage Check the return code of prepare_enable and change one last instance of enable only to prepare_enable. Also properly disable and release the clock in error paths and on remove for enetsw. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 31 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index c6221f04a748..a45ec97b5b1e 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1773,7 +1773,9 @@ static int bcm_enet_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out; } - clk_prepare_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk_mac; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1805,9 +1807,11 @@ static int bcm_enet_probe(struct platform_device *pdev) if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; - goto out_put_clk_mac; + goto out_disable_clk_mac; } - clk_prepare_enable(priv->phy_clk); + ret = clk_prepare_enable(priv->phy_clk); + if (ret) + goto out_put_clk_phy; } /* do minimal hardware init to be able to probe mii bus */ @@ -1900,13 +1904,16 @@ out_free_mdio: out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) { + if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); + +out_put_clk_phy: + if (priv->phy_clk) clk_put(priv->phy_clk); - } -out_put_clk_mac: +out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); +out_put_clk_mac: clk_put(priv->mac_clk); out: free_netdev(dev); @@ -2748,7 +2755,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = PTR_ERR(priv->mac_clk); goto out_unmap; } - clk_enable(priv->mac_clk); + ret = clk_prepare_enable(priv->mac_clk); + if (ret) + goto out_put_clk; priv->rx_chan = 0; priv->tx_chan = 1; @@ -2769,7 +2778,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev) ret = register_netdev(dev); if (ret) - goto out_put_clk; + goto out_disable_clk; netif_carrier_off(dev); platform_set_drvdata(pdev, dev); @@ -2778,6 +2787,9 @@ static int bcm_enetsw_probe(struct platform_device *pdev) return 0; +out_disable_clk: + clk_disable_unprepare(priv->mac_clk); + out_put_clk: clk_put(priv->mac_clk); @@ -2809,6 +2821,9 @@ static int bcm_enetsw_remove(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); release_mem_region(res->start, resource_size(res)); + clk_disable_unprepare(priv->mac_clk); + clk_put(priv->mac_clk); + free_netdev(dev); return 0; } -- cgit v1.2.3 From d6213c1f2ad54a964b77471690264ed685718928 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:16 +0200 Subject: bcm63xx_enet: do not write to random DMA channel on BCM6345 The DMA controller regs actually point to DMA channel 0, so the write to ENETDMA_CFG_REG will actually modify a random DMA channel. Since DMA controller registers do not exist on BCM6345, guard the write with the usual check for dma_has_sram. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a45ec97b5b1e..a1e1e12e187a 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1062,7 +1062,8 @@ static int bcm_enet_open(struct net_device *dev) val = enet_readl(priv, ENET_CTL_REG); val |= ENET_CTL_ENABLE_MASK; enet_writel(priv, val, ENET_CTL_REG); - enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); + if (priv->dma_has_sram) + enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG); enet_dmac_writel(priv, priv->dma_chan_en_mask, ENETDMAC_CHANCFG, priv->rx_chan); -- cgit v1.2.3 From 527a48713b01057d94aeec8f4383b1e20c82522c Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:17 +0200 Subject: bcm63xx_enet: do not rely on probe order Do not rely on the shared device being probed before the enet(sw) devices. This makes it easier to eventually move out the shared device as a dma controller driver (what it should be). Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index a1e1e12e187a..8caf6abab3a6 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1722,10 +1722,8 @@ static int bcm_enet_probe(struct platform_device *pdev) const char *clk_name; int i, ret; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1); @@ -2696,11 +2694,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev) struct resource *res_mem; int ret, irq_rx, irq_tx; - /* stop if shared driver failed, assume driver->probe will be - * called in the same order we register devices (correct ?) - */ if (!bcm_enet_shared_base[0]) - return -ENODEV; + return -EPROBE_DEFER; res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); irq_rx = platform_get_irq(pdev, 0); -- cgit v1.2.3 From 7e697ce99ceb09538cdc1dfa9ebb3db60236b0a7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:18 +0200 Subject: bcm63xx_enet: use managed functions for clock/ioremap Use managed functions where possible to reduce the amount of resource handling on error and remove paths. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 54 +++++++--------------------- 1 file changed, 12 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 8caf6abab3a6..059ef4f1d137 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1767,14 +1767,14 @@ static int bcm_enet_probe(struct platform_device *pdev) clk_name = "enet1"; } - priv->mac_clk = clk_get(&pdev->dev, clk_name); + priv->mac_clk = devm_clk_get(&pdev->dev, clk_name); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); goto out; } ret = clk_prepare_enable(priv->mac_clk); if (ret) - goto out_put_clk_mac; + goto out; /* initialize default and fetch platform data */ priv->rx_ring_size = BCMENET_DEF_RX_DESC; @@ -1802,7 +1802,7 @@ static int bcm_enet_probe(struct platform_device *pdev) if (priv->mac_id == 0 && priv->has_phy && !priv->use_external_mii) { /* using internal PHY, enable clock */ - priv->phy_clk = clk_get(&pdev->dev, "ephy"); + priv->phy_clk = devm_clk_get(&pdev->dev, "ephy"); if (IS_ERR(priv->phy_clk)) { ret = PTR_ERR(priv->phy_clk); priv->phy_clk = NULL; @@ -1810,7 +1810,7 @@ static int bcm_enet_probe(struct platform_device *pdev) } ret = clk_prepare_enable(priv->phy_clk); if (ret) - goto out_put_clk_phy; + goto out_disable_clk_mac; } /* do minimal hardware init to be able to probe mii bus */ @@ -1906,14 +1906,8 @@ out_uninit_hw: if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); -out_put_clk_phy: - if (priv->phy_clk) - clk_put(priv->phy_clk); - out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); -out_put_clk_mac: - clk_put(priv->mac_clk); out: free_netdev(dev); return ret; @@ -1949,12 +1943,10 @@ static int bcm_enet_remove(struct platform_device *pdev) } /* disable hw block clocks */ - if (priv->phy_clk) { + if (priv->phy_clk) clk_disable_unprepare(priv->phy_clk); - clk_put(priv->phy_clk); - } + clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); free_netdev(dev); return 0; @@ -2734,26 +2726,20 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (ret) goto out; - if (!request_mem_region(res_mem->start, resource_size(res_mem), - "bcm63xx_enetsw")) { - ret = -EBUSY; + priv->base = devm_ioremap_resource(&pdev->dev, res_mem); + if (IS_ERR(priv->base)) { + ret = PTR_ERR(priv->base); goto out; } - priv->base = ioremap(res_mem->start, resource_size(res_mem)); - if (priv->base == NULL) { - ret = -ENOMEM; - goto out_release_mem; - } - - priv->mac_clk = clk_get(&pdev->dev, "enetsw"); + priv->mac_clk = devm_clk_get(&pdev->dev, "enetsw"); if (IS_ERR(priv->mac_clk)) { ret = PTR_ERR(priv->mac_clk); - goto out_unmap; + goto out; } ret = clk_prepare_enable(priv->mac_clk); if (ret) - goto out_put_clk; + goto out; priv->rx_chan = 0; priv->tx_chan = 1; @@ -2785,15 +2771,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev) out_disable_clk: clk_disable_unprepare(priv->mac_clk); - -out_put_clk: - clk_put(priv->mac_clk); - -out_unmap: - iounmap(priv->base); - -out_release_mem: - release_mem_region(res_mem->start, resource_size(res_mem)); out: free_netdev(dev); return ret; @@ -2805,20 +2782,13 @@ static int bcm_enetsw_remove(struct platform_device *pdev) { struct bcm_enet_priv *priv; struct net_device *dev; - struct resource *res; /* stop netdevice */ dev = platform_get_drvdata(pdev); priv = netdev_priv(dev); unregister_netdev(dev); - /* release device resources */ - iounmap(priv->base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - clk_disable_unprepare(priv->mac_clk); - clk_put(priv->mac_clk); free_netdev(dev); return 0; -- cgit v1.2.3 From 4e78e5c5d881bf2d6267545a554c1baf245257b7 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:19 +0200 Subject: bcm63xx_enet: drop unneeded NULL phy_clk check clk_disable and clk_unprepare are NULL-safe, so need to duplicate the NULL check of the functions. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 059ef4f1d137..f6bc13fe8a99 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1903,8 +1903,7 @@ out_free_mdio: out_uninit_hw: /* turn off mdc clock */ enet_writel(priv, 0, ENET_MIISC_REG); - if (priv->phy_clk) - clk_disable_unprepare(priv->phy_clk); + clk_disable_unprepare(priv->phy_clk); out_disable_clk_mac: clk_disable_unprepare(priv->mac_clk); @@ -1943,9 +1942,7 @@ static int bcm_enet_remove(struct platform_device *pdev) } /* disable hw block clocks */ - if (priv->phy_clk) - clk_disable_unprepare(priv->phy_clk); - + clk_disable_unprepare(priv->phy_clk); clk_disable_unprepare(priv->mac_clk); free_netdev(dev); -- cgit v1.2.3 From 840f922317fb5c20841d6d7f3853ead506546ade Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 1 Oct 2017 13:02:20 +0200 Subject: bcm63xx_enet: remove unneeded include We don't use anyhing from that file, so drop it. Signed-off-by: Jonas Gorski Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index 0a1b7b2e55bd..dd6ae3077433 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -8,7 +8,6 @@ #include #include -#include #include #include -- cgit v1.2.3 From 45bfbc013b4294cadafbef821d377d3a99c7ab1e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 1 Oct 2017 17:27:35 +0100 Subject: mlxsw: spectrum: fix uninitialized value in err In the unlikely event that mfc->mfc_un.res.ttls[i] is 255 for all values of i from 0 to MAXIVS-1, the err is not set at all and hence has a garbage value on the error return at the end of the function, so initialize it to 0. Also, the error return check on err and goto to err: inside the for loop makes it impossible for err to be zero at the end of the for loop, so we can remove the redundant err check at the end of the loop. Detected by CoverityScan CID#1457207 ("Unitialized scalar value") Fixes: c011ec1bbfd6 ("mlxsw: spectrum: Add the multicast routing offloading logic") Signed-off-by: Colin Ian King Reviewed-by: Yotam Gigi Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 09120259a45d..4aaf6ca1be7c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -349,7 +349,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, { struct mlxsw_sp_mr_route_vif_entry *rve, *tmp; struct mlxsw_sp_mr_route *mr_route; - int err; + int err = 0; int i; /* Allocate and init a new route and fill it with parameters */ @@ -376,8 +376,6 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, } } mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); - if (err) - goto err; mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); return mr_route; -- cgit v1.2.3 From 0929567a7a2dab8455a7313956973ff0d339709a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Oct 2017 14:07:34 -0700 Subject: samples/bpf: fix warnings in xdp_monitor_user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make local functions static to fix HOSTCC samples/bpf/xdp_monitor_user.o samples/bpf/xdp_monitor_user.c:64:7: warning: no previous prototype for ‘gettime’ [-Wmissing-prototypes] __u64 gettime(void) ^~~~~~~ samples/bpf/xdp_monitor_user.c:209:6: warning: no previous prototype for ‘print_bpf_prog_info’ [-Wmissing-prototypes] void print_bpf_prog_info(void) ^~~~~~~~~~~~~~~~~~~ Fixes: 3ffab5460264 ("samples/bpf: xdp_monitor tool based on tracepoints") Signed-off-by: Stephen Hemminger Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- samples/bpf/xdp_monitor_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index b51b4f5e3257..c5ab8b776973 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -61,7 +61,7 @@ static void usage(char *argv[]) } #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ -__u64 gettime(void) +static __u64 gettime(void) { struct timespec t; int res; @@ -206,7 +206,7 @@ static void stats_poll(int interval, bool err_only) } } -void print_bpf_prog_info(void) +static void print_bpf_prog_info(void) { int i; -- cgit v1.2.3 From 504871e602d9a9ea2321d47ca506887417f54e75 Mon Sep 17 00:00:00 2001 From: Manikanta Pubbisetty Date: Tue, 26 Sep 2017 00:39:15 +0530 Subject: mac80211: fix bandwidth computation for TDLS peers Section 11.23.1 of 80211-2016 specification allows TDLS peers to operate on wider bandwidths though they are connected to a BSS which do not support wider bandwidth operations, provided both the peers advertise wider bandwidth capabilities. The existing logic considers the minimum of station's and AP's capability for bandwidth computation. The same logic applies for TDLS peers as well, this restricts operating on wider bandwidths over a TDLS link when the peers are connected to legacy APs. As an example, if 80Mhz VHT capable peers are connected to a 20Mhz 5 GHz AP, then as per the existing logic TDLS operation will be restricted to 20Mhz. Address this problem by not considering BSS capability in bandwidth computation if the participating TDLS peers have wider bandwidth capability. Signed-off-by: Manikanta Pubbisetty [lots of wording/typo fixes] Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 19ec2189d3ac..b9276ac849fa 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); + + /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of + * IEEE80211-2016 specification makes higher bandwidth operation + * possible on the TDLS link if the peers have wider bandwidth + * capability. + */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) + return bw; + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; -- cgit v1.2.3 From 8f797c288e3a788e9578dd4db08b624a6d4b6a9b Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 26 Sep 2017 13:48:05 +0200 Subject: mac80211: fix STA_SLOW_THRESHOLD htmldocs failure Patch fixes htmldocs build problem: Error(.//net/mac80211/sta_info.h:416): cannot understand prototype: 'STA_SLOW_THRESHOLD 6000 ' Signed-off-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3acbdfa9f649..a35c964f6217 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -398,7 +398,7 @@ struct ieee80211_sta_rx_stats { u64 msdu[IEEE80211_NUM_TIDS + 1]; }; -/** +/* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This * value will be scaled by the number of active stations when it is being -- cgit v1.2.3 From 66b1bedf662518e9b6367990a87e9601b35a94c1 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:14 +0300 Subject: ieee80211: Add WFA TPC report element OUI type Add Transmit Power Control OUI type definition for WLAN_OUI_MICROSOFT. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 55a604ad459f..ee6657a0ed69 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2445,6 +2445,7 @@ enum ieee80211_sa_query_action { #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 #define WLAN_OUI_TYPE_MICROSOFT_WPS 4 +#define WLAN_OUI_TYPE_MICROSOFT_TPC 8 /* * WMM/802.11e Tspec Element -- cgit v1.2.3 From 503c1fb98ba3859c13863957c7c65c92371a9e50 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:49 +0200 Subject: cfg80211/nl80211: add a port authorized event Add an event that indicates that a connection is authorized (i.e. the 4 way handshake was performed by the driver). This event should be sent by the driver after sending a connect/roamed event. This is useful for networks that require 802.1X authentication. In cases that the driver supports 4 way handshake offload, but the 802.1X authentication is managed by user space, the driver needs to inform user space right after the 802.11 association was completed so user space can initialize its 802.1X state machine etc. However, it is also possible that the AP will choose to skip the 802.1X authentication (e.g. when PMKSA caching is used) and proceed with the 4 way handshake immediately. In this case the driver needs to inform user space that 802.1X authentication is no longer required (e.g. to prevent user space from disconnecting since it did not get any EAPOLs from the AP). This is also useful for roaming, in which case it is possible that the driver used the Fast Transition protocol so 802.1X is not required. Since there will now be a dedicated notification indicating that the connection is authorized, the authorized flag can be removed from the roamed event. Drivers can send the new port authorized event right after sending the roamed event to indicate the new AP is already authorized. This therefore reserves the old PORT_AUTHORIZED attribute. Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++++++---- include/uapi/linux/nl80211.h | 28 ++++++++++++++++----------- net/wireless/core.h | 5 +++++ net/wireless/nl80211.c | 34 ++++++++++++++++++++++++++++++--- net/wireless/nl80211.h | 2 ++ net/wireless/sme.c | 45 +++++++++++++++++++++++++++++++++++++++++++- net/wireless/util.c | 3 +++ 7 files changed, 119 insertions(+), 19 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cc1996081463..8b8118a7fadb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5428,9 +5428,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length - * @authorized: true if the 802.1X authentication was done by the driver or is - * not needed (e.g., when Fast Transition protocol was used), false - * otherwise. Ignored for networks that don't use 802.1X authentication. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5440,7 +5437,6 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; - bool authorized; }; /** @@ -5464,6 +5460,23 @@ struct cfg80211_roam_info { void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, gfp_t gfp); +/** + * cfg80211_port_authorized - notify cfg80211 of successful security association + * + * @dev: network device + * @bssid: the BSSID of the AP + * @gfp: allocation flags + * + * This function should be called by a driver that supports 4 way handshake + * offload after a security association was successfully established (i.e., + * the 4 way handshake was completed successfully). The call to this function + * should be preceded with a call to cfg80211_connect_result(), + * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to + * indicate the 802.11 association. + */ +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp); + /** * cfg80211_disconnected - notify cfg80211 that connection was dropped * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 59ba6ca66a0d..95832ce03a44 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -569,13 +569,14 @@ * authentication/association or not receiving a response from the AP. * Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as * well to remain backwards compatible. - * @NL80211_CMD_ROAM: notifcation indicating the card/driver roamed by itself. - * When the driver roamed in a network that requires 802.1X authentication, - * %NL80211_ATTR_PORT_AUTHORIZED should be set if the 802.1X authentication - * was done by the driver or if roaming was done using Fast Transition - * protocol (in which case 802.1X authentication is not needed). If - * %NL80211_ATTR_PORT_AUTHORIZED is not set, user space is responsible for - * the 802.1X authentication. + * When establishing a security association, drivers that support 4 way + * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when + * the 4 way handshake is completed successfully. + * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself. + * When a security association was established with the new AP (e.g. if + * the FT protocol was used for roaming or the driver completed the 4 way + * handshake), this event should be followed by an + * %NL80211_CMD_PORT_AUTHORIZED event. * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify * userspace that a connection was dropped by the AP or due to other * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and @@ -982,6 +983,12 @@ * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by * &NL80211_ATTR_MAC. + * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way + * handshake was completed successfully by the driver. The BSSID is + * specified with &NL80211_ATTR_MAC. Drivers that support 4 way handshake + * offload should send this event after indicating 802.11 association with + * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed + * &NL80211_CMD_DISCONNECT should be indicated instead. * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -1185,6 +1192,8 @@ enum nl80211_commands { NL80211_CMD_SET_PMK, NL80211_CMD_DEL_PMK, + NL80211_CMD_PORT_AUTHORIZED, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2138,10 +2147,7 @@ enum nl80211_commands { * in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it * wants to use the supported offload of the 4-way handshake. * @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT. - * @NL80211_ATTR_PORT_AUTHORIZED: flag attribute used in %NL80211_CMD_ROAMED - * notification indicating that that 802.1X authentication was done by - * the driver or is not needed (because roaming used the Fast Transition - * protocol). + * @NL80211_ATTR_PORT_AUTHORIZED: (reserved) * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined diff --git a/net/wireless/core.h b/net/wireless/core.h index 6e809325af3b..35165f42c2a8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -216,6 +216,7 @@ enum cfg80211_event_type { EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, + EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { @@ -235,6 +236,9 @@ struct cfg80211_event { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; + struct { + u8 bssid[ETH_ALEN]; + } pa; }; }; @@ -385,6 +389,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e39ba3cfd06..90e212db6889 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13830,9 +13830,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, info->req_ie)) || (info->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, - info->resp_ie)) || - (info->authorized && - nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED))) + info->resp_ie))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13846,6 +13844,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b96933322077..bf9e772a30b9 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -58,6 +58,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0a49b88070d0..f38ed490e42b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -960,7 +960,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, ev->rm.resp_ie_len = info->resp_ie_len; memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); ev->rm.bss = info->bss; - ev->rm.authorized = info->authorized; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); @@ -969,6 +968,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, } EXPORT_SYMBOL(cfg80211_roamed); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) +{ + ASSERT_WDEV_LOCK(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(!wdev->current_bss) || + WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + return; + + nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, + bssid); +} + +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + if (WARN_ON(!bssid)) + return; + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_PORT_AUTHORIZED; + memcpy(ev->pa.bssid, bssid, ETH_ALEN); + + /* + * Use the wdev event list so that if there are pending + * connected/roamed events, they will be reported first. + */ + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_port_authorized); + void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { diff --git a/net/wireless/util.c b/net/wireless/util.c index 7a1fcc6ee060..ff21c314a609 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -846,6 +846,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) case EVENT_STOPPED: __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; + case EVENT_PORT_AUTHORIZED: + __cfg80211_port_authorized(wdev, ev->pa.bssid); + break; } wdev_unlock(wdev); -- cgit v1.2.3 From 73f2c8e933b1dcf432ac8c6965a6e67af630077f Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:22 -0700 Subject: brcmfmac: Avoid possible out-of-bounds read In brcmf_p2p_notify_rx_mgmt_p2p_probereq(), chanspec is assigned before the length of rxframe is validated. This could lead to uninitialized data being accessed (but not printed). Since we already have a perfectly good endian-swapped copy of rxframe->chanspec in ch.chspec, and ch.chspec is not modified by decchspec(), avoid the extra assignment and use ch.chspec in the debug print. Suggested-by: Mattias Nissler Signed-off-by: Kevin Cernekee Reviewed-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 2ce675ab40ef..1c450c0727cb 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1853,7 +1853,6 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp, struct afx_hdl *afx_hdl = &p2p->afx_hdl; struct brcmf_cfg80211_vif *vif = ifp->vif; struct brcmf_rx_mgmt_data *rxframe = (struct brcmf_rx_mgmt_data *)data; - u16 chanspec = be16_to_cpu(rxframe->chanspec); struct brcmu_chan ch; u8 *mgmt_frame; u32 mgmt_frame_len; @@ -1906,7 +1905,7 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp, cfg80211_rx_mgmt(&vif->wdev, freq, 0, mgmt_frame, mgmt_frame_len, 0); brcmf_dbg(INFO, "mgmt_frame_len (%d) , e->datalen (%d), chanspec (%04x), freq (%d)\n", - mgmt_frame_len, e->datalen, chanspec, freq); + mgmt_frame_len, e->datalen, ch.chspec, freq); return 0; } -- cgit v1.2.3 From a7c9acc452b21b56c99dd7dfe0ab542f7baa6563 Mon Sep 17 00:00:00 2001 From: Kevin Cernekee Date: Sat, 16 Sep 2017 21:08:23 -0700 Subject: brcmfmac: Delete redundant length check brcmf_fweh_process_event() sets event->datalen to the endian-swapped value of event_packet->msg.datalen, which is the same as emsg.datalen. This length is already validated in brcmf_fweh_process_event(), so there is no need to check it again upon dequeuing the event. Suggested-by: Arend van Spriel Signed-off-by: Kevin Cernekee Signed-off-by: Kalle Valo --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c index 4eb1e1ce9ace..27e661fa356f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c @@ -257,11 +257,6 @@ static void brcmf_fweh_event_worker(struct work_struct *work) brcmf_dbg_hex_dump(BRCMF_EVENT_ON(), event->data, min_t(u32, emsg.datalen, 64), "event payload, len=%d\n", emsg.datalen); - if (emsg.datalen > event->datalen) { - brcmf_err("event invalid length header=%d, msg=%d\n", - event->datalen, emsg.datalen); - goto event_free; - } /* special handling of interface event */ if (event->code == BRCMF_E_IF) { -- cgit v1.2.3 From 17a91809942ca32c70026d2d5ba3348a2c4fdf8f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 2 Oct 2017 07:17:50 -0700 Subject: fm10k: ensure we process SM mbx when processing VF mbx When we process VF mailboxes, the driver is likely going to also queue up messages to the switch manager. This process merely queues up the FIFO, but doesn't actually begin the transmission process. Because we hold the mailbox lock during this VF processing, the PF<->SM mailbox is not getting processed at this time. Ensure that we actually process the PF<->SM mailbox in between each PF<->VF mailbox. This should ensure prompt transmission of the messages queued up after each VF message is received and handled. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 5f4dac0d36ef..2ec49116fe91 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -126,6 +126,9 @@ process_mbx: struct fm10k_mbx_info *mbx = &vf_info->mbx; u16 glort = vf_info->glort; + /* process the SM mailbox first to drain outgoing messages */ + hw->mbx.ops.process(hw, &hw->mbx); + /* verify port mapping is valid, if not reset port */ if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) hw->iov.ops.reset_lport(hw, vf_info); -- cgit v1.2.3 From b52b7f7059f2df8eb3258a25bc69e12dc21ebcd7 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 8 Mar 2017 15:55:43 -0800 Subject: fm10k: reschedule service event if we stall the PF<->SM mailbox When we are handling PF<->VF mailbox messages, it is possible that the VF will send us so many messages that the PF<->SM FIFO will fill up. In this case, we stop the loop and wait until the service event is rescheduled. Normally this should happen due to an interrupt. But it is possible that we don't get another interrupt for a while and it isn't until the service timer actually reschedules us. Instead, simply reschedule immediately which will cause the service event to be run again as soon as we exit. This ensures that we promptly handle all of the PF<->VF messages with minimal delay, while still giving time for the SM mailbox to drain. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 2ec49116fe91..d8356c494f06 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -143,6 +143,10 @@ process_mbx: if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) { /* keep track of how many times this occurs */ interface->hw_sm_mbx_full++; + + /* make sure we try again momentarily */ + fm10k_service_event_schedule(interface); + break; } -- cgit v1.2.3 From 95f49d4bdee34dd0f68446bb260ab537f62ed9b3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 8 May 2017 18:18:09 +0200 Subject: fm10k: Use seq_putc() in fm10k_dbg_desc_break() Two single characters should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c index 5116fd043630..14df09e2d964 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c @@ -52,9 +52,9 @@ static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s, static void fm10k_dbg_desc_break(struct seq_file *s, int i) { while (i--) - seq_puts(s, "-"); + seq_putc(s, '-'); - seq_puts(s, "\n"); + seq_putc(s, '\n'); } static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v) -- cgit v1.2.3 From 5c66d1251d67714e9f6e6b0af18ca989109b876f Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:04 -0700 Subject: fm10k: stop spurious link down messages when Tx FIFO is full In fm10k_get_host_state_generic, we check the mailbox tx_read() function to ensure that the mailbox is still open. This function also checks to make sure we have space to transmit another message. Unfortunately, if we just recently sent a bunch of messages (such as enabling hundreds of VLANs on a VF) this can result in a race where the watchdog task thinks the link went down just because we haven't had time to process all these messages yet. Instead, lets just check whether the mailbox is still open. This ensures that we don't race with the Tx FIFO, and we only link down once the mailbox is not open. This is safe, because if the FIFO fills up and we're unable to send a message for too long, we'll end up triggering the timeout detection which results in a reset. Additionally, since we still check to ensure the mailbox state is OPEN, we'll transition to link down whenever the mailbox closes as well. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 62a6ad9b3eed..736a9f087bc9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -517,8 +517,8 @@ s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready) goto out; } - /* verify Mailbox is still valid */ - if (!mbx->ops.tx_ready(mbx, FM10K_VFMBX_MSG_MTU)) + /* verify Mailbox is still open */ + if (mbx->state != FM10K_STATE_OPEN) goto out; /* interface cannot receive traffic without logical ports */ -- cgit v1.2.3 From 375ce90eab7ee1c87eefa2cd312b0be9ac961082 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:05 -0700 Subject: fm10k: fix typos on fall through comments Newer versions of GCC since version 7 now warn when a case statement may fall through without an explicit comment. "Fallthough" does not count as it is misspelled. Fix the typos for these comments to appease the new warnings. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_mbx.c | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c index 334088a101c3..244d3ad58ca7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1586,7 +1586,7 @@ s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx, mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0); break; } - /* fallthough */ + /* fall through */ default: return FM10K_MBX_ERR_NO_MBX; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 40ee0242a80a..9e4fb3a44376 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1334,19 +1334,19 @@ static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info, case FM10K_XCAST_MODE_PROMISC: if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE) return FM10K_XCAST_MODE_PROMISC; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_ALLMULTI: if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE) return FM10K_XCAST_MODE_ALLMULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_MULTI: if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE) return FM10K_XCAST_MODE_MULTI; - /* fallthough */ + /* fall through */ case FM10K_XCAST_MODE_NONE: if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE) return FM10K_XCAST_MODE_NONE; - /* fallthough */ + /* fall through */ default: break; } -- cgit v1.2.3 From b94dd008c401fc73a8d843e3219356255f40c1ed Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:06 -0700 Subject: fm10k: avoid possible truncation of q_vector->name New versions of GCC since version 7 began warning about possible truncation of calls to snprintf. We can fix this and avoid false positives. First, we should pass the full buffer size to snprintf, because it guarantees a NULL character as part of its passed length, so passing len-1 is simply wasting a byte of possible storage. Second, if we make the ri and ti variables unsigned, the compiler is able to correctly reason that the value never gets larger than 256, so it doesn't need to warn about the full space required to print a signed integer. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 63784576ae8b..9212b3fa3b62 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1544,7 +1544,7 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) struct net_device *dev = interface->netdev; struct fm10k_hw *hw = &interface->hw; struct msix_entry *entry; - int ri = 0, ti = 0; + unsigned int ri = 0, ti = 0; int vector, err; entry = &interface->msix_entries[NON_Q_VECTORS(hw)]; @@ -1554,15 +1554,15 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface) /* name the vector */ if (q_vector->tx.count && q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-TxRx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-TxRx-%u", dev->name, ri++); ti++; } else if (q_vector->rx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-rx-%d", dev->name, ri++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-rx-%u", dev->name, ri++); } else if (q_vector->tx.count) { - snprintf(q_vector->name, sizeof(q_vector->name) - 1, - "%s-tx-%d", dev->name, ti++); + snprintf(q_vector->name, sizeof(q_vector->name), + "%s-tx-%u", dev->name, ti++); } else { /* skip this unused q_vector */ continue; -- cgit v1.2.3 From 523a0b558db4ca205522976077911e5efe235781 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:07 -0700 Subject: fm10k: add missing fall through comment Newer versions of GCC starting with 7 now additionally warn when a case statement may fall through without an explicit comment mentioning it. Add such a comment to silence the warning, as this is expected. Unfortunately the comment must come directly before the next case statement, so we put it outside the #ifdef. Otherwise, the compiler cannot properly detect it and thus the warning is displayed regardless. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 9dffaba85ae6..189d52a8a605 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -876,6 +876,7 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring, case IPPROTO_GRE: if (skb->encapsulation) break; + /* fall through */ default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, -- cgit v1.2.3 From 8bac58be1700dab3cac8cb53ed0651da40777024 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:08 -0700 Subject: fm10k: avoid needless delay when loading driver When we load the driver, we set the last_reset to be in the future, which delays the initial driver reset. Additionally, the service task isn't scheduled to run automatically until the timer runs out. This causes a needless delay of the first reset to begin talking to the switch manager. We can avoid this by simply not setting last_reset and immediately scheduling the service task while in probe. This allows the device to wake up faster, and avoids this delay. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 9212b3fa3b62..6c2c4bffaedf 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1800,9 +1800,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev->vlan_features |= NETIF_F_HIGHDMA; } - /* delay any future reset requests */ - interface->last_reset = jiffies + (10 * HZ); - /* reset and initialize the hardware so it is in a known state */ err = hw->mac.ops.reset_hw(hw); if (err) { @@ -2079,8 +2076,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable SR-IOV after registering netdev to enforce PF/VF ordering */ fm10k_iov_configure(pdev, 0); - /* clear the service task disable bit to allow service task to start */ + /* clear the service task disable bit and kick off service task */ clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); return 0; -- cgit v1.2.3 From 4abf01b43b62525e4f1a20dd1a2bc4a1967d8928 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:09 -0700 Subject: fm10k: simplify reading PFVFLRE register We're doing a really convoluted bitshift and read for the PFVFLRE register. Just reading the PFVFLRE(1), shifting it by 32, then reading PFVFLRE(0) should be sufficient. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index d8356c494f06..dfc88a463735 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -67,10 +67,8 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) /* read VFLRE to determine if any VFs have been reset */ do { - vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(0)); + vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); vflre <<= 32; - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(1)); - vflre = (vflre << 32) | (vflre >> 32); vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); i = iov_data->num_vfs; -- cgit v1.2.3 From d876c1583bb1b7f7264880265b824e88b791aa5d Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:10 -0700 Subject: fm10k: don't loop while resetting VFs due to VFLR event We've always had a really weird looping construction for resetting VFs. We read the VFLRE register and reset the VF if the corresponding bit is set, which makes sense. However we loop continuously until we no longer have any bits left unset. At first this makes sense, as a sort of "keep trying until we succeed" concept. Unfortunately this causes a problem if we happen to surprise remove while this code is executing, because in this case we'll always read all 1s for the VFLRE register. This results in a hard lockup on the CPU because the loop will never terminate. Because our own reset function will clear the VFLR event register always, (except when we've lost PCIe link obviously) there is no real reason to loop. In practice, we'll loop over once and find that no VFs are pending anymore. Lets just check once. Since we're clear the notification when we reset there's no benefit to the loop. Additionally, there shouldn't be a race as future VLFRE events should trigger an interrupt. Additionally, we didn't warn or do anything in the looped case anyways. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index dfc88a463735..03897720bf0b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -66,23 +66,21 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) goto read_unlock; /* read VFLRE to determine if any VFs have been reset */ - do { - vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); - vflre <<= 32; - vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); + vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1)); + vflre <<= 32; + vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0)); - i = iov_data->num_vfs; + i = iov_data->num_vfs; - for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { - struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; + for (vflre <<= 64 - i; vflre && i--; vflre += vflre) { + struct fm10k_vf_info *vf_info = &iov_data->vf_info[i]; - if (vflre >= 0) - continue; + if (vflre >= 0) + continue; - hw->iov.ops.reset_resources(hw, vf_info); - vf_info->mbx.ops.connect(hw, &vf_info->mbx); - } - } while (i != iov_data->num_vfs); + hw->iov.ops.reset_resources(hw, vf_info); + vf_info->mbx.ops.connect(hw, &vf_info->mbx); + } read_unlock: rcu_read_unlock(); -- cgit v1.2.3 From dd5eede2b711350f684e8510300cb3762a821ae6 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:11 -0700 Subject: fm10k: avoid divide by zero in rare cases when device is resetting It is possible that under rare circumstances the device is undergoing a reset, such as when a PFLR occurs, and the device may be transmitting simultaneously. In this case, we might attempt to divide by zero when finding the proper r_idx. Instead, lets read the num_tx_queues once, and make sure it's non-zero. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index e69d49d91d67..77d495fedced 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -643,9 +643,13 @@ int fm10k_close(struct net_device *netdev) static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) { struct fm10k_intfc *interface = netdev_priv(dev); + int num_tx_queues = READ_ONCE(interface->num_tx_queues); unsigned int r_idx = skb->queue_mapping; int err; + if (!num_tx_queues) + return NETDEV_TX_BUSY; + if ((skb->protocol == htons(ETH_P_8021Q)) && !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame @@ -698,8 +702,8 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) __skb_put(skb, pad_len); } - if (r_idx >= interface->num_tx_queues) - r_idx %= interface->num_tx_queues; + if (r_idx >= num_tx_queues) + r_idx %= num_tx_queues; err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]); -- cgit v1.2.3 From 65b0a469e9e6d025ce9cc46f6cc94d28abf5561c Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:12 -0700 Subject: fm10k: move fm10k_prepare_for_reset and fm10k_handle_reset A future patch needs these functions defined earlier in the file. Move them closer to above where they will be called. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 58 ++++++++++++++-------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 6c2c4bffaedf..41335154d6b1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -132,35 +132,6 @@ static void fm10k_service_timer(unsigned long data) fm10k_service_event_schedule(interface); } -static void fm10k_detach_subtask(struct fm10k_intfc *interface) -{ - struct net_device *netdev = interface->netdev; - u32 __iomem *hw_addr; - u32 value; - - /* do nothing if device is still present or hw_addr is set */ - if (netif_device_present(netdev) || interface->hw.hw_addr) - return; - - /* check the real address space to see if we've recovered */ - hw_addr = READ_ONCE(interface->uc_addr); - value = readl(hw_addr); - if (~value) { - interface->hw.hw_addr = interface->uc_addr; - netif_device_attach(netdev); - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); - netdev_warn(netdev, "PCIe link restored, device now attached\n"); - return; - } - - rtnl_lock(); - - if (netif_running(netdev)) - dev_close(netdev); - - rtnl_unlock(); -} - static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; @@ -270,6 +241,35 @@ reinit_err: return err; } +static void fm10k_detach_subtask(struct fm10k_intfc *interface) +{ + struct net_device *netdev = interface->netdev; + u32 __iomem *hw_addr; + u32 value; + + /* do nothing if device is still present or hw_addr is set */ + if (netif_device_present(netdev) || interface->hw.hw_addr) + return; + + /* check the real address space to see if we've recovered */ + hw_addr = READ_ONCE(interface->uc_addr); + value = readl(hw_addr); + if (~value) { + interface->hw.hw_addr = interface->uc_addr; + netif_device_attach(netdev); + set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); + netdev_warn(netdev, "PCIe link restored, device now attached\n"); + return; + } + + rtnl_lock(); + + if (netif_running(netdev)) + dev_close(netdev); + + rtnl_unlock(); +} + static void fm10k_reinit(struct fm10k_intfc *interface) { int err; -- cgit v1.2.3 From 04914390f5a197da1e042f585e1263ad8ebff632 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:13 -0700 Subject: fm10k: prevent race condition of __FM10K_SERVICE_SCHED Although very unlikely, it is possible that cancel_work_sync() may stop the service_task before it actually started. In this case, the __FM10K_SERVICE_SCHED bit will never be cleared. This results in the service task being unable to reschedule in the future. Add a helper function which sets the service disable bit, waits for the service task to stop and clears the schedule bit, thus avoiding the race condition. We know the schedule bit is safe to clear because the cancel_work_sync() guarantees the service task is not running. Add a helper function also to restart the service task, for symmetry. This is not strictly needed but helps the mental model of how to stop and start the service task. This race could only happen in fm10k_suspend/fm10k_resume as this is the only place where the service task is actually restarted. Thus, suspend/resume testing would be ideal. However, note that the chance of this happening is very slim as the service event is scheduled for immediate execution, and you would have to trigger a suspend at almost the exact same time as the service task was scheduled. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 32 ++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 41335154d6b1..9575f7c1862d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -118,6 +118,27 @@ static void fm10k_service_event_complete(struct fm10k_intfc *interface) fm10k_service_event_schedule(interface); } +static void fm10k_stop_service_event(struct fm10k_intfc *interface) +{ + set_bit(__FM10K_SERVICE_DISABLE, interface->state); + cancel_work_sync(&interface->service_task); + + /* It's possible that cancel_work_sync stopped the service task from + * running before it could actually start. In this case the + * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that + * the service task cannot be running at this point, we need to clear + * the scheduled bit, as otherwise the service task may never be + * restarted. + */ + clear_bit(__FM10K_SERVICE_SCHED, interface->state); +} + +static void fm10k_start_service_event(struct fm10k_intfc *interface) +{ + clear_bit(__FM10K_SERVICE_DISABLE, interface->state); + fm10k_service_event_schedule(interface); +} + /** * fm10k_service_timer - Timer Call-back * @data: pointer to interface cast into an unsigned long @@ -2116,8 +2137,7 @@ static void fm10k_remove(struct pci_dev *pdev) del_timer_sync(&interface->service_timer); - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); /* free netdev, this may bounce the interrupts due to setup_tc */ if (netdev->reg_state == NETREG_REGISTERED) @@ -2155,8 +2175,7 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) * stopped. We stop the watchdog task until after we resume software * activity. */ - set_bit(__FM10K_SERVICE_DISABLE, interface->state); - cancel_work_sync(&interface->service_task); + fm10k_stop_service_event(interface); fm10k_prepare_for_reset(interface); } @@ -2183,9 +2202,8 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) interface->link_down_event = jiffies + (HZ); set_bit(__FM10K_LINK_DOWN, interface->state); - /* clear the service task disable bit to allow service task to start */ - clear_bit(__FM10K_SERVICE_DISABLE, interface->state); - fm10k_service_event_schedule(interface); + /* restart the service task */ + fm10k_start_service_event(interface); return err; } -- cgit v1.2.3 From 32f16369e59fcc505c5ed93a6a8cad3d5636b463 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Oct 2017 10:41:15 +0200 Subject: net/dst: Make skb parameter of skb{metadata_dst, tunnel_info}() const Make the skb parameter of skb_metadata_dst() and skb_tunnel_info() const as they are not modified. This is in preparation for using them in call-sites where skb is const. Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/dst_metadata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a803129a4849..9fba2ebf6dda 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -24,7 +24,7 @@ struct metadata_dst { } u; }; -static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) +static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); @@ -34,7 +34,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) return NULL; } -static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +static inline struct ip_tunnel_info * +skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; -- cgit v1.2.3 From a38402bc50709aac76796a955a15152a76e3fd4e Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 2 Oct 2017 10:41:16 +0200 Subject: flow_dissector: dissect tunnel info Move dissection of tunnel info from the flower classifier to the flow dissector where all other dissection occurs. This should not have any behavioural affect on other users of the flow dissector. Signed-off-by: Simon Horman Reviewed-by: Jakub Kicinski Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++ net/sched/cls_flower.c | 25 ------------ 2 files changed, 100 insertions(+), 25 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0a977373d003..1f5caafb4492 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); +static void +skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_control *ctrl; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) + return; + + ctrl = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + target_container); + ctrl->addr_type = type; +} + +static void +__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct ip_tunnel_info *info; + struct ip_tunnel_key *key; + + /* A quick check to see if there might be something to do. */ + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS)) + return; + + info = skb_tunnel_info(skb); + if (!info) + return; + + key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *ipv4; + + ipv4 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target_container); + ipv4->src = key->u.ipv4.src; + ipv4->dst = key->u.ipv4.dst; + } + break; + case AF_INET6: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *ipv6; + + ipv6 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + target_container); + ipv6->src = key->u.ipv6.src; + ipv6->dst = key->u.ipv6.dst; + } + break; + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *keyid; + + keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target_container); + keyid->keyid = tunnel_id_to_key32(key->tun_id); + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *tp; + + tp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + target_container); + tp->src = key->tp_src; + tp->dst = key->tp_dst; + } +} + static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@ -478,6 +575,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); + __skb_flow_dissect_tunnel_info(skb, flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d230cb4c8094..db831ac708f6 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -152,37 +152,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); - info = skb_tunnel_info(skb); - if (info) { - struct ip_tunnel_key *key = &info->key; - - switch (ip_tunnel_info_af(info)) { - case AF_INET: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV4_ADDRS; - skb_key.enc_ipv4.src = key->u.ipv4.src; - skb_key.enc_ipv4.dst = key->u.ipv4.dst; - break; - case AF_INET6: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV6_ADDRS; - skb_key.enc_ipv6.src = key->u.ipv6.src; - skb_key.enc_ipv6.dst = key->u.ipv6.dst; - break; - } - - skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); - skb_key.enc_tp.src = key->tp_src; - skb_key.enc_tp.dst = key->tp_dst; - } - skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. -- cgit v1.2.3 From 6227efc1a20b24a21ce8e9122c0569696d644aff Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 2 Oct 2017 12:05:29 +0200 Subject: selftests: rtnetlink.sh: add vxlan and fou test cases fou test lifted from ip-fou man page. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 96 ++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index a048f7a5f94c..62c87da92770 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -332,6 +332,101 @@ kci_test_vrf() echo "PASS: vrf" } +kci_test_encap_vxlan() +{ + ret=0 + vxlan="test-vxlan0" + vlan="test-vlan0" + testns="$1" + + ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ + dev "$devdummy" dstport 4789 2>/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: can't add vxlan interface, skipping test" + return 0 + fi + check_err $? + + ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link set up dev "$vxlan" + check_err $? + + ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1 + check_err $? + + ip netns exec "$testns" ip link del "$vxlan" + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: vxlan" + return 1 + fi + echo "PASS: vxlan" +} + +kci_test_encap_fou() +{ + ret=0 + name="test-fou" + testns="$1" + + ip fou help 2>&1 |grep -q 'Usage: ip fou' + if [ $? -ne 0 ];then + echo "SKIP: fou: iproute2 too old" + return 1 + fi + + ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null + if [ $? -ne 0 ];then + echo "FAIL: can't add fou port 7777, skipping test" + return 1 + fi + + ip netns exec "$testns" ip fou add port 8888 ipproto 4 + check_err $? + + ip netns exec "$testns" ip fou del port 9999 2>/dev/null + check_fail $? + + ip netns exec "$testns" ip fou del port 7777 + check_err $? + + if [ $ret -ne 0 ]; then + echo "FAIL: fou" + return 1 + fi + + echo "PASS: fou" +} + +# test various encap methods, use netns to avoid unwanted interference +kci_test_encap() +{ + testns="testns" + ret=0 + + ip netns add "$testns" + if [ $? -ne 0 ]; then + echo "SKIP encap tests: cannot add net namespace $testns" + return 1 + fi + + ip netns exec "$testns" ip link set lo up + check_err $? + + ip netns exec "$testns" ip link add name "$devdummy" type dummy + check_err $? + ip netns exec "$testns" ip link set "$devdummy" up + check_err $? + + kci_test_encap_vxlan "$testns" + kci_test_encap_fou "$testns" + + ip netns del "$testns" +} + kci_test_rtnl() { kci_add_dummy @@ -348,6 +443,7 @@ kci_test_rtnl() kci_test_addrlabel kci_test_ifalias kci_test_vrf + kci_test_encap kci_del_dummy } -- cgit v1.2.3 From 7ff176f81dd4675d08951d2395c7ddb1d9974da6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:21:57 +0200 Subject: mlxsw: spectrum_router: Fix a typo Signed-off-by: Petr Machata Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ef4b86b3aa9b..75078a3fbee8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1680,7 +1680,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n"); break; } num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); -- cgit v1.2.3 From 85f44a15b1920babc2d28c11f514f5b217a29968 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 2 Oct 2017 12:21:58 +0200 Subject: mlxsw: spectrum_router: Drop a redundant condition Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 75078a3fbee8..58bc04cbbef4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3251,7 +3251,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, return; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) + else mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: -- cgit v1.2.3 From f2f2efb807d339513199b1bb771806c90cce83ae Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:28 +0300 Subject: byteorder: Move {cpu_to_be32, be32_to_cpu}_array() from Thunderbolt to core We will be using these when communicating XDomain discovery protocol over Thunderbolt link but they might be useful for other drivers as well. Make them available through byteorder/generic.h. Suggested-by: Andy Shevchenko Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 14 -------------- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index fb40dd0588b9..e6a4c9458c76 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -289,20 +289,6 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, } } -static void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = cpu_to_be32(src[i]); -} - -static void be32_to_cpu_array(u32 *dst, __be32 *src, size_t len) -{ - int i; - for (i = 0; i < len; i++) - dst[i] = be32_to_cpu(src[i]); -} - static __be32 tb_crc(const void *data, size_t len) { return cpu_to_be32(~__crc32c_le(~0, data, len)); diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 89f67c1c3160..805d16654459 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -170,4 +170,20 @@ static inline void be64_add_cpu(__be64 *var, u64 val) *var = cpu_to_be64(be64_to_cpu(*var) + val); } +static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = cpu_to_be32(src[i]); +} + +static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = be32_to_cpu(src[i]); +} + #endif /* _LINUX_BYTEORDER_GENERIC_H */ -- cgit v1.2.3 From 806717081ab6088957c4857d42941159a07cc571 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:29 +0300 Subject: thunderbolt: Remove __packed from ICM message structures These messages are all 32-bit aligned and they should be packed without the __packed attribute just fine. It also allows compiler to generate better code on some architectures. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/tb_msgs.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index de6441e4a060..f3adf58a40ce 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -130,7 +130,7 @@ struct icm_pkg_header { u8 flags; u8 packet_id; u8 total_packets; -} __packed; +}; #define ICM_FLAGS_ERROR BIT(0) #define ICM_FLAGS_NO_KEY BIT(1) @@ -139,20 +139,20 @@ struct icm_pkg_header { struct icm_pkg_driver_ready { struct icm_pkg_header hdr; -} __packed; +}; struct icm_pkg_driver_ready_response { struct icm_pkg_header hdr; u8 romver; u8 ramver; u16 security_level; -} __packed; +}; /* Falcon Ridge & Alpine Ridge common messages */ struct icm_fr_pkg_get_topology { struct icm_pkg_header hdr; -} __packed; +}; #define ICM_GET_TOPOLOGY_PACKETS 14 @@ -167,7 +167,7 @@ struct icm_fr_pkg_get_topology_response { u32 reserved[2]; u32 ports[16]; u32 port_hop_info[16]; -} __packed; +}; #define ICM_SWITCH_USED BIT(0) #define ICM_SWITCH_UPSTREAM_PORT_MASK GENMASK(7, 1) @@ -184,7 +184,7 @@ struct icm_fr_event_device_connected { u8 connection_id; u16 link_info; u32 ep_name[55]; -} __packed; +}; #define ICM_LINK_INFO_LINK_MASK 0x7 #define ICM_LINK_INFO_DEPTH_SHIFT 4 @@ -197,13 +197,13 @@ struct icm_fr_pkg_approve_device { u8 connection_key; u8 connection_id; u16 reserved; -} __packed; +}; struct icm_fr_event_device_disconnected { struct icm_pkg_header hdr; u16 reserved; u16 link_info; -} __packed; +}; struct icm_fr_pkg_add_device_key { struct icm_pkg_header hdr; @@ -212,7 +212,7 @@ struct icm_fr_pkg_add_device_key { u8 connection_id; u16 reserved; u32 key[8]; -} __packed; +}; struct icm_fr_pkg_add_device_key_response { struct icm_pkg_header hdr; @@ -220,7 +220,7 @@ struct icm_fr_pkg_add_device_key_response { u8 connection_key; u8 connection_id; u16 reserved; -} __packed; +}; struct icm_fr_pkg_challenge_device { struct icm_pkg_header hdr; @@ -229,7 +229,7 @@ struct icm_fr_pkg_challenge_device { u8 connection_id; u16 reserved; u32 challenge[8]; -} __packed; +}; struct icm_fr_pkg_challenge_device_response { struct icm_pkg_header hdr; @@ -239,7 +239,7 @@ struct icm_fr_pkg_challenge_device_response { u16 reserved; u32 challenge[8]; u32 response[8]; -} __packed; +}; /* Alpine Ridge only messages */ @@ -247,7 +247,7 @@ struct icm_ar_pkg_get_route { struct icm_pkg_header hdr; u16 reserved; u16 link_info; -} __packed; +}; struct icm_ar_pkg_get_route_response { struct icm_pkg_header hdr; @@ -255,6 +255,6 @@ struct icm_ar_pkg_get_route_response { u16 link_info; u32 route_hi; u32 route_lo; -} __packed; +}; #endif -- cgit v1.2.3 From cdae7c07e3e3509eaabc18c1640a55dc5b99c179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:30 +0300 Subject: thunderbolt: Add support for XDomain properties Thunderbolt XDomain discovery protocol uses directories which contain properties and other directories to exchange information about what capabilities the remote host supports. This also includes identification information like device ID and name. This adds support for parsing and formatting these properties and establishes an API drivers can use in addition to the core Thunderbolt driver. This API is exposed in a new header: include/linux/thunderbolt.h. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/Makefile | 2 +- drivers/thunderbolt/property.c | 670 +++++++++++++++++++++++++++++++++++++++++ include/linux/thunderbolt.h | 89 ++++++ 3 files changed, 760 insertions(+), 1 deletion(-) create mode 100644 drivers/thunderbolt/property.c create mode 100644 include/linux/thunderbolt.h diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile index 4900febc6c8a..7afd21f5383a 100644 --- a/drivers/thunderbolt/Makefile +++ b/drivers/thunderbolt/Makefile @@ -1,3 +1,3 @@ obj-${CONFIG_THUNDERBOLT} := thunderbolt.o thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel_pci.o eeprom.o -thunderbolt-objs += domain.o dma_port.o icm.o +thunderbolt-objs += domain.o dma_port.o icm.o property.o diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c new file mode 100644 index 000000000000..8fe913a95b4a --- /dev/null +++ b/drivers/thunderbolt/property.c @@ -0,0 +1,670 @@ +/* + * Thunderbolt XDomain property support + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +struct tb_property_entry { + u32 key_hi; + u32 key_lo; + u16 length; + u8 reserved; + u8 type; + u32 value; +}; + +struct tb_property_rootdir_entry { + u32 magic; + u32 length; + struct tb_property_entry entries[]; +}; + +struct tb_property_dir_entry { + u32 uuid[4]; + struct tb_property_entry entries[]; +}; + +#define TB_PROPERTY_ROOTDIR_MAGIC 0x55584401 + +static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, + size_t block_len, unsigned int dir_offset, size_t dir_len, + bool is_root); + +static inline void parse_dwdata(void *dst, const void *src, size_t dwords) +{ + be32_to_cpu_array(dst, src, dwords); +} + +static inline void format_dwdata(void *dst, const void *src, size_t dwords) +{ + cpu_to_be32_array(dst, src, dwords); +} + +static bool tb_property_entry_valid(const struct tb_property_entry *entry, + size_t block_len) +{ + switch (entry->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + case TB_PROPERTY_TYPE_DATA: + case TB_PROPERTY_TYPE_TEXT: + if (entry->length > block_len) + return false; + if (entry->value + entry->length > block_len) + return false; + break; + + case TB_PROPERTY_TYPE_VALUE: + if (entry->length != 1) + return false; + break; + } + + return true; +} + +static bool tb_property_key_valid(const char *key) +{ + return key && strlen(key) <= TB_PROPERTY_KEY_SIZE; +} + +static struct tb_property * +tb_property_alloc(const char *key, enum tb_property_type type) +{ + struct tb_property *property; + + property = kzalloc(sizeof(*property), GFP_KERNEL); + if (!property) + return NULL; + + strcpy(property->key, key); + property->type = type; + INIT_LIST_HEAD(&property->list); + + return property; +} + +static struct tb_property *tb_property_parse(const u32 *block, size_t block_len, + const struct tb_property_entry *entry) +{ + char key[TB_PROPERTY_KEY_SIZE + 1]; + struct tb_property *property; + struct tb_property_dir *dir; + + if (!tb_property_entry_valid(entry, block_len)) + return NULL; + + parse_dwdata(key, entry, 2); + key[TB_PROPERTY_KEY_SIZE] = '\0'; + + property = tb_property_alloc(key, entry->type); + if (!property) + return NULL; + + property->length = entry->length; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + dir = __tb_property_parse_dir(block, block_len, entry->value, + entry->length, false); + if (!dir) { + kfree(property); + return NULL; + } + property->value.dir = dir; + break; + + case TB_PROPERTY_TYPE_DATA: + property->value.data = kcalloc(property->length, sizeof(u32), + GFP_KERNEL); + if (!property->value.data) { + kfree(property); + return NULL; + } + parse_dwdata(property->value.data, block + entry->value, + entry->length); + break; + + case TB_PROPERTY_TYPE_TEXT: + property->value.text = kcalloc(property->length, sizeof(u32), + GFP_KERNEL); + if (!property->value.text) { + kfree(property); + return NULL; + } + parse_dwdata(property->value.text, block + entry->value, + entry->length); + /* Force null termination */ + property->value.text[property->length * 4 - 1] = '\0'; + break; + + case TB_PROPERTY_TYPE_VALUE: + property->value.immediate = entry->value; + break; + + default: + property->type = TB_PROPERTY_TYPE_UNKNOWN; + break; + } + + return property; +} + +static struct tb_property_dir *__tb_property_parse_dir(const u32 *block, + size_t block_len, unsigned int dir_offset, size_t dir_len, bool is_root) +{ + const struct tb_property_entry *entries; + size_t i, content_len, nentries; + unsigned int content_offset; + struct tb_property_dir *dir; + + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return NULL; + + if (is_root) { + content_offset = dir_offset + 2; + content_len = dir_len; + } else { + dir->uuid = kmemdup(&block[dir_offset], sizeof(*dir->uuid), + GFP_KERNEL); + content_offset = dir_offset + 4; + content_len = dir_len - 4; /* Length includes UUID */ + } + + entries = (const struct tb_property_entry *)&block[content_offset]; + nentries = content_len / (sizeof(*entries) / 4); + + INIT_LIST_HEAD(&dir->properties); + + for (i = 0; i < nentries; i++) { + struct tb_property *property; + + property = tb_property_parse(block, block_len, &entries[i]); + if (!property) { + tb_property_free_dir(dir); + return NULL; + } + + list_add_tail(&property->list, &dir->properties); + } + + return dir; +} + +/** + * tb_property_parse_dir() - Parses properties from given property block + * @block: Property block to parse + * @block_len: Number of dword elements in the property block + * + * This function parses the XDomain properties data block into format that + * can be traversed using the helper functions provided by this module. + * Upon success returns the parsed directory. In case of error returns + * %NULL. The resulting &struct tb_property_dir needs to be released by + * calling tb_property_free_dir() when not needed anymore. + * + * The @block is expected to be root directory. + */ +struct tb_property_dir *tb_property_parse_dir(const u32 *block, + size_t block_len) +{ + const struct tb_property_rootdir_entry *rootdir = + (const struct tb_property_rootdir_entry *)block; + + if (rootdir->magic != TB_PROPERTY_ROOTDIR_MAGIC) + return NULL; + if (rootdir->length > block_len) + return NULL; + + return __tb_property_parse_dir(block, block_len, 0, rootdir->length, + true); +} + +/** + * tb_property_create_dir() - Creates new property directory + * @uuid: UUID used to identify the particular directory + * + * Creates new, empty property directory. If @uuid is %NULL then the + * directory is assumed to be root directory. + */ +struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid) +{ + struct tb_property_dir *dir; + + dir = kzalloc(sizeof(*dir), GFP_KERNEL); + if (!dir) + return NULL; + + INIT_LIST_HEAD(&dir->properties); + if (uuid) { + dir->uuid = kmemdup(uuid, sizeof(*dir->uuid), GFP_KERNEL); + if (!dir->uuid) { + kfree(dir); + return NULL; + } + } + + return dir; +} +EXPORT_SYMBOL_GPL(tb_property_create_dir); + +static void tb_property_free(struct tb_property *property) +{ + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + tb_property_free_dir(property->value.dir); + break; + + case TB_PROPERTY_TYPE_DATA: + kfree(property->value.data); + break; + + case TB_PROPERTY_TYPE_TEXT: + kfree(property->value.text); + break; + + default: + break; + } + + kfree(property); +} + +/** + * tb_property_free_dir() - Release memory allocated for property directory + * @dir: Directory to release + * + * This will release all the memory the directory occupies including all + * descendants. It is OK to pass %NULL @dir, then the function does + * nothing. + */ +void tb_property_free_dir(struct tb_property_dir *dir) +{ + struct tb_property *property, *tmp; + + if (!dir) + return; + + list_for_each_entry_safe(property, tmp, &dir->properties, list) { + list_del(&property->list); + tb_property_free(property); + } + kfree(dir->uuid); + kfree(dir); +} +EXPORT_SYMBOL_GPL(tb_property_free_dir); + +static size_t tb_property_dir_length(const struct tb_property_dir *dir, + bool recurse, size_t *data_len) +{ + const struct tb_property *property; + size_t len = 0; + + if (dir->uuid) + len += sizeof(*dir->uuid) / 4; + else + len += sizeof(struct tb_property_rootdir_entry) / 4; + + list_for_each_entry(property, &dir->properties, list) { + len += sizeof(struct tb_property_entry) / 4; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + if (recurse) { + len += tb_property_dir_length( + property->value.dir, recurse, data_len); + } + /* Reserve dword padding after each directory */ + if (data_len) + *data_len += 1; + break; + + case TB_PROPERTY_TYPE_DATA: + case TB_PROPERTY_TYPE_TEXT: + if (data_len) + *data_len += property->length; + break; + + default: + break; + } + } + + return len; +} + +static ssize_t __tb_property_format_dir(const struct tb_property_dir *dir, + u32 *block, unsigned int start_offset, size_t block_len) +{ + unsigned int data_offset, dir_end; + const struct tb_property *property; + struct tb_property_entry *entry; + size_t dir_len, data_len = 0; + int ret; + + /* + * The structure of property block looks like following. Leaf + * data/text is included right after the directory and each + * directory follows each other (even nested ones). + * + * +----------+ <-- start_offset + * | header | <-- root directory header + * +----------+ --- + * | entry 0 | -^--------------------. + * +----------+ | | + * | entry 1 | -|--------------------|--. + * +----------+ | | | + * | entry 2 | -|-----------------. | | + * +----------+ | | | | + * : : | dir_len | | | + * . . | | | | + * : : | | | | + * +----------+ | | | | + * | entry n | v | | | + * +----------+ <-- data_offset | | | + * | data 0 | <------------------|--' | + * +----------+ | | + * | data 1 | <------------------|-----' + * +----------+ | + * | 00000000 | padding | + * +----------+ <-- dir_end <------' + * | UUID | <-- directory UUID (child directory) + * +----------+ + * | entry 0 | + * +----------+ + * | entry 1 | + * +----------+ + * : : + * . . + * : : + * +----------+ + * | entry n | + * +----------+ + * | data 0 | + * +----------+ + * + * We use dir_end to hold pointer to the end of the directory. It + * will increase as we add directories and each directory should be + * added starting from previous dir_end. + */ + dir_len = tb_property_dir_length(dir, false, &data_len); + data_offset = start_offset + dir_len; + dir_end = start_offset + data_len + dir_len; + + if (data_offset > dir_end) + return -EINVAL; + if (dir_end > block_len) + return -EINVAL; + + /* Write headers first */ + if (dir->uuid) { + struct tb_property_dir_entry *pe; + + pe = (struct tb_property_dir_entry *)&block[start_offset]; + memcpy(pe->uuid, dir->uuid, sizeof(pe->uuid)); + entry = pe->entries; + } else { + struct tb_property_rootdir_entry *re; + + re = (struct tb_property_rootdir_entry *)&block[start_offset]; + re->magic = TB_PROPERTY_ROOTDIR_MAGIC; + re->length = dir_len - sizeof(*re) / 4; + entry = re->entries; + } + + list_for_each_entry(property, &dir->properties, list) { + const struct tb_property_dir *child; + + format_dwdata(entry, property->key, 2); + entry->type = property->type; + + switch (property->type) { + case TB_PROPERTY_TYPE_DIRECTORY: + child = property->value.dir; + ret = __tb_property_format_dir(child, block, dir_end, + block_len); + if (ret < 0) + return ret; + entry->length = tb_property_dir_length(child, false, + NULL); + entry->value = dir_end; + dir_end = ret; + break; + + case TB_PROPERTY_TYPE_DATA: + format_dwdata(&block[data_offset], property->value.data, + property->length); + entry->length = property->length; + entry->value = data_offset; + data_offset += entry->length; + break; + + case TB_PROPERTY_TYPE_TEXT: + format_dwdata(&block[data_offset], property->value.text, + property->length); + entry->length = property->length; + entry->value = data_offset; + data_offset += entry->length; + break; + + case TB_PROPERTY_TYPE_VALUE: + entry->length = property->length; + entry->value = property->value.immediate; + break; + + default: + break; + } + + entry++; + } + + return dir_end; +} + +/** + * tb_property_format_dir() - Formats directory to the packed XDomain format + * @dir: Directory to format + * @block: Property block where the packed data is placed + * @block_len: Length of the property block + * + * This function formats the directory to the packed format that can be + * then send over the thunderbolt fabric to receiving host. Returns %0 in + * case of success and negative errno on faulure. Passing %NULL in @block + * returns number of entries the block takes. + */ +ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, + size_t block_len) +{ + ssize_t ret; + + if (!block) { + size_t dir_len, data_len = 0; + + dir_len = tb_property_dir_length(dir, true, &data_len); + return dir_len + data_len; + } + + ret = __tb_property_format_dir(dir, block, 0, block_len); + return ret < 0 ? ret : 0; +} + +/** + * tb_property_add_immediate() - Add immediate property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @value: Immediate value to store with the property + */ +int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, + u32 value) +{ + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_VALUE); + if (!property) + return -ENOMEM; + + property->length = 1; + property->value.immediate = value; + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_immediate); + +/** + * tb_property_add_data() - Adds arbitrary data property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @buf: Data buffer to add + * @buflen: Number of bytes in the data buffer + * + * Function takes a copy of @buf and adds it to the directory. + */ +int tb_property_add_data(struct tb_property_dir *parent, const char *key, + const void *buf, size_t buflen) +{ + /* Need to pad to dword boundary */ + size_t size = round_up(buflen, 4); + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_DATA); + if (!property) + return -ENOMEM; + + property->length = size / 4; + property->value.data = kzalloc(size, GFP_KERNEL); + memcpy(property->value.data, buf, buflen); + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_data); + +/** + * tb_property_add_text() - Adds string property to directory + * @parent: Directory to add the property + * @key: Key for the property + * @text: String to add + * + * Function takes a copy of @text and adds it to the directory. + */ +int tb_property_add_text(struct tb_property_dir *parent, const char *key, + const char *text) +{ + /* Need to pad to dword boundary */ + size_t size = round_up(strlen(text) + 1, 4); + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_TEXT); + if (!property) + return -ENOMEM; + + property->length = size / 4; + property->value.data = kzalloc(size, GFP_KERNEL); + strcpy(property->value.text, text); + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_text); + +/** + * tb_property_add_dir() - Adds a directory to the parent directory + * @parent: Directory to add the property + * @key: Key for the property + * @dir: Directory to add + */ +int tb_property_add_dir(struct tb_property_dir *parent, const char *key, + struct tb_property_dir *dir) +{ + struct tb_property *property; + + if (!tb_property_key_valid(key)) + return -EINVAL; + + property = tb_property_alloc(key, TB_PROPERTY_TYPE_DIRECTORY); + if (!property) + return -ENOMEM; + + property->value.dir = dir; + + list_add_tail(&property->list, &parent->properties); + return 0; +} +EXPORT_SYMBOL_GPL(tb_property_add_dir); + +/** + * tb_property_remove() - Removes property from a parent directory + * @property: Property to remove + * + * Note memory for @property is released as well so it is not allowed to + * touch the object after call to this function. + */ +void tb_property_remove(struct tb_property *property) +{ + list_del(&property->list); + kfree(property); +} +EXPORT_SYMBOL_GPL(tb_property_remove); + +/** + * tb_property_find() - Find a property from a directory + * @dir: Directory where the property is searched + * @key: Key to look for + * @type: Type of the property + * + * Finds and returns property from the given directory. Does not recurse + * into sub-directories. Returns %NULL if the property was not found. + */ +struct tb_property *tb_property_find(struct tb_property_dir *dir, + const char *key, enum tb_property_type type) +{ + struct tb_property *property; + + list_for_each_entry(property, &dir->properties, list) { + if (property->type == type && !strcmp(property->key, key)) + return property; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(tb_property_find); + +/** + * tb_property_get_next() - Get next property from directory + * @dir: Directory holding properties + * @prev: Previous property in the directory (%NULL returns the first) + */ +struct tb_property *tb_property_get_next(struct tb_property_dir *dir, + struct tb_property *prev) +{ + if (prev) { + if (list_is_last(&prev->list, &dir->properties)) + return NULL; + return list_next_entry(prev, list); + } + return list_first_entry_or_null(&dir->properties, struct tb_property, + list); +} +EXPORT_SYMBOL_GPL(tb_property_get_next); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h new file mode 100644 index 000000000000..96561c1265ae --- /dev/null +++ b/include/linux/thunderbolt.h @@ -0,0 +1,89 @@ +/* + * Thunderbolt service API + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef THUNDERBOLT_H_ +#define THUNDERBOLT_H_ + +#include +#include + +/** + * struct tb_property_dir - XDomain property directory + * @uuid: Directory UUID or %NULL if root directory + * @properties: List of properties in this directory + * + * User needs to provide serialization if needed. + */ +struct tb_property_dir { + const uuid_t *uuid; + struct list_head properties; +}; + +enum tb_property_type { + TB_PROPERTY_TYPE_UNKNOWN = 0x00, + TB_PROPERTY_TYPE_DIRECTORY = 0x44, + TB_PROPERTY_TYPE_DATA = 0x64, + TB_PROPERTY_TYPE_TEXT = 0x74, + TB_PROPERTY_TYPE_VALUE = 0x76, +}; + +#define TB_PROPERTY_KEY_SIZE 8 + +/** + * struct tb_property - XDomain property + * @list: Used to link properties together in a directory + * @key: Key for the property (always terminated). + * @type: Type of the property + * @length: Length of the property data in dwords + * @value: Property value + * + * Users use @type to determine which field in @value is filled. + */ +struct tb_property { + struct list_head list; + char key[TB_PROPERTY_KEY_SIZE + 1]; + enum tb_property_type type; + size_t length; + union { + struct tb_property_dir *dir; + u8 *data; + char *text; + u32 immediate; + } value; +}; + +struct tb_property_dir *tb_property_parse_dir(const u32 *block, + size_t block_len); +ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, + size_t block_len); +struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid); +void tb_property_free_dir(struct tb_property_dir *dir); +int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, + u32 value); +int tb_property_add_data(struct tb_property_dir *parent, const char *key, + const void *buf, size_t buflen); +int tb_property_add_text(struct tb_property_dir *parent, const char *key, + const char *text); +int tb_property_add_dir(struct tb_property_dir *parent, const char *key, + struct tb_property_dir *dir); +void tb_property_remove(struct tb_property *tb_property); +struct tb_property *tb_property_find(struct tb_property_dir *dir, + const char *key, enum tb_property_type type); +struct tb_property *tb_property_get_next(struct tb_property_dir *dir, + struct tb_property *prev); + +#define tb_property_for_each(dir, property) \ + for (property = tb_property_get_next(dir, NULL); \ + property; \ + property = tb_property_get_next(dir, property)) + +#endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From eaf8ff35a345449207ad116e2574c19780ec9a98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:31 +0300 Subject: thunderbolt: Move enum tb_cfg_pkg_type to thunderbolt.h These will be needed by Thunderbolt services when sending and receiving XDomain control messages. While there change TB_CFG_PKG_PREPARE_TO_SLEEP value to be decimal in order to be consistent with other members. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.h | 1 + drivers/thunderbolt/tb_msgs.h | 17 ----------------- include/linux/thunderbolt.h | 17 +++++++++++++++++ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h index 36fd28b1c1c5..d0f21e1e0b8b 100644 --- a/drivers/thunderbolt/ctl.h +++ b/drivers/thunderbolt/ctl.h @@ -8,6 +8,7 @@ #define _TB_CFG #include +#include #include "nhi.h" #include "tb_msgs.h" diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index f3adf58a40ce..f2b2550cd97c 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -15,23 +15,6 @@ #include #include -enum tb_cfg_pkg_type { - TB_CFG_PKG_READ = 1, - TB_CFG_PKG_WRITE = 2, - TB_CFG_PKG_ERROR = 3, - TB_CFG_PKG_NOTIFY_ACK = 4, - TB_CFG_PKG_EVENT = 5, - TB_CFG_PKG_XDOMAIN_REQ = 6, - TB_CFG_PKG_XDOMAIN_RESP = 7, - TB_CFG_PKG_OVERRIDE = 8, - TB_CFG_PKG_RESET = 9, - TB_CFG_PKG_ICM_EVENT = 10, - TB_CFG_PKG_ICM_CMD = 11, - TB_CFG_PKG_ICM_RESP = 12, - TB_CFG_PKG_PREPARE_TO_SLEEP = 0xd, - -}; - enum tb_cfg_space { TB_CFG_HOPS = 0, TB_CFG_PORT = 1, diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 96561c1265ae..b512b1e2b4f2 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -1,6 +1,7 @@ /* * Thunderbolt service API * + * Copyright (C) 2014 Andreas Noever * Copyright (C) 2017, Intel Corporation * Authors: Michael Jamet * Mika Westerberg @@ -16,6 +17,22 @@ #include #include +enum tb_cfg_pkg_type { + TB_CFG_PKG_READ = 1, + TB_CFG_PKG_WRITE = 2, + TB_CFG_PKG_ERROR = 3, + TB_CFG_PKG_NOTIFY_ACK = 4, + TB_CFG_PKG_EVENT = 5, + TB_CFG_PKG_XDOMAIN_REQ = 6, + TB_CFG_PKG_XDOMAIN_RESP = 7, + TB_CFG_PKG_OVERRIDE = 8, + TB_CFG_PKG_RESET = 9, + TB_CFG_PKG_ICM_EVENT = 10, + TB_CFG_PKG_ICM_CMD = 11, + TB_CFG_PKG_ICM_RESP = 12, + TB_CFG_PKG_PREPARE_TO_SLEEP = 13, +}; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From 9e99b9f4d5c36340dabda6d14053195b2a43796b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:32 +0300 Subject: thunderbolt: Move thunderbolt domain structure to thunderbolt.h These are needed by Thunderbolt services so move them to thunderbolt.h to make sure they are available outside of drivers/thunderbolt. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/tb.h | 42 ------------------------------------------ include/linux/thunderbolt.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 42 deletions(-) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index e0deee4f1eb0..2fefe76621ca 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -39,20 +39,6 @@ struct tb_switch_nvm { bool authenticating; }; -/** - * enum tb_security_level - Thunderbolt security level - * @TB_SECURITY_NONE: No security, legacy mode - * @TB_SECURITY_USER: User approval required at minimum - * @TB_SECURITY_SECURE: One time saved key required at minimum - * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) - */ -enum tb_security_level { - TB_SECURITY_NONE, - TB_SECURITY_USER, - TB_SECURITY_SECURE, - TB_SECURITY_DPONLY, -}; - #define TB_SWITCH_KEY_SIZE 32 /* Each physical port contains 2 links on modern controllers */ #define TB_SWITCH_LINKS_PER_PHY_PORT 2 @@ -223,33 +209,6 @@ struct tb_cm_ops { int (*disconnect_pcie_paths)(struct tb *tb); }; -/** - * struct tb - main thunderbolt bus structure - * @dev: Domain device - * @lock: Big lock. Must be held when accessing any struct - * tb_switch / struct tb_port. - * @nhi: Pointer to the NHI structure - * @ctl: Control channel for this domain - * @wq: Ordered workqueue for all domain specific work - * @root_switch: Root switch of this domain - * @cm_ops: Connection manager specific operations vector - * @index: Linux assigned domain number - * @security_level: Current security level - * @privdata: Private connection manager specific data - */ -struct tb { - struct device dev; - struct mutex lock; - struct tb_nhi *nhi; - struct tb_ctl *ctl; - struct workqueue_struct *wq; - struct tb_switch *root_switch; - const struct tb_cm_ops *cm_ops; - int index; - enum tb_security_level security_level; - unsigned long privdata[0]; -}; - static inline void *tb_priv(struct tb *tb) { return (void *)tb->privdata; @@ -368,7 +327,6 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer, struct tb *icm_probe(struct tb_nhi *nhi); struct tb *tb_probe(struct tb_nhi *nhi); -extern struct bus_type tb_bus_type; extern struct device_type tb_domain_type; extern struct device_type tb_switch_type; diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index b512b1e2b4f2..910b1bf92112 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -14,7 +14,9 @@ #ifndef THUNDERBOLT_H_ #define THUNDERBOLT_H_ +#include #include +#include #include enum tb_cfg_pkg_type { @@ -33,6 +35,49 @@ enum tb_cfg_pkg_type { TB_CFG_PKG_PREPARE_TO_SLEEP = 13, }; +/** + * enum tb_security_level - Thunderbolt security level + * @TB_SECURITY_NONE: No security, legacy mode + * @TB_SECURITY_USER: User approval required at minimum + * @TB_SECURITY_SECURE: One time saved key required at minimum + * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) + */ +enum tb_security_level { + TB_SECURITY_NONE, + TB_SECURITY_USER, + TB_SECURITY_SECURE, + TB_SECURITY_DPONLY, +}; + +/** + * struct tb - main thunderbolt bus structure + * @dev: Domain device + * @lock: Big lock. Must be held when accessing any struct + * tb_switch / struct tb_port. + * @nhi: Pointer to the NHI structure + * @ctl: Control channel for this domain + * @wq: Ordered workqueue for all domain specific work + * @root_switch: Root switch of this domain + * @cm_ops: Connection manager specific operations vector + * @index: Linux assigned domain number + * @security_level: Current security level + * @privdata: Private connection manager specific data + */ +struct tb { + struct device dev; + struct mutex lock; + struct tb_nhi *nhi; + struct tb_ctl *ctl; + struct workqueue_struct *wq; + struct tb_switch *root_switch; + const struct tb_cm_ops *cm_ops; + int index; + enum tb_security_level security_level; + unsigned long privdata[0]; +}; + +extern struct bus_type tb_bus_type; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From e69b71f8458b78a2ef44e3d07374a8f46e45123d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:33 +0300 Subject: thunderbolt: Move tb_switch_phy_port_from_link() to thunderbolt.h A Thunderbolt service might need to find the physical port from a link the cable is connected to. For instance networking driver uses this information to generate MAC address according the Apple ThunderboltIP protocol. Move this function to thunderbolt.h and rename it to tb_phy_port_from_link() to reflect the fact that it does not take switch as parameter. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/icm.c | 2 +- drivers/thunderbolt/tb.h | 7 ------- include/linux/thunderbolt.h | 7 +++++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 53250fc057e1..8c22b91ed040 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -89,7 +89,7 @@ static inline struct tb *icm_to_tb(struct icm *icm) static inline u8 phy_port_from_route(u64 route, u8 depth) { - return tb_switch_phy_port_from_link(route >> ((depth - 1) * 8)); + return tb_phy_port_from_link(route >> ((depth - 1) * 8)); } static inline u8 dual_link_from_link(u8 link) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index 2fefe76621ca..ea21d927bd09 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -40,8 +40,6 @@ struct tb_switch_nvm { }; #define TB_SWITCH_KEY_SIZE 32 -/* Each physical port contains 2 links on modern controllers */ -#define TB_SWITCH_LINKS_PER_PHY_PORT 2 /** * struct tb_switch - a thunderbolt switch @@ -367,11 +365,6 @@ struct tb_switch *tb_switch_find_by_link_depth(struct tb *tb, u8 link, u8 depth); struct tb_switch *tb_switch_find_by_uuid(struct tb *tb, const uuid_t *uuid); -static inline unsigned int tb_switch_phy_port_from_link(unsigned int link) -{ - return (link - 1) / TB_SWITCH_LINKS_PER_PHY_PORT; -} - static inline void tb_switch_put(struct tb_switch *sw) { put_device(&sw->dev); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 910b1bf92112..43b8d1e09341 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -78,6 +78,13 @@ struct tb { extern struct bus_type tb_bus_type; +#define TB_LINKS_PER_PHY_PORT 2 + +static inline unsigned int tb_phy_port_from_link(unsigned int link) +{ + return (link - 1) / TB_LINKS_PER_PHY_PORT; +} + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From d1ff70241a275133e1a0258b7c23588b122276c8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:34 +0300 Subject: thunderbolt: Add support for XDomain discovery protocol When two hosts are connected over a Thunderbolt cable, there is a protocol they can use to communicate capabilities supported by the host. The discovery protocol uses automatically configured control channel (ring 0) and is build on top of request/response transactions using special XDomain primitives provided by the Thunderbolt base protocol. The capabilities consists of a root directory block of basic properties used for identification of the host, and then there can be zero or more directories each describing a Thunderbolt service and its capabilities. Once both sides have discovered what is supported the two hosts can setup high-speed DMA paths and transfer data to the other side using whatever protocol was agreed based on the properties. The software protocol used to communicate which DMA paths to enable is service specific. This patch adds support for the XDomain discovery protocol to the Thunderbolt bus. We model each remote host connection as a Linux XDomain device. For each Thunderbolt service found supported on the XDomain device, we create Linux Thunderbolt service device which Thunderbolt service drivers can then bind to based on the protocol identification information retrieved from the property directory describing the service. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-bus-thunderbolt | 48 + drivers/thunderbolt/Makefile | 2 +- drivers/thunderbolt/ctl.c | 11 +- drivers/thunderbolt/ctl.h | 2 +- drivers/thunderbolt/domain.c | 197 ++- drivers/thunderbolt/icm.c | 218 +++- drivers/thunderbolt/nhi.h | 2 + drivers/thunderbolt/switch.c | 7 +- drivers/thunderbolt/tb.h | 39 +- drivers/thunderbolt/tb_msgs.h | 123 ++ drivers/thunderbolt/xdomain.c | 1576 +++++++++++++++++++++++ include/linux/mod_devicetable.h | 26 + include/linux/thunderbolt.h | 242 ++++ scripts/mod/devicetable-offsets.c | 7 + scripts/mod/file2alias.c | 25 + 15 files changed, 2507 insertions(+), 18 deletions(-) create mode 100644 drivers/thunderbolt/xdomain.c diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt index 392bef5bd399..93798c02e28b 100644 --- a/Documentation/ABI/testing/sysfs-bus-thunderbolt +++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt @@ -110,3 +110,51 @@ Description: When new NVM image is written to the non-active NVM is directly the status value from the DMA configuration based mailbox before the device is power cycled. Writing 0 here clears the status. + +What: /sys/bus/thunderbolt/devices/./key +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains name of the property directory the XDomain + service exposes. This entry describes the protocol in + question. Following directories are already reserved by + the Apple XDomain specification: + + network: IP/ethernet over Thunderbolt + targetdm: Target disk mode protocol over Thunderbolt + extdisp: External display mode protocol over Thunderbolt + +What: /sys/bus/thunderbolt/devices/./modalias +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: Stores the same MODALIAS value emitted by uevent for + the XDomain service. Format: tbtsvc:kSpNvNrN + +What: /sys/bus/thunderbolt/devices/./prtcid +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain protocol identifier the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcvers +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain protocol version the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcrevs +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain software version the XDomain + service supports. + +What: /sys/bus/thunderbolt/devices/./prtcstns +Date: Jan 2018 +KernelVersion: 4.15 +Contact: thunderbolt-software@lists.01.org +Description: This contains XDomain service specific settings as + bitmask. Format: %x diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile index 7afd21f5383a..f2f0de27252b 100644 --- a/drivers/thunderbolt/Makefile +++ b/drivers/thunderbolt/Makefile @@ -1,3 +1,3 @@ obj-${CONFIG_THUNDERBOLT} := thunderbolt.o thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel_pci.o eeprom.o -thunderbolt-objs += domain.o dma_port.o icm.o property.o +thunderbolt-objs += domain.o dma_port.o icm.o property.o xdomain.o diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index e6a4c9458c76..46e393c5fd1d 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -368,10 +368,10 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len, /** * tb_ctl_handle_event() - acknowledge a plug event, invoke ctl->callback */ -static void tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, +static bool tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, struct ctl_pkg *pkg, size_t size) { - ctl->callback(ctl->callback_data, type, pkg->buffer, size); + return ctl->callback(ctl->callback_data, type, pkg->buffer, size); } static void tb_ctl_rx_submit(struct ctl_pkg *pkg) @@ -444,6 +444,8 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame, break; case TB_CFG_PKG_EVENT: + case TB_CFG_PKG_XDOMAIN_RESP: + case TB_CFG_PKG_XDOMAIN_REQ: if (*(__be32 *)(pkg->buffer + frame->size) != crc32) { tb_ctl_err(pkg->ctl, "RX: checksum mismatch, dropping packet\n"); @@ -451,8 +453,9 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame, } /* Fall through */ case TB_CFG_PKG_ICM_EVENT: - tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size); - goto rx; + if (tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size)) + goto rx; + break; default: break; diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h index d0f21e1e0b8b..85c49dd301ea 100644 --- a/drivers/thunderbolt/ctl.h +++ b/drivers/thunderbolt/ctl.h @@ -16,7 +16,7 @@ /* control channel */ struct tb_ctl; -typedef void (*event_cb)(void *data, enum tb_cfg_pkg_type type, +typedef bool (*event_cb)(void *data, enum tb_cfg_pkg_type type, const void *buf, size_t size); struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data); diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c index 9f2dcd48974d..9b90115319ce 100644 --- a/drivers/thunderbolt/domain.c +++ b/drivers/thunderbolt/domain.c @@ -20,6 +20,98 @@ static DEFINE_IDA(tb_domain_ida); +static bool match_service_id(const struct tb_service_id *id, + const struct tb_service *svc) +{ + if (id->match_flags & TBSVC_MATCH_PROTOCOL_KEY) { + if (strcmp(id->protocol_key, svc->key)) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_ID) { + if (id->protocol_id != svc->prtcid) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) { + if (id->protocol_version != svc->prtcvers) + return false; + } + + if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) { + if (id->protocol_revision != svc->prtcrevs) + return false; + } + + return true; +} + +static const struct tb_service_id *__tb_service_match(struct device *dev, + struct device_driver *drv) +{ + struct tb_service_driver *driver; + const struct tb_service_id *ids; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return NULL; + + driver = container_of(drv, struct tb_service_driver, driver); + if (!driver->id_table) + return NULL; + + for (ids = driver->id_table; ids->match_flags != 0; ids++) { + if (match_service_id(ids, svc)) + return ids; + } + + return NULL; +} + +static int tb_service_match(struct device *dev, struct device_driver *drv) +{ + return !!__tb_service_match(dev, drv); +} + +static int tb_service_probe(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tb_service_driver *driver; + const struct tb_service_id *id; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + id = __tb_service_match(dev, &driver->driver); + + return driver->probe(svc, id); +} + +static int tb_service_remove(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tb_service_driver *driver; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + if (driver->remove) + driver->remove(svc); + + return 0; +} + +static void tb_service_shutdown(struct device *dev) +{ + struct tb_service_driver *driver; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc || !dev->driver) + return; + + driver = container_of(dev->driver, struct tb_service_driver, driver); + if (driver->shutdown) + driver->shutdown(svc); +} + static const char * const tb_security_names[] = { [TB_SECURITY_NONE] = "none", [TB_SECURITY_USER] = "user", @@ -52,6 +144,10 @@ static const struct attribute_group *domain_attr_groups[] = { struct bus_type tb_bus_type = { .name = "thunderbolt", + .match = tb_service_match, + .probe = tb_service_probe, + .remove = tb_service_remove, + .shutdown = tb_service_shutdown, }; static void tb_domain_release(struct device *dev) @@ -128,17 +224,26 @@ err_free: return NULL; } -static void tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type, +static bool tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type, const void *buf, size_t size) { struct tb *tb = data; if (!tb->cm_ops->handle_event) { tb_warn(tb, "domain does not have event handler\n"); - return; + return true; } - tb->cm_ops->handle_event(tb, type, buf, size); + switch (type) { + case TB_CFG_PKG_XDOMAIN_REQ: + case TB_CFG_PKG_XDOMAIN_RESP: + return tb_xdomain_handle_request(tb, type, buf, size); + + default: + tb->cm_ops->handle_event(tb, type, buf, size); + } + + return true; } /** @@ -443,9 +548,92 @@ int tb_domain_disconnect_pcie_paths(struct tb *tb) return tb->cm_ops->disconnect_pcie_paths(tb); } +/** + * tb_domain_approve_xdomain_paths() - Enable DMA paths for XDomain + * @tb: Domain enabling the DMA paths + * @xd: XDomain DMA paths are created to + * + * Calls connection manager specific method to enable DMA paths to the + * XDomain in question. + * + * Return: 0% in case of success and negative errno otherwise. In + * particular returns %-ENOTSUPP if the connection manager + * implementation does not support XDomains. + */ +int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + if (!tb->cm_ops->approve_xdomain_paths) + return -ENOTSUPP; + + return tb->cm_ops->approve_xdomain_paths(tb, xd); +} + +/** + * tb_domain_disconnect_xdomain_paths() - Disable DMA paths for XDomain + * @tb: Domain disabling the DMA paths + * @xd: XDomain whose DMA paths are disconnected + * + * Calls connection manager specific method to disconnect DMA paths to + * the XDomain in question. + * + * Return: 0% in case of success and negative errno otherwise. In + * particular returns %-ENOTSUPP if the connection manager + * implementation does not support XDomains. + */ +int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + if (!tb->cm_ops->disconnect_xdomain_paths) + return -ENOTSUPP; + + return tb->cm_ops->disconnect_xdomain_paths(tb, xd); +} + +static int disconnect_xdomain(struct device *dev, void *data) +{ + struct tb_xdomain *xd; + struct tb *tb = data; + int ret = 0; + + xd = tb_to_xdomain(dev); + if (xd && xd->tb == tb) + ret = tb_xdomain_disable_paths(xd); + + return ret; +} + +/** + * tb_domain_disconnect_all_paths() - Disconnect all paths for the domain + * @tb: Domain whose paths are disconnected + * + * This function can be used to disconnect all paths (PCIe, XDomain) for + * example in preparation for host NVM firmware upgrade. After this is + * called the paths cannot be established without resetting the switch. + * + * Return: %0 in case of success and negative errno otherwise. + */ +int tb_domain_disconnect_all_paths(struct tb *tb) +{ + int ret; + + ret = tb_domain_disconnect_pcie_paths(tb); + if (ret) + return ret; + + return bus_for_each_dev(&tb_bus_type, NULL, tb, disconnect_xdomain); +} + int tb_domain_init(void) { - return bus_register(&tb_bus_type); + int ret; + + ret = tb_xdomain_init(); + if (ret) + return ret; + ret = bus_register(&tb_bus_type); + if (ret) + tb_xdomain_exit(); + + return ret; } void tb_domain_exit(void) @@ -453,4 +641,5 @@ void tb_domain_exit(void) bus_unregister(&tb_bus_type); ida_destroy(&tb_domain_ida); tb_switch_exit(); + tb_xdomain_exit(); } diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index 8c22b91ed040..ab02d13f40b7 100644 --- a/drivers/thunderbolt/icm.c +++ b/drivers/thunderbolt/icm.c @@ -60,6 +60,8 @@ * @get_route: Find a route string for given switch * @device_connected: Handle device connected ICM message * @device_disconnected: Handle device disconnected ICM message + * @xdomain_connected - Handle XDomain connected ICM message + * @xdomain_disconnected - Handle XDomain disconnected ICM message */ struct icm { struct mutex request_lock; @@ -74,6 +76,10 @@ struct icm { const struct icm_pkg_header *hdr); void (*device_disconnected)(struct tb *tb, const struct icm_pkg_header *hdr); + void (*xdomain_connected)(struct tb *tb, + const struct icm_pkg_header *hdr); + void (*xdomain_disconnected)(struct tb *tb, + const struct icm_pkg_header *hdr); }; struct icm_notification { @@ -89,7 +95,10 @@ static inline struct tb *icm_to_tb(struct icm *icm) static inline u8 phy_port_from_route(u64 route, u8 depth) { - return tb_phy_port_from_link(route >> ((depth - 1) * 8)); + u8 link; + + link = depth ? route >> ((depth - 1) * 8) : route; + return tb_phy_port_from_link(link); } static inline u8 dual_link_from_link(u8 link) @@ -320,6 +329,51 @@ static int icm_fr_challenge_switch_key(struct tb *tb, struct tb_switch *sw, return 0; } +static int icm_fr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + struct icm_fr_pkg_approve_xdomain_response reply; + struct icm_fr_pkg_approve_xdomain request; + int ret; + + memset(&request, 0, sizeof(request)); + request.hdr.code = ICM_APPROVE_XDOMAIN; + request.link_info = xd->depth << ICM_LINK_INFO_DEPTH_SHIFT | xd->link; + memcpy(&request.remote_uuid, xd->remote_uuid, sizeof(*xd->remote_uuid)); + + request.transmit_path = xd->transmit_path; + request.transmit_ring = xd->transmit_ring; + request.receive_path = xd->receive_path; + request.receive_ring = xd->receive_ring; + + memset(&reply, 0, sizeof(reply)); + ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply), + 1, ICM_TIMEOUT); + if (ret) + return ret; + + if (reply.hdr.flags & ICM_FLAGS_ERROR) + return -EIO; + + return 0; +} + +static int icm_fr_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd) +{ + u8 phy_port; + u8 cmd; + + phy_port = tb_phy_port_from_link(xd->link); + if (phy_port == 0) + cmd = NHI_MAILBOX_DISCONNECT_PA; + else + cmd = NHI_MAILBOX_DISCONNECT_PB; + + nhi_mailbox_cmd(tb->nhi, cmd, 1); + usleep_range(10, 50); + nhi_mailbox_cmd(tb->nhi, cmd, 2); + return 0; +} + static void remove_switch(struct tb_switch *sw) { struct tb_switch *parent_sw; @@ -475,6 +529,141 @@ icm_fr_device_disconnected(struct tb *tb, const struct icm_pkg_header *hdr) tb_switch_put(sw); } +static void remove_xdomain(struct tb_xdomain *xd) +{ + struct tb_switch *sw; + + sw = tb_to_switch(xd->dev.parent); + tb_port_at(xd->route, sw)->xdomain = NULL; + tb_xdomain_remove(xd); +} + +static void +icm_fr_xdomain_connected(struct tb *tb, const struct icm_pkg_header *hdr) +{ + const struct icm_fr_event_xdomain_connected *pkg = + (const struct icm_fr_event_xdomain_connected *)hdr; + struct tb_xdomain *xd; + struct tb_switch *sw; + u8 link, depth; + bool approved; + u64 route; + + /* + * After NVM upgrade adding root switch device fails because we + * initiated reset. During that time ICM might still send + * XDomain connected message which we ignore here. + */ + if (!tb->root_switch) + return; + + link = pkg->link_info & ICM_LINK_INFO_LINK_MASK; + depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >> + ICM_LINK_INFO_DEPTH_SHIFT; + approved = pkg->link_info & ICM_LINK_INFO_APPROVED; + + if (link > ICM_MAX_LINK || depth > ICM_MAX_DEPTH) { + tb_warn(tb, "invalid topology %u.%u, ignoring\n", link, depth); + return; + } + + route = get_route(pkg->local_route_hi, pkg->local_route_lo); + + xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid); + if (xd) { + u8 xd_phy_port, phy_port; + + xd_phy_port = phy_port_from_route(xd->route, xd->depth); + phy_port = phy_port_from_route(route, depth); + + if (xd->depth == depth && xd_phy_port == phy_port) { + xd->link = link; + xd->route = route; + xd->is_unplugged = false; + tb_xdomain_put(xd); + return; + } + + /* + * If we find an existing XDomain connection remove it + * now. We need to go through login handshake and + * everything anyway to be able to re-establish the + * connection. + */ + remove_xdomain(xd); + tb_xdomain_put(xd); + } + + /* + * Look if there already exists an XDomain in the same place + * than the new one and in that case remove it because it is + * most likely another host that got disconnected. + */ + xd = tb_xdomain_find_by_link_depth(tb, link, depth); + if (!xd) { + u8 dual_link; + + dual_link = dual_link_from_link(link); + if (dual_link) + xd = tb_xdomain_find_by_link_depth(tb, dual_link, + depth); + } + if (xd) { + remove_xdomain(xd); + tb_xdomain_put(xd); + } + + /* + * If the user disconnected a switch during suspend and + * connected another host to the same port, remove the switch + * first. + */ + sw = get_switch_at_route(tb->root_switch, route); + if (sw) + remove_switch(sw); + + sw = tb_switch_find_by_link_depth(tb, link, depth); + if (!sw) { + tb_warn(tb, "no switch exists at %u.%u, ignoring\n", link, + depth); + return; + } + + xd = tb_xdomain_alloc(sw->tb, &sw->dev, route, + &pkg->local_uuid, &pkg->remote_uuid); + if (!xd) { + tb_switch_put(sw); + return; + } + + xd->link = link; + xd->depth = depth; + + tb_port_at(route, sw)->xdomain = xd; + + tb_xdomain_add(xd); + tb_switch_put(sw); +} + +static void +icm_fr_xdomain_disconnected(struct tb *tb, const struct icm_pkg_header *hdr) +{ + const struct icm_fr_event_xdomain_disconnected *pkg = + (const struct icm_fr_event_xdomain_disconnected *)hdr; + struct tb_xdomain *xd; + + /* + * If the connection is through one or multiple devices, the + * XDomain device is removed along with them so it is fine if we + * cannot find it here. + */ + xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid); + if (xd) { + remove_xdomain(xd); + tb_xdomain_put(xd); + } +} + static struct pci_dev *get_upstream_port(struct pci_dev *pdev) { struct pci_dev *parent; @@ -594,6 +783,12 @@ static void icm_handle_notification(struct work_struct *work) case ICM_EVENT_DEVICE_DISCONNECTED: icm->device_disconnected(tb, n->pkg); break; + case ICM_EVENT_XDOMAIN_CONNECTED: + icm->xdomain_connected(tb, n->pkg); + break; + case ICM_EVENT_XDOMAIN_DISCONNECTED: + icm->xdomain_disconnected(tb, n->pkg); + break; } mutex_unlock(&tb->lock); @@ -927,6 +1122,10 @@ static void icm_unplug_children(struct tb_switch *sw) if (tb_is_upstream_port(port)) continue; + if (port->xdomain) { + port->xdomain->is_unplugged = true; + continue; + } if (!port->remote) continue; @@ -943,6 +1142,13 @@ static void icm_free_unplugged_children(struct tb_switch *sw) if (tb_is_upstream_port(port)) continue; + + if (port->xdomain && port->xdomain->is_unplugged) { + tb_xdomain_remove(port->xdomain); + port->xdomain = NULL; + continue; + } + if (!port->remote) continue; @@ -1009,8 +1215,10 @@ static int icm_start(struct tb *tb) tb->root_switch->no_nvm_upgrade = x86_apple_machine; ret = tb_switch_add(tb->root_switch); - if (ret) + if (ret) { tb_switch_put(tb->root_switch); + tb->root_switch = NULL; + } return ret; } @@ -1042,6 +1250,8 @@ static const struct tb_cm_ops icm_fr_ops = { .add_switch_key = icm_fr_add_switch_key, .challenge_switch_key = icm_fr_challenge_switch_key, .disconnect_pcie_paths = icm_disconnect_pcie_paths, + .approve_xdomain_paths = icm_fr_approve_xdomain_paths, + .disconnect_xdomain_paths = icm_fr_disconnect_xdomain_paths, }; struct tb *icm_probe(struct tb_nhi *nhi) @@ -1064,6 +1274,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) icm->get_route = icm_fr_get_route; icm->device_connected = icm_fr_device_connected; icm->device_disconnected = icm_fr_device_disconnected; + icm->xdomain_connected = icm_fr_xdomain_connected; + icm->xdomain_disconnected = icm_fr_xdomain_disconnected; tb->cm_ops = &icm_fr_ops; break; @@ -1077,6 +1289,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) icm->get_route = icm_ar_get_route; icm->device_connected = icm_fr_device_connected; icm->device_disconnected = icm_fr_device_disconnected; + icm->xdomain_connected = icm_fr_xdomain_connected; + icm->xdomain_disconnected = icm_fr_xdomain_disconnected; tb->cm_ops = &icm_fr_ops; break; } diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 5b5bb2c436be..0e05828983db 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -157,6 +157,8 @@ enum nhi_mailbox_cmd { NHI_MAILBOX_SAVE_DEVS = 0x05, NHI_MAILBOX_DISCONNECT_PCIE_PATHS = 0x06, NHI_MAILBOX_DRV_UNLOADS = 0x07, + NHI_MAILBOX_DISCONNECT_PA = 0x10, + NHI_MAILBOX_DISCONNECT_PB = 0x11, NHI_MAILBOX_ALLOW_ALL_DEVS = 0x23, }; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 53f40c57df59..dfc357d33e1e 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -171,11 +171,11 @@ static int nvm_authenticate_host(struct tb_switch *sw) /* * Root switch NVM upgrade requires that we disconnect the - * existing PCIe paths first (in case it is not in safe mode + * existing paths first (in case it is not in safe mode * already). */ if (!sw->safe_mode) { - ret = tb_domain_disconnect_pcie_paths(sw->tb); + ret = tb_domain_disconnect_all_paths(sw->tb); if (ret) return ret; /* @@ -1363,6 +1363,9 @@ void tb_switch_remove(struct tb_switch *sw) if (sw->ports[i].remote) tb_switch_remove(sw->ports[i].remote->sw); sw->ports[i].remote = NULL; + if (sw->ports[i].xdomain) + tb_xdomain_remove(sw->ports[i].xdomain); + sw->ports[i].xdomain = NULL; } if (!sw->is_unplugged) diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index ea21d927bd09..74af9d4929ab 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -9,6 +9,7 @@ #include #include +#include #include #include "tb_regs.h" @@ -109,14 +110,25 @@ struct tb_switch { /** * struct tb_port - a thunderbolt port, part of a tb_switch + * @config: Cached port configuration read from registers + * @sw: Switch the port belongs to + * @remote: Remote port (%NULL if not connected) + * @xdomain: Remote host (%NULL if not connected) + * @cap_phy: Offset, zero if not found + * @port: Port number on switch + * @disabled: Disabled by eeprom + * @dual_link_port: If the switch is connected using two ports, points + * to the other port. + * @link_nr: Is this primary or secondary port on the dual_link. */ struct tb_port { struct tb_regs_port_header config; struct tb_switch *sw; - struct tb_port *remote; /* remote port, NULL if not connected */ - int cap_phy; /* offset, zero if not found */ - u8 port; /* port number on switch */ - bool disabled; /* disabled by eeprom */ + struct tb_port *remote; + struct tb_xdomain *xdomain; + int cap_phy; + u8 port; + bool disabled; struct tb_port *dual_link_port; u8 link_nr:1; }; @@ -189,6 +201,8 @@ struct tb_path { * @add_switch_key: Add key to switch * @challenge_switch_key: Challenge switch using key * @disconnect_pcie_paths: Disconnects PCIe paths before NVM update + * @approve_xdomain_paths: Approve (establish) XDomain DMA paths + * @disconnect_xdomain_paths: Disconnect XDomain DMA paths */ struct tb_cm_ops { int (*driver_ready)(struct tb *tb); @@ -205,6 +219,8 @@ struct tb_cm_ops { int (*challenge_switch_key)(struct tb *tb, struct tb_switch *sw, const u8 *challenge, u8 *response); int (*disconnect_pcie_paths)(struct tb *tb); + int (*approve_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd); + int (*disconnect_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd); }; static inline void *tb_priv(struct tb *tb) @@ -331,6 +347,8 @@ extern struct device_type tb_switch_type; int tb_domain_init(void); void tb_domain_exit(void); void tb_switch_exit(void); +int tb_xdomain_init(void); +void tb_xdomain_exit(void); struct tb *tb_domain_alloc(struct tb_nhi *nhi, size_t privsize); int tb_domain_add(struct tb *tb); @@ -343,6 +361,9 @@ int tb_domain_approve_switch(struct tb *tb, struct tb_switch *sw); int tb_domain_approve_switch_key(struct tb *tb, struct tb_switch *sw); int tb_domain_challenge_switch_key(struct tb *tb, struct tb_switch *sw); int tb_domain_disconnect_pcie_paths(struct tb *tb); +int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd); +int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd); +int tb_domain_disconnect_all_paths(struct tb *tb); static inline void tb_domain_put(struct tb *tb) { @@ -422,4 +443,14 @@ static inline u64 tb_downstream_route(struct tb_port *port) | ((u64) port->port << (port->sw->config.depth * 8)); } +bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type, + const void *buf, size_t size); +struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent, + u64 route, const uuid_t *local_uuid, + const uuid_t *remote_uuid); +void tb_xdomain_add(struct tb_xdomain *xd); +void tb_xdomain_remove(struct tb_xdomain *xd); +struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link, + u8 depth); + #endif diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h index f2b2550cd97c..b0a092baa605 100644 --- a/drivers/thunderbolt/tb_msgs.h +++ b/drivers/thunderbolt/tb_msgs.h @@ -101,11 +101,14 @@ enum icm_pkg_code { ICM_CHALLENGE_DEVICE = 0x5, ICM_ADD_DEVICE_KEY = 0x6, ICM_GET_ROUTE = 0xa, + ICM_APPROVE_XDOMAIN = 0x10, }; enum icm_event_code { ICM_EVENT_DEVICE_CONNECTED = 3, ICM_EVENT_DEVICE_DISCONNECTED = 4, + ICM_EVENT_XDOMAIN_CONNECTED = 6, + ICM_EVENT_XDOMAIN_DISCONNECTED = 7, }; struct icm_pkg_header { @@ -188,6 +191,25 @@ struct icm_fr_event_device_disconnected { u16 link_info; }; +struct icm_fr_event_xdomain_connected { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + uuid_t local_uuid; + u32 local_route_hi; + u32 local_route_lo; + u32 remote_route_hi; + u32 remote_route_lo; +}; + +struct icm_fr_event_xdomain_disconnected { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; +}; + struct icm_fr_pkg_add_device_key { struct icm_pkg_header hdr; uuid_t ep_uuid; @@ -224,6 +246,28 @@ struct icm_fr_pkg_challenge_device_response { u32 response[8]; }; +struct icm_fr_pkg_approve_xdomain { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; +}; + +struct icm_fr_pkg_approve_xdomain_response { + struct icm_pkg_header hdr; + u16 reserved; + u16 link_info; + uuid_t remote_uuid; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; +}; + /* Alpine Ridge only messages */ struct icm_ar_pkg_get_route { @@ -240,4 +284,83 @@ struct icm_ar_pkg_get_route_response { u32 route_lo; }; +/* XDomain messages */ + +struct tb_xdomain_header { + u32 route_hi; + u32 route_lo; + u32 length_sn; +}; + +#define TB_XDOMAIN_LENGTH_MASK GENMASK(5, 0) +#define TB_XDOMAIN_SN_MASK GENMASK(28, 27) +#define TB_XDOMAIN_SN_SHIFT 27 + +enum tb_xdp_type { + UUID_REQUEST_OLD = 1, + UUID_RESPONSE = 2, + PROPERTIES_REQUEST, + PROPERTIES_RESPONSE, + PROPERTIES_CHANGED_REQUEST, + PROPERTIES_CHANGED_RESPONSE, + ERROR_RESPONSE, + UUID_REQUEST = 12, +}; + +struct tb_xdp_header { + struct tb_xdomain_header xd_hdr; + uuid_t uuid; + u32 type; +}; + +struct tb_xdp_properties { + struct tb_xdp_header hdr; + uuid_t src_uuid; + uuid_t dst_uuid; + u16 offset; + u16 reserved; +}; + +struct tb_xdp_properties_response { + struct tb_xdp_header hdr; + uuid_t src_uuid; + uuid_t dst_uuid; + u16 offset; + u16 data_length; + u32 generation; + u32 data[0]; +}; + +/* + * Max length of data array single XDomain property response is allowed + * to carry. + */ +#define TB_XDP_PROPERTIES_MAX_DATA_LENGTH \ + (((256 - 4 - sizeof(struct tb_xdp_properties_response))) / 4) + +/* Maximum size of the total property block in dwords we allow */ +#define TB_XDP_PROPERTIES_MAX_LENGTH 500 + +struct tb_xdp_properties_changed { + struct tb_xdp_header hdr; + uuid_t src_uuid; +}; + +struct tb_xdp_properties_changed_response { + struct tb_xdp_header hdr; +}; + +enum tb_xdp_error { + ERROR_SUCCESS, + ERROR_UNKNOWN_PACKET, + ERROR_UNKNOWN_DOMAIN, + ERROR_NOT_SUPPORTED, + ERROR_NOT_READY, +}; + +struct tb_xdp_error_response { + struct tb_xdp_header hdr; + u32 error; +}; + #endif diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c new file mode 100644 index 000000000000..f2d06f6f7be9 --- /dev/null +++ b/drivers/thunderbolt/xdomain.c @@ -0,0 +1,1576 @@ +/* + * Thunderbolt XDomain discovery protocol support + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include "tb.h" + +#define XDOMAIN_DEFAULT_TIMEOUT 5000 /* ms */ +#define XDOMAIN_PROPERTIES_RETRIES 60 +#define XDOMAIN_PROPERTIES_CHANGED_RETRIES 10 + +struct xdomain_request_work { + struct work_struct work; + struct tb_xdp_header *pkg; + struct tb *tb; +}; + +/* Serializes access to the properties and protocol handlers below */ +static DEFINE_MUTEX(xdomain_lock); + +/* Properties exposed to the remote domains */ +static struct tb_property_dir *xdomain_property_dir; +static u32 *xdomain_property_block; +static u32 xdomain_property_block_len; +static u32 xdomain_property_block_gen; + +/* Additional protocol handlers */ +static LIST_HEAD(protocol_handlers); + +/* UUID for XDomain discovery protocol: b638d70e-42ff-40bb-97c2-90e2c0b2ff07 */ +static const uuid_t tb_xdp_uuid = + UUID_INIT(0xb638d70e, 0x42ff, 0x40bb, + 0x97, 0xc2, 0x90, 0xe2, 0xc0, 0xb2, 0xff, 0x07); + +static bool tb_xdomain_match(const struct tb_cfg_request *req, + const struct ctl_pkg *pkg) +{ + switch (pkg->frame.eof) { + case TB_CFG_PKG_ERROR: + return true; + + case TB_CFG_PKG_XDOMAIN_RESP: { + const struct tb_xdp_header *res_hdr = pkg->buffer; + const struct tb_xdp_header *req_hdr = req->request; + u8 req_seq, res_seq; + + if (pkg->frame.size < req->response_size / 4) + return false; + + /* Make sure route matches */ + if ((res_hdr->xd_hdr.route_hi & ~BIT(31)) != + req_hdr->xd_hdr.route_hi) + return false; + if ((res_hdr->xd_hdr.route_lo) != req_hdr->xd_hdr.route_lo) + return false; + + /* Then check that the sequence number matches */ + res_seq = res_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; + res_seq >>= TB_XDOMAIN_SN_SHIFT; + req_seq = req_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK; + req_seq >>= TB_XDOMAIN_SN_SHIFT; + if (res_seq != req_seq) + return false; + + /* Check that the XDomain protocol matches */ + if (!uuid_equal(&res_hdr->uuid, &req_hdr->uuid)) + return false; + + return true; + } + + default: + return false; + } +} + +static bool tb_xdomain_copy(struct tb_cfg_request *req, + const struct ctl_pkg *pkg) +{ + memcpy(req->response, pkg->buffer, req->response_size); + req->result.err = 0; + return true; +} + +static void response_ready(void *data) +{ + tb_cfg_request_put(data); +} + +static int __tb_xdomain_response(struct tb_ctl *ctl, const void *response, + size_t size, enum tb_cfg_pkg_type type) +{ + struct tb_cfg_request *req; + + req = tb_cfg_request_alloc(); + if (!req) + return -ENOMEM; + + req->match = tb_xdomain_match; + req->copy = tb_xdomain_copy; + req->request = response; + req->request_size = size; + req->request_type = type; + + return tb_cfg_request(ctl, req, response_ready, req); +} + +/** + * tb_xdomain_response() - Send a XDomain response message + * @xd: XDomain to send the message + * @response: Response to send + * @size: Size of the response + * @type: PDF type of the response + * + * This can be used to send a XDomain response message to the other + * domain. No response for the message is expected. + * + * Return: %0 in case of success and negative errno in case of failure + */ +int tb_xdomain_response(struct tb_xdomain *xd, const void *response, + size_t size, enum tb_cfg_pkg_type type) +{ + return __tb_xdomain_response(xd->tb->ctl, response, size, type); +} +EXPORT_SYMBOL_GPL(tb_xdomain_response); + +static int __tb_xdomain_request(struct tb_ctl *ctl, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, void *response, + size_t response_size, enum tb_cfg_pkg_type response_type, + unsigned int timeout_msec) +{ + struct tb_cfg_request *req; + struct tb_cfg_result res; + + req = tb_cfg_request_alloc(); + if (!req) + return -ENOMEM; + + req->match = tb_xdomain_match; + req->copy = tb_xdomain_copy; + req->request = request; + req->request_size = request_size; + req->request_type = request_type; + req->response = response; + req->response_size = response_size; + req->response_type = response_type; + + res = tb_cfg_request_sync(ctl, req, timeout_msec); + + tb_cfg_request_put(req); + + return res.err == 1 ? -EIO : res.err; +} + +/** + * tb_xdomain_request() - Send a XDomain request + * @xd: XDomain to send the request + * @request: Request to send + * @request_size: Size of the request in bytes + * @request_type: PDF type of the request + * @response: Response is copied here + * @response_size: Expected size of the response in bytes + * @response_type: Expected PDF type of the response + * @timeout_msec: Timeout in milliseconds to wait for the response + * + * This function can be used to send XDomain control channel messages to + * the other domain. The function waits until the response is received + * or when timeout triggers. Whichever comes first. + * + * Return: %0 in case of success and negative errno in case of failure + */ +int tb_xdomain_request(struct tb_xdomain *xd, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, + void *response, size_t response_size, + enum tb_cfg_pkg_type response_type, unsigned int timeout_msec) +{ + return __tb_xdomain_request(xd->tb->ctl, request, request_size, + request_type, response, response_size, + response_type, timeout_msec); +} +EXPORT_SYMBOL_GPL(tb_xdomain_request); + +static inline void tb_xdp_fill_header(struct tb_xdp_header *hdr, u64 route, + u8 sequence, enum tb_xdp_type type, size_t size) +{ + u32 length_sn; + + length_sn = (size - sizeof(hdr->xd_hdr)) / 4; + length_sn |= (sequence << TB_XDOMAIN_SN_SHIFT) & TB_XDOMAIN_SN_MASK; + + hdr->xd_hdr.route_hi = upper_32_bits(route); + hdr->xd_hdr.route_lo = lower_32_bits(route); + hdr->xd_hdr.length_sn = length_sn; + hdr->type = type; + memcpy(&hdr->uuid, &tb_xdp_uuid, sizeof(tb_xdp_uuid)); +} + +static int tb_xdp_handle_error(const struct tb_xdp_header *hdr) +{ + const struct tb_xdp_error_response *error; + + if (hdr->type != ERROR_RESPONSE) + return 0; + + error = (const struct tb_xdp_error_response *)hdr; + + switch (error->error) { + case ERROR_UNKNOWN_PACKET: + case ERROR_UNKNOWN_DOMAIN: + return -EIO; + case ERROR_NOT_SUPPORTED: + return -ENOTSUPP; + case ERROR_NOT_READY: + return -EAGAIN; + default: + break; + } + + return 0; +} + +static int tb_xdp_error_response(struct tb_ctl *ctl, u64 route, u8 sequence, + enum tb_xdp_error error) +{ + struct tb_xdp_error_response res; + + memset(&res, 0, sizeof(res)); + tb_xdp_fill_header(&res.hdr, route, sequence, ERROR_RESPONSE, + sizeof(res)); + res.error = error; + + return __tb_xdomain_response(ctl, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tb_xdp_properties_request(struct tb_ctl *ctl, u64 route, + const uuid_t *src_uuid, const uuid_t *dst_uuid, int retry, + u32 **block, u32 *generation) +{ + struct tb_xdp_properties_response *res; + struct tb_xdp_properties req; + u16 data_len, len; + size_t total_size; + u32 *data = NULL; + int ret; + + total_size = sizeof(*res) + TB_XDP_PROPERTIES_MAX_DATA_LENGTH * 4; + res = kzalloc(total_size, GFP_KERNEL); + if (!res) + return -ENOMEM; + + memset(&req, 0, sizeof(req)); + tb_xdp_fill_header(&req.hdr, route, retry % 4, PROPERTIES_REQUEST, + sizeof(req)); + memcpy(&req.src_uuid, src_uuid, sizeof(*src_uuid)); + memcpy(&req.dst_uuid, dst_uuid, sizeof(*dst_uuid)); + + len = 0; + data_len = 0; + + do { + ret = __tb_xdomain_request(ctl, &req, sizeof(req), + TB_CFG_PKG_XDOMAIN_REQ, res, + total_size, TB_CFG_PKG_XDOMAIN_RESP, + XDOMAIN_DEFAULT_TIMEOUT); + if (ret) + goto err; + + ret = tb_xdp_handle_error(&res->hdr); + if (ret) + goto err; + + /* + * Package length includes the whole payload without the + * XDomain header. Validate first that the package is at + * least size of the response structure. + */ + len = res->hdr.xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK; + if (len < sizeof(*res) / 4) { + ret = -EINVAL; + goto err; + } + + len += sizeof(res->hdr.xd_hdr) / 4; + len -= sizeof(*res) / 4; + + if (res->offset != req.offset) { + ret = -EINVAL; + goto err; + } + + /* + * First time allocate block that has enough space for + * the whole properties block. + */ + if (!data) { + data_len = res->data_length; + if (data_len > TB_XDP_PROPERTIES_MAX_LENGTH) { + ret = -E2BIG; + goto err; + } + + data = kcalloc(data_len, sizeof(u32), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err; + } + } + + memcpy(data + req.offset, res->data, len * 4); + req.offset += len; + } while (!data_len || req.offset < data_len); + + *block = data; + *generation = res->generation; + + kfree(res); + + return data_len; + +err: + kfree(data); + kfree(res); + + return ret; +} + +static int tb_xdp_properties_response(struct tb *tb, struct tb_ctl *ctl, + u64 route, u8 sequence, const uuid_t *src_uuid, + const struct tb_xdp_properties *req) +{ + struct tb_xdp_properties_response *res; + size_t total_size; + u16 len; + int ret; + + /* + * Currently we expect all requests to be directed to us. The + * protocol supports forwarding, though which we might add + * support later on. + */ + if (!uuid_equal(src_uuid, &req->dst_uuid)) { + tb_xdp_error_response(ctl, route, sequence, + ERROR_UNKNOWN_DOMAIN); + return 0; + } + + mutex_lock(&xdomain_lock); + + if (req->offset >= xdomain_property_block_len) { + mutex_unlock(&xdomain_lock); + return -EINVAL; + } + + len = xdomain_property_block_len - req->offset; + len = min_t(u16, len, TB_XDP_PROPERTIES_MAX_DATA_LENGTH); + total_size = sizeof(*res) + len * 4; + + res = kzalloc(total_size, GFP_KERNEL); + if (!res) { + mutex_unlock(&xdomain_lock); + return -ENOMEM; + } + + tb_xdp_fill_header(&res->hdr, route, sequence, PROPERTIES_RESPONSE, + total_size); + res->generation = xdomain_property_block_gen; + res->data_length = xdomain_property_block_len; + res->offset = req->offset; + uuid_copy(&res->src_uuid, src_uuid); + uuid_copy(&res->dst_uuid, &req->src_uuid); + memcpy(res->data, &xdomain_property_block[req->offset], len * 4); + + mutex_unlock(&xdomain_lock); + + ret = __tb_xdomain_response(ctl, res, total_size, + TB_CFG_PKG_XDOMAIN_RESP); + + kfree(res); + return ret; +} + +static int tb_xdp_properties_changed_request(struct tb_ctl *ctl, u64 route, + int retry, const uuid_t *uuid) +{ + struct tb_xdp_properties_changed_response res; + struct tb_xdp_properties_changed req; + int ret; + + memset(&req, 0, sizeof(req)); + tb_xdp_fill_header(&req.hdr, route, retry % 4, + PROPERTIES_CHANGED_REQUEST, sizeof(req)); + uuid_copy(&req.src_uuid, uuid); + + memset(&res, 0, sizeof(res)); + ret = __tb_xdomain_request(ctl, &req, sizeof(req), + TB_CFG_PKG_XDOMAIN_REQ, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP, + XDOMAIN_DEFAULT_TIMEOUT); + if (ret) + return ret; + + return tb_xdp_handle_error(&res.hdr); +} + +static int +tb_xdp_properties_changed_response(struct tb_ctl *ctl, u64 route, u8 sequence) +{ + struct tb_xdp_properties_changed_response res; + + memset(&res, 0, sizeof(res)); + tb_xdp_fill_header(&res.hdr, route, sequence, + PROPERTIES_CHANGED_RESPONSE, sizeof(res)); + return __tb_xdomain_response(ctl, &res, sizeof(res), + TB_CFG_PKG_XDOMAIN_RESP); +} + +/** + * tb_register_protocol_handler() - Register protocol handler + * @handler: Handler to register + * + * This allows XDomain service drivers to hook into incoming XDomain + * messages. After this function is called the service driver needs to + * be able to handle calls to callback whenever a package with the + * registered protocol is received. + */ +int tb_register_protocol_handler(struct tb_protocol_handler *handler) +{ + if (!handler->uuid || !handler->callback) + return -EINVAL; + if (uuid_equal(handler->uuid, &tb_xdp_uuid)) + return -EINVAL; + + mutex_lock(&xdomain_lock); + list_add_tail(&handler->list, &protocol_handlers); + mutex_unlock(&xdomain_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(tb_register_protocol_handler); + +/** + * tb_unregister_protocol_handler() - Unregister protocol handler + * @handler: Handler to unregister + * + * Removes the previously registered protocol handler. + */ +void tb_unregister_protocol_handler(struct tb_protocol_handler *handler) +{ + mutex_lock(&xdomain_lock); + list_del_init(&handler->list); + mutex_unlock(&xdomain_lock); +} +EXPORT_SYMBOL_GPL(tb_unregister_protocol_handler); + +static void tb_xdp_handle_request(struct work_struct *work) +{ + struct xdomain_request_work *xw = container_of(work, typeof(*xw), work); + const struct tb_xdp_header *pkg = xw->pkg; + const struct tb_xdomain_header *xhdr = &pkg->xd_hdr; + struct tb *tb = xw->tb; + struct tb_ctl *ctl = tb->ctl; + const uuid_t *uuid; + int ret = 0; + u8 sequence; + u64 route; + + route = ((u64)xhdr->route_hi << 32 | xhdr->route_lo) & ~BIT_ULL(63); + sequence = xhdr->length_sn & TB_XDOMAIN_SN_MASK; + sequence >>= TB_XDOMAIN_SN_SHIFT; + + mutex_lock(&tb->lock); + if (tb->root_switch) + uuid = tb->root_switch->uuid; + else + uuid = NULL; + mutex_unlock(&tb->lock); + + if (!uuid) { + tb_xdp_error_response(ctl, route, sequence, ERROR_NOT_READY); + goto out; + } + + switch (pkg->type) { + case PROPERTIES_REQUEST: + ret = tb_xdp_properties_response(tb, ctl, route, sequence, uuid, + (const struct tb_xdp_properties *)pkg); + break; + + case PROPERTIES_CHANGED_REQUEST: { + const struct tb_xdp_properties_changed *xchg = + (const struct tb_xdp_properties_changed *)pkg; + struct tb_xdomain *xd; + + ret = tb_xdp_properties_changed_response(ctl, route, sequence); + + /* + * Since the properties have been changed, let's update + * the xdomain related to this connection as well in + * case there is a change in services it offers. + */ + xd = tb_xdomain_find_by_uuid_locked(tb, &xchg->src_uuid); + if (xd) { + queue_delayed_work(tb->wq, &xd->get_properties_work, + msecs_to_jiffies(50)); + tb_xdomain_put(xd); + } + + break; + } + + default: + break; + } + + if (ret) { + tb_warn(tb, "failed to send XDomain response for %#x\n", + pkg->type); + } + +out: + kfree(xw->pkg); + kfree(xw); +} + +static void +tb_xdp_schedule_request(struct tb *tb, const struct tb_xdp_header *hdr, + size_t size) +{ + struct xdomain_request_work *xw; + + xw = kmalloc(sizeof(*xw), GFP_KERNEL); + if (!xw) + return; + + INIT_WORK(&xw->work, tb_xdp_handle_request); + xw->pkg = kmemdup(hdr, size, GFP_KERNEL); + xw->tb = tb; + + queue_work(tb->wq, &xw->work); +} + +/** + * tb_register_service_driver() - Register XDomain service driver + * @drv: Driver to register + * + * Registers new service driver from @drv to the bus. + */ +int tb_register_service_driver(struct tb_service_driver *drv) +{ + drv->driver.bus = &tb_bus_type; + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(tb_register_service_driver); + +/** + * tb_unregister_service_driver() - Unregister XDomain service driver + * @xdrv: Driver to unregister + * + * Unregisters XDomain service driver from the bus. + */ +void tb_unregister_service_driver(struct tb_service_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(tb_unregister_service_driver); + +static ssize_t key_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + /* + * It should be null terminated but anything else is pretty much + * allowed. + */ + return sprintf(buf, "%*pEp\n", (int)strlen(svc->key), svc->key); +} +static DEVICE_ATTR_RO(key); + +static int get_modalias(struct tb_service *svc, char *buf, size_t size) +{ + return snprintf(buf, size, "tbsvc:k%sp%08Xv%08Xr%08X", svc->key, + svc->prtcid, svc->prtcvers, svc->prtcrevs); +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + /* Full buffer size except new line and null termination */ + get_modalias(svc, buf, PAGE_SIZE - 2); + return sprintf(buf, "%s\n", buf); +} +static DEVICE_ATTR_RO(modalias); + +static ssize_t prtcid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcid); +} +static DEVICE_ATTR_RO(prtcid); + +static ssize_t prtcvers_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcvers); +} +static DEVICE_ATTR_RO(prtcvers); + +static ssize_t prtcrevs_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "%u\n", svc->prtcrevs); +} +static DEVICE_ATTR_RO(prtcrevs); + +static ssize_t prtcstns_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + + return sprintf(buf, "0x%08x\n", svc->prtcstns); +} +static DEVICE_ATTR_RO(prtcstns); + +static struct attribute *tb_service_attrs[] = { + &dev_attr_key.attr, + &dev_attr_modalias.attr, + &dev_attr_prtcid.attr, + &dev_attr_prtcvers.attr, + &dev_attr_prtcrevs.attr, + &dev_attr_prtcstns.attr, + NULL, +}; + +static struct attribute_group tb_service_attr_group = { + .attrs = tb_service_attrs, +}; + +static const struct attribute_group *tb_service_attr_groups[] = { + &tb_service_attr_group, + NULL, +}; + +static int tb_service_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + char modalias[64]; + + get_modalias(svc, modalias, sizeof(modalias)); + return add_uevent_var(env, "MODALIAS=%s", modalias); +} + +static void tb_service_release(struct device *dev) +{ + struct tb_service *svc = container_of(dev, struct tb_service, dev); + struct tb_xdomain *xd = tb_service_parent(svc); + + ida_simple_remove(&xd->service_ids, svc->id); + kfree(svc->key); + kfree(svc); +} + +struct device_type tb_service_type = { + .name = "thunderbolt_service", + .groups = tb_service_attr_groups, + .uevent = tb_service_uevent, + .release = tb_service_release, +}; +EXPORT_SYMBOL_GPL(tb_service_type); + +static int remove_missing_service(struct device *dev, void *data) +{ + struct tb_xdomain *xd = data; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return 0; + + if (!tb_property_find(xd->properties, svc->key, + TB_PROPERTY_TYPE_DIRECTORY)) + device_unregister(dev); + + return 0; +} + +static int find_service(struct device *dev, void *data) +{ + const struct tb_property *p = data; + struct tb_service *svc; + + svc = tb_to_service(dev); + if (!svc) + return 0; + + return !strcmp(svc->key, p->key); +} + +static int populate_service(struct tb_service *svc, + struct tb_property *property) +{ + struct tb_property_dir *dir = property->value.dir; + struct tb_property *p; + + /* Fill in standard properties */ + p = tb_property_find(dir, "prtcid", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcid = p->value.immediate; + p = tb_property_find(dir, "prtcvers", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcvers = p->value.immediate; + p = tb_property_find(dir, "prtcrevs", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcrevs = p->value.immediate; + p = tb_property_find(dir, "prtcstns", TB_PROPERTY_TYPE_VALUE); + if (p) + svc->prtcstns = p->value.immediate; + + svc->key = kstrdup(property->key, GFP_KERNEL); + if (!svc->key) + return -ENOMEM; + + return 0; +} + +static void enumerate_services(struct tb_xdomain *xd) +{ + struct tb_service *svc; + struct tb_property *p; + struct device *dev; + + /* + * First remove all services that are not available anymore in + * the updated property block. + */ + device_for_each_child_reverse(&xd->dev, xd, remove_missing_service); + + /* Then re-enumerate properties creating new services as we go */ + tb_property_for_each(xd->properties, p) { + if (p->type != TB_PROPERTY_TYPE_DIRECTORY) + continue; + + /* If the service exists already we are fine */ + dev = device_find_child(&xd->dev, p, find_service); + if (dev) { + put_device(dev); + continue; + } + + svc = kzalloc(sizeof(*svc), GFP_KERNEL); + if (!svc) + break; + + if (populate_service(svc, p)) { + kfree(svc); + break; + } + + svc->id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); + svc->dev.bus = &tb_bus_type; + svc->dev.type = &tb_service_type; + svc->dev.parent = &xd->dev; + dev_set_name(&svc->dev, "%s.%d", dev_name(&xd->dev), svc->id); + + if (device_register(&svc->dev)) { + put_device(&svc->dev); + break; + } + } +} + +static int populate_properties(struct tb_xdomain *xd, + struct tb_property_dir *dir) +{ + const struct tb_property *p; + + /* Required properties */ + p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_VALUE); + if (!p) + return -EINVAL; + xd->device = p->value.immediate; + + p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_VALUE); + if (!p) + return -EINVAL; + xd->vendor = p->value.immediate; + + kfree(xd->device_name); + xd->device_name = NULL; + kfree(xd->vendor_name); + xd->vendor_name = NULL; + + /* Optional properties */ + p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_TEXT); + if (p) + xd->device_name = kstrdup(p->value.text, GFP_KERNEL); + p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_TEXT); + if (p) + xd->vendor_name = kstrdup(p->value.text, GFP_KERNEL); + + return 0; +} + +/* Called with @xd->lock held */ +static void tb_xdomain_restore_paths(struct tb_xdomain *xd) +{ + if (!xd->resume) + return; + + xd->resume = false; + if (xd->transmit_path) { + dev_dbg(&xd->dev, "re-establishing DMA path\n"); + tb_domain_approve_xdomain_paths(xd->tb, xd); + } +} + +static void tb_xdomain_get_properties(struct work_struct *work) +{ + struct tb_xdomain *xd = container_of(work, typeof(*xd), + get_properties_work.work); + struct tb_property_dir *dir; + struct tb *tb = xd->tb; + bool update = false; + u32 *block = NULL; + u32 gen = 0; + int ret; + + ret = tb_xdp_properties_request(tb->ctl, xd->route, xd->local_uuid, + xd->remote_uuid, xd->properties_retries, + &block, &gen); + if (ret < 0) { + if (xd->properties_retries-- > 0) { + queue_delayed_work(xd->tb->wq, &xd->get_properties_work, + msecs_to_jiffies(1000)); + } else { + /* Give up now */ + dev_err(&xd->dev, + "failed read XDomain properties from %pUb\n", + xd->remote_uuid); + } + return; + } + + xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES; + + mutex_lock(&xd->lock); + + /* Only accept newer generation properties */ + if (xd->properties && gen <= xd->property_block_gen) { + /* + * On resume it is likely that the properties block is + * not changed (unless the other end added or removed + * services). However, we need to make sure the existing + * DMA paths are restored properly. + */ + tb_xdomain_restore_paths(xd); + goto err_free_block; + } + + dir = tb_property_parse_dir(block, ret); + if (!dir) { + dev_err(&xd->dev, "failed to parse XDomain properties\n"); + goto err_free_block; + } + + ret = populate_properties(xd, dir); + if (ret) { + dev_err(&xd->dev, "missing XDomain properties in response\n"); + goto err_free_dir; + } + + /* Release the existing one */ + if (xd->properties) { + tb_property_free_dir(xd->properties); + update = true; + } + + xd->properties = dir; + xd->property_block_gen = gen; + + tb_xdomain_restore_paths(xd); + + mutex_unlock(&xd->lock); + + kfree(block); + + /* + * Now the device should be ready enough so we can add it to the + * bus and let userspace know about it. If the device is already + * registered, we notify the userspace that it has changed. + */ + if (!update) { + if (device_add(&xd->dev)) { + dev_err(&xd->dev, "failed to add XDomain device\n"); + return; + } + } else { + kobject_uevent(&xd->dev.kobj, KOBJ_CHANGE); + } + + enumerate_services(xd); + return; + +err_free_dir: + tb_property_free_dir(dir); +err_free_block: + kfree(block); + mutex_unlock(&xd->lock); +} + +static void tb_xdomain_properties_changed(struct work_struct *work) +{ + struct tb_xdomain *xd = container_of(work, typeof(*xd), + properties_changed_work.work); + int ret; + + ret = tb_xdp_properties_changed_request(xd->tb->ctl, xd->route, + xd->properties_changed_retries, xd->local_uuid); + if (ret) { + if (xd->properties_changed_retries-- > 0) + queue_delayed_work(xd->tb->wq, + &xd->properties_changed_work, + msecs_to_jiffies(1000)); + return; + } + + xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES; +} + +static ssize_t device_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%#x\n", xd->device); +} +static DEVICE_ATTR_RO(device); + +static ssize_t +device_name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + int ret; + + if (mutex_lock_interruptible(&xd->lock)) + return -ERESTARTSYS; + ret = sprintf(buf, "%s\n", xd->device_name ? xd->device_name : ""); + mutex_unlock(&xd->lock); + + return ret; +} +static DEVICE_ATTR_RO(device_name); + +static ssize_t vendor_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%#x\n", xd->vendor); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t +vendor_name_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + int ret; + + if (mutex_lock_interruptible(&xd->lock)) + return -ERESTARTSYS; + ret = sprintf(buf, "%s\n", xd->vendor_name ? xd->vendor_name : ""); + mutex_unlock(&xd->lock); + + return ret; +} +static DEVICE_ATTR_RO(vendor_name); + +static ssize_t unique_id_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + return sprintf(buf, "%pUb\n", xd->remote_uuid); +} +static DEVICE_ATTR_RO(unique_id); + +static struct attribute *xdomain_attrs[] = { + &dev_attr_device.attr, + &dev_attr_device_name.attr, + &dev_attr_unique_id.attr, + &dev_attr_vendor.attr, + &dev_attr_vendor_name.attr, + NULL, +}; + +static struct attribute_group xdomain_attr_group = { + .attrs = xdomain_attrs, +}; + +static const struct attribute_group *xdomain_attr_groups[] = { + &xdomain_attr_group, + NULL, +}; + +static void tb_xdomain_release(struct device *dev) +{ + struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev); + + put_device(xd->dev.parent); + + tb_property_free_dir(xd->properties); + ida_destroy(&xd->service_ids); + + kfree(xd->local_uuid); + kfree(xd->remote_uuid); + kfree(xd->device_name); + kfree(xd->vendor_name); + kfree(xd); +} + +static void start_handshake(struct tb_xdomain *xd) +{ + xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES; + xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES; + + /* Start exchanging properties with the other host */ + queue_delayed_work(xd->tb->wq, &xd->properties_changed_work, + msecs_to_jiffies(100)); + queue_delayed_work(xd->tb->wq, &xd->get_properties_work, + msecs_to_jiffies(1000)); +} + +static void stop_handshake(struct tb_xdomain *xd) +{ + xd->properties_retries = 0; + xd->properties_changed_retries = 0; + + cancel_delayed_work_sync(&xd->get_properties_work); + cancel_delayed_work_sync(&xd->properties_changed_work); +} + +static int __maybe_unused tb_xdomain_suspend(struct device *dev) +{ + stop_handshake(tb_to_xdomain(dev)); + return 0; +} + +static int __maybe_unused tb_xdomain_resume(struct device *dev) +{ + struct tb_xdomain *xd = tb_to_xdomain(dev); + + /* + * Ask tb_xdomain_get_properties() restore any existing DMA + * paths after properties are re-read. + */ + xd->resume = true; + start_handshake(xd); + + return 0; +} + +static const struct dev_pm_ops tb_xdomain_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tb_xdomain_suspend, tb_xdomain_resume) +}; + +struct device_type tb_xdomain_type = { + .name = "thunderbolt_xdomain", + .release = tb_xdomain_release, + .pm = &tb_xdomain_pm_ops, +}; +EXPORT_SYMBOL_GPL(tb_xdomain_type); + +/** + * tb_xdomain_alloc() - Allocate new XDomain object + * @tb: Domain where the XDomain belongs + * @parent: Parent device (the switch through the connection to the + * other domain is reached). + * @route: Route string used to reach the other domain + * @local_uuid: Our local domain UUID + * @remote_uuid: UUID of the other domain + * + * Allocates new XDomain structure and returns pointer to that. The + * object must be released by calling tb_xdomain_put(). + */ +struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent, + u64 route, const uuid_t *local_uuid, + const uuid_t *remote_uuid) +{ + struct tb_xdomain *xd; + + xd = kzalloc(sizeof(*xd), GFP_KERNEL); + if (!xd) + return NULL; + + xd->tb = tb; + xd->route = route; + ida_init(&xd->service_ids); + mutex_init(&xd->lock); + INIT_DELAYED_WORK(&xd->get_properties_work, tb_xdomain_get_properties); + INIT_DELAYED_WORK(&xd->properties_changed_work, + tb_xdomain_properties_changed); + + xd->local_uuid = kmemdup(local_uuid, sizeof(uuid_t), GFP_KERNEL); + if (!xd->local_uuid) + goto err_free; + + xd->remote_uuid = kmemdup(remote_uuid, sizeof(uuid_t), GFP_KERNEL); + if (!xd->remote_uuid) + goto err_free_local_uuid; + + device_initialize(&xd->dev); + xd->dev.parent = get_device(parent); + xd->dev.bus = &tb_bus_type; + xd->dev.type = &tb_xdomain_type; + xd->dev.groups = xdomain_attr_groups; + dev_set_name(&xd->dev, "%u-%llx", tb->index, route); + + return xd; + +err_free_local_uuid: + kfree(xd->local_uuid); +err_free: + kfree(xd); + + return NULL; +} + +/** + * tb_xdomain_add() - Add XDomain to the bus + * @xd: XDomain to add + * + * This function starts XDomain discovery protocol handshake and + * eventually adds the XDomain to the bus. After calling this function + * the caller needs to call tb_xdomain_remove() in order to remove and + * release the object regardless whether the handshake succeeded or not. + */ +void tb_xdomain_add(struct tb_xdomain *xd) +{ + /* Start exchanging properties with the other host */ + start_handshake(xd); +} + +static int unregister_service(struct device *dev, void *data) +{ + device_unregister(dev); + return 0; +} + +/** + * tb_xdomain_remove() - Remove XDomain from the bus + * @xd: XDomain to remove + * + * This will stop all ongoing configuration work and remove the XDomain + * along with any services from the bus. When the last reference to @xd + * is released the object will be released as well. + */ +void tb_xdomain_remove(struct tb_xdomain *xd) +{ + stop_handshake(xd); + + device_for_each_child_reverse(&xd->dev, xd, unregister_service); + + if (!device_is_registered(&xd->dev)) + put_device(&xd->dev); + else + device_unregister(&xd->dev); +} + +/** + * tb_xdomain_enable_paths() - Enable DMA paths for XDomain connection + * @xd: XDomain connection + * @transmit_path: HopID of the transmit path the other end is using to + * send packets + * @transmit_ring: DMA ring used to receive packets from the other end + * @receive_path: HopID of the receive path the other end is using to + * receive packets + * @receive_ring: DMA ring used to send packets to the other end + * + * The function enables DMA paths accordingly so that after successful + * return the caller can send and receive packets using high-speed DMA + * path. + * + * Return: %0 in case of success and negative errno in case of error + */ +int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, + u16 transmit_ring, u16 receive_path, + u16 receive_ring) +{ + int ret; + + mutex_lock(&xd->lock); + + if (xd->transmit_path) { + ret = xd->transmit_path == transmit_path ? 0 : -EBUSY; + goto exit_unlock; + } + + xd->transmit_path = transmit_path; + xd->transmit_ring = transmit_ring; + xd->receive_path = receive_path; + xd->receive_ring = receive_ring; + + ret = tb_domain_approve_xdomain_paths(xd->tb, xd); + +exit_unlock: + mutex_unlock(&xd->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tb_xdomain_enable_paths); + +/** + * tb_xdomain_disable_paths() - Disable DMA paths for XDomain connection + * @xd: XDomain connection + * + * This does the opposite of tb_xdomain_enable_paths(). After call to + * this the caller is not expected to use the rings anymore. + * + * Return: %0 in case of success and negative errno in case of error + */ +int tb_xdomain_disable_paths(struct tb_xdomain *xd) +{ + int ret = 0; + + mutex_lock(&xd->lock); + if (xd->transmit_path) { + xd->transmit_path = 0; + xd->transmit_ring = 0; + xd->receive_path = 0; + xd->receive_ring = 0; + + ret = tb_domain_disconnect_xdomain_paths(xd->tb, xd); + } + mutex_unlock(&xd->lock); + + return ret; +} +EXPORT_SYMBOL_GPL(tb_xdomain_disable_paths); + +struct tb_xdomain_lookup { + const uuid_t *uuid; + u8 link; + u8 depth; +}; + +static struct tb_xdomain *switch_find_xdomain(struct tb_switch *sw, + const struct tb_xdomain_lookup *lookup) +{ + int i; + + for (i = 1; i <= sw->config.max_port_number; i++) { + struct tb_port *port = &sw->ports[i]; + struct tb_xdomain *xd; + + if (tb_is_upstream_port(port)) + continue; + + if (port->xdomain) { + xd = port->xdomain; + + if (lookup->uuid) { + if (uuid_equal(xd->remote_uuid, lookup->uuid)) + return xd; + } else if (lookup->link == xd->link && + lookup->depth == xd->depth) { + return xd; + } + } else if (port->remote) { + xd = switch_find_xdomain(port->remote->sw, lookup); + if (xd) + return xd; + } + } + + return NULL; +} + +/** + * tb_xdomain_find_by_uuid() - Find an XDomain by UUID + * @tb: Domain where the XDomain belongs to + * @uuid: UUID to look for + * + * Finds XDomain by walking through the Thunderbolt topology below @tb. + * The returned XDomain will have its reference count increased so the + * caller needs to call tb_xdomain_put() when it is done with the + * object. + * + * This will find all XDomains including the ones that are not yet added + * to the bus (handshake is still in progress). + * + * The caller needs to hold @tb->lock. + */ +struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid) +{ + struct tb_xdomain_lookup lookup; + struct tb_xdomain *xd; + + memset(&lookup, 0, sizeof(lookup)); + lookup.uuid = uuid; + + xd = switch_find_xdomain(tb->root_switch, &lookup); + if (xd) { + get_device(&xd->dev); + return xd; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(tb_xdomain_find_by_uuid); + +/** + * tb_xdomain_find_by_link_depth() - Find an XDomain by link and depth + * @tb: Domain where the XDomain belongs to + * @link: Root switch link number + * @depth: Depth in the link + * + * Finds XDomain by walking through the Thunderbolt topology below @tb. + * The returned XDomain will have its reference count increased so the + * caller needs to call tb_xdomain_put() when it is done with the + * object. + * + * This will find all XDomains including the ones that are not yet added + * to the bus (handshake is still in progress). + * + * The caller needs to hold @tb->lock. + */ +struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link, + u8 depth) +{ + struct tb_xdomain_lookup lookup; + struct tb_xdomain *xd; + + memset(&lookup, 0, sizeof(lookup)); + lookup.link = link; + lookup.depth = depth; + + xd = switch_find_xdomain(tb->root_switch, &lookup); + if (xd) { + get_device(&xd->dev); + return xd; + } + + return NULL; +} + +bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type, + const void *buf, size_t size) +{ + const struct tb_protocol_handler *handler, *tmp; + const struct tb_xdp_header *hdr = buf; + unsigned int length; + int ret = 0; + + /* We expect the packet is at least size of the header */ + length = hdr->xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK; + if (length != size / 4 - sizeof(hdr->xd_hdr) / 4) + return true; + if (length < sizeof(*hdr) / 4 - sizeof(hdr->xd_hdr) / 4) + return true; + + /* + * Handle XDomain discovery protocol packets directly here. For + * other protocols (based on their UUID) we call registered + * handlers in turn. + */ + if (uuid_equal(&hdr->uuid, &tb_xdp_uuid)) { + if (type == TB_CFG_PKG_XDOMAIN_REQ) { + tb_xdp_schedule_request(tb, hdr, size); + return true; + } + return false; + } + + mutex_lock(&xdomain_lock); + list_for_each_entry_safe(handler, tmp, &protocol_handlers, list) { + if (!uuid_equal(&hdr->uuid, handler->uuid)) + continue; + + mutex_unlock(&xdomain_lock); + ret = handler->callback(buf, size, handler->data); + mutex_lock(&xdomain_lock); + + if (ret) + break; + } + mutex_unlock(&xdomain_lock); + + return ret > 0; +} + +static int rebuild_property_block(void) +{ + u32 *block, len; + int ret; + + ret = tb_property_format_dir(xdomain_property_dir, NULL, 0); + if (ret < 0) + return ret; + + len = ret; + + block = kcalloc(len, sizeof(u32), GFP_KERNEL); + if (!block) + return -ENOMEM; + + ret = tb_property_format_dir(xdomain_property_dir, block, len); + if (ret) { + kfree(block); + return ret; + } + + kfree(xdomain_property_block); + xdomain_property_block = block; + xdomain_property_block_len = len; + xdomain_property_block_gen++; + + return 0; +} + +static int update_xdomain(struct device *dev, void *data) +{ + struct tb_xdomain *xd; + + xd = tb_to_xdomain(dev); + if (xd) { + queue_delayed_work(xd->tb->wq, &xd->properties_changed_work, + msecs_to_jiffies(50)); + } + + return 0; +} + +static void update_all_xdomains(void) +{ + bus_for_each_dev(&tb_bus_type, NULL, NULL, update_xdomain); +} + +static bool remove_directory(const char *key, const struct tb_property_dir *dir) +{ + struct tb_property *p; + + p = tb_property_find(xdomain_property_dir, key, + TB_PROPERTY_TYPE_DIRECTORY); + if (p && p->value.dir == dir) { + tb_property_remove(p); + return true; + } + return false; +} + +/** + * tb_register_property_dir() - Register property directory to the host + * @key: Key (name) of the directory to add + * @dir: Directory to add + * + * Service drivers can use this function to add new property directory + * to the host available properties. The other connected hosts are + * notified so they can re-read properties of this host if they are + * interested. + * + * Return: %0 on success and negative errno on failure + */ +int tb_register_property_dir(const char *key, struct tb_property_dir *dir) +{ + int ret; + + if (!key || strlen(key) > 8) + return -EINVAL; + + mutex_lock(&xdomain_lock); + if (tb_property_find(xdomain_property_dir, key, + TB_PROPERTY_TYPE_DIRECTORY)) { + ret = -EEXIST; + goto err_unlock; + } + + ret = tb_property_add_dir(xdomain_property_dir, key, dir); + if (ret) + goto err_unlock; + + ret = rebuild_property_block(); + if (ret) { + remove_directory(key, dir); + goto err_unlock; + } + + mutex_unlock(&xdomain_lock); + update_all_xdomains(); + return 0; + +err_unlock: + mutex_unlock(&xdomain_lock); + return ret; +} +EXPORT_SYMBOL_GPL(tb_register_property_dir); + +/** + * tb_unregister_property_dir() - Removes property directory from host + * @key: Key (name) of the directory + * @dir: Directory to remove + * + * This will remove the existing directory from this host and notify the + * connected hosts about the change. + */ +void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir) +{ + int ret = 0; + + mutex_lock(&xdomain_lock); + if (remove_directory(key, dir)) + ret = rebuild_property_block(); + mutex_unlock(&xdomain_lock); + + if (!ret) + update_all_xdomains(); +} +EXPORT_SYMBOL_GPL(tb_unregister_property_dir); + +int tb_xdomain_init(void) +{ + int ret; + + xdomain_property_dir = tb_property_create_dir(NULL); + if (!xdomain_property_dir) + return -ENOMEM; + + /* + * Initialize standard set of properties without any service + * directories. Those will be added by service drivers + * themselves when they are loaded. + */ + tb_property_add_immediate(xdomain_property_dir, "vendorid", + PCI_VENDOR_ID_INTEL); + tb_property_add_text(xdomain_property_dir, "vendorid", "Intel Corp."); + tb_property_add_immediate(xdomain_property_dir, "deviceid", 0x1); + tb_property_add_text(xdomain_property_dir, "deviceid", + utsname()->nodename); + tb_property_add_immediate(xdomain_property_dir, "devicerv", 0x80000100); + + ret = rebuild_property_block(); + if (ret) { + tb_property_free_dir(xdomain_property_dir); + xdomain_property_dir = NULL; + } + + return ret; +} + +void tb_xdomain_exit(void) +{ + kfree(xdomain_property_block); + tb_property_free_dir(xdomain_property_dir); +} diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 694cebb50f72..7625c3b81f84 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -683,5 +683,31 @@ struct fsl_mc_device_id { const char obj_type[16]; }; +/** + * struct tb_service_id - Thunderbolt service identifiers + * @match_flags: Flags used to match the structure + * @protocol_key: Protocol key the service supports + * @protocol_id: Protocol id the service supports + * @protocol_version: Version of the protocol + * @protocol_revision: Revision of the protocol software + * @driver_data: Driver specific data + * + * Thunderbolt XDomain services are exposed as devices where each device + * carries the protocol information the service supports. Thunderbolt + * XDomain service drivers match against that information. + */ +struct tb_service_id { + __u32 match_flags; + char protocol_key[8 + 1]; + __u32 protocol_id; + __u32 protocol_version; + __u32 protocol_revision; + kernel_ulong_t driver_data; +}; + +#define TBSVC_MATCH_PROTOCOL_KEY 0x0001 +#define TBSVC_MATCH_PROTOCOL_ID 0x0002 +#define TBSVC_MATCH_PROTOCOL_VERSION 0x0004 +#define TBSVC_MATCH_PROTOCOL_REVISION 0x0008 #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 43b8d1e09341..18c0e3d5e85c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -17,6 +17,7 @@ #include #include #include +#include #include enum tb_cfg_pkg_type { @@ -77,6 +78,8 @@ struct tb { }; extern struct bus_type tb_bus_type; +extern struct device_type tb_service_type; +extern struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 @@ -155,4 +158,243 @@ struct tb_property *tb_property_get_next(struct tb_property_dir *dir, property; \ property = tb_property_get_next(dir, property)) +int tb_register_property_dir(const char *key, struct tb_property_dir *dir); +void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); + +/** + * struct tb_xdomain - Cross-domain (XDomain) connection + * @dev: XDomain device + * @tb: Pointer to the domain + * @remote_uuid: UUID of the remote domain (host) + * @local_uuid: Cached local UUID + * @route: Route string the other domain can be reached + * @vendor: Vendor ID of the remote domain + * @device: Device ID of the demote domain + * @lock: Lock to serialize access to the following fields of this structure + * @vendor_name: Name of the vendor (or %NULL if not known) + * @device_name: Name of the device (or %NULL if not known) + * @is_unplugged: The XDomain is unplugged + * @resume: The XDomain is being resumed + * @transmit_path: HopID which the remote end expects us to transmit + * @transmit_ring: Local ring (hop) where outgoing packets are pushed + * @receive_path: HopID which we expect the remote end to transmit + * @receive_ring: Local ring (hop) where incoming packets arrive + * @service_ids: Used to generate IDs for the services + * @properties: Properties exported by the remote domain + * @property_block_gen: Generation of @properties + * @properties_lock: Lock protecting @properties. + * @get_properties_work: Work used to get remote domain properties + * @properties_retries: Number of times left to read properties + * @properties_changed_work: Work used to notify the remote domain that + * our properties have changed + * @properties_changed_retries: Number of times left to send properties + * changed notification + * @link: Root switch link the remote domain is connected (ICM only) + * @depth: Depth in the chain the remote domain is connected (ICM only) + * + * This structure represents connection across two domains (hosts). + * Each XDomain contains zero or more services which are exposed as + * &struct tb_service objects. + * + * Service drivers may access this structure if they need to enumerate + * non-standard properties but they need hold @lock when doing so + * because properties can be changed asynchronously in response to + * changes in the remote domain. + */ +struct tb_xdomain { + struct device dev; + struct tb *tb; + uuid_t *remote_uuid; + const uuid_t *local_uuid; + u64 route; + u16 vendor; + u16 device; + struct mutex lock; + const char *vendor_name; + const char *device_name; + bool is_unplugged; + bool resume; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; + struct ida service_ids; + struct tb_property_dir *properties; + u32 property_block_gen; + struct delayed_work get_properties_work; + int properties_retries; + struct delayed_work properties_changed_work; + int properties_changed_retries; + u8 link; + u8 depth; +}; + +int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, + u16 transmit_ring, u16 receive_path, + u16 receive_ring); +int tb_xdomain_disable_paths(struct tb_xdomain *xd); +struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid); + +static inline struct tb_xdomain * +tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid) +{ + struct tb_xdomain *xd; + + mutex_lock(&tb->lock); + xd = tb_xdomain_find_by_uuid(tb, uuid); + mutex_unlock(&tb->lock); + + return xd; +} + +static inline struct tb_xdomain *tb_xdomain_get(struct tb_xdomain *xd) +{ + if (xd) + get_device(&xd->dev); + return xd; +} + +static inline void tb_xdomain_put(struct tb_xdomain *xd) +{ + if (xd) + put_device(&xd->dev); +} + +static inline bool tb_is_xdomain(const struct device *dev) +{ + return dev->type == &tb_xdomain_type; +} + +static inline struct tb_xdomain *tb_to_xdomain(struct device *dev) +{ + if (tb_is_xdomain(dev)) + return container_of(dev, struct tb_xdomain, dev); + return NULL; +} + +int tb_xdomain_response(struct tb_xdomain *xd, const void *response, + size_t size, enum tb_cfg_pkg_type type); +int tb_xdomain_request(struct tb_xdomain *xd, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, + void *response, size_t response_size, + enum tb_cfg_pkg_type response_type, + unsigned int timeout_msec); + +/** + * tb_protocol_handler - Protocol specific handler + * @uuid: XDomain messages with this UUID are dispatched to this handler + * @callback: Callback called with the XDomain message. Returning %1 + * here tells the XDomain core that the message was handled + * by this handler and should not be forwared to other + * handlers. + * @data: Data passed with the callback + * @list: Handlers are linked using this + * + * Thunderbolt services can hook into incoming XDomain requests by + * registering protocol handler. Only limitation is that the XDomain + * discovery protocol UUID cannot be registered since it is handled by + * the core XDomain code. + * + * The @callback must check that the message is really directed to the + * service the driver implements. + */ +struct tb_protocol_handler { + const uuid_t *uuid; + int (*callback)(const void *buf, size_t size, void *data); + void *data; + struct list_head list; +}; + +int tb_register_protocol_handler(struct tb_protocol_handler *handler); +void tb_unregister_protocol_handler(struct tb_protocol_handler *handler); + +/** + * struct tb_service - Thunderbolt service + * @dev: XDomain device + * @id: ID of the service (shown in sysfs) + * @key: Protocol key from the properties directory + * @prtcid: Protocol ID from the properties directory + * @prtcvers: Protocol version from the properties directory + * @prtcrevs: Protocol software revision from the properties directory + * @prtcstns: Protocol settings mask from the properties directory + * + * Each domain exposes set of services it supports as collection of + * properties. For each service there will be one corresponding + * &struct tb_service. Service drivers are bound to these. + */ +struct tb_service { + struct device dev; + int id; + const char *key; + u32 prtcid; + u32 prtcvers; + u32 prtcrevs; + u32 prtcstns; +}; + +static inline struct tb_service *tb_service_get(struct tb_service *svc) +{ + if (svc) + get_device(&svc->dev); + return svc; +} + +static inline void tb_service_put(struct tb_service *svc) +{ + if (svc) + put_device(&svc->dev); +} + +static inline bool tb_is_service(const struct device *dev) +{ + return dev->type == &tb_service_type; +} + +static inline struct tb_service *tb_to_service(struct device *dev) +{ + if (tb_is_service(dev)) + return container_of(dev, struct tb_service, dev); + return NULL; +} + +/** + * tb_service_driver - Thunderbolt service driver + * @driver: Driver structure + * @probe: Called when the driver is probed + * @remove: Called when the driver is removed (optional) + * @shutdown: Called at shutdown time to stop the service (optional) + * @id_table: Table of service identifiers the driver supports + */ +struct tb_service_driver { + struct device_driver driver; + int (*probe)(struct tb_service *svc, const struct tb_service_id *id); + void (*remove)(struct tb_service *svc); + void (*shutdown)(struct tb_service *svc); + const struct tb_service_id *id_table; +}; + +#define TB_SERVICE(key, id) \ + .match_flags = TBSVC_MATCH_PROTOCOL_KEY | \ + TBSVC_MATCH_PROTOCOL_ID, \ + .protocol_key = (key), \ + .protocol_id = (id) + +int tb_register_service_driver(struct tb_service_driver *drv); +void tb_unregister_service_driver(struct tb_service_driver *drv); + +static inline void *tb_service_get_drvdata(const struct tb_service *svc) +{ + return dev_get_drvdata(&svc->dev); +} + +static inline void tb_service_set_drvdata(struct tb_service *svc, void *data) +{ + dev_set_drvdata(&svc->dev, data); +} + +static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) +{ + return tb_to_xdomain(svc->dev.parent); +} + #endif /* THUNDERBOLT_H_ */ diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index e4d90e50f6fe..57263f2f8f2f 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -206,5 +206,12 @@ int main(void) DEVID_FIELD(fsl_mc_device_id, vendor); DEVID_FIELD(fsl_mc_device_id, obj_type); + DEVID(tb_service_id); + DEVID_FIELD(tb_service_id, match_flags); + DEVID_FIELD(tb_service_id, protocol_key); + DEVID_FIELD(tb_service_id, protocol_id); + DEVID_FIELD(tb_service_id, protocol_version); + DEVID_FIELD(tb_service_id, protocol_revision); + return 0; } diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 29d6699d5a06..6ef6e63f96fd 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -1301,6 +1301,31 @@ static int do_fsl_mc_entry(const char *filename, void *symval, } ADD_TO_DEVTABLE("fslmc", fsl_mc_device_id, do_fsl_mc_entry); +/* Looks like: tbsvc:kSpNvNrN */ +static int do_tbsvc_entry(const char *filename, void *symval, char *alias) +{ + DEF_FIELD(symval, tb_service_id, match_flags); + DEF_FIELD_ADDR(symval, tb_service_id, protocol_key); + DEF_FIELD(symval, tb_service_id, protocol_id); + DEF_FIELD(symval, tb_service_id, protocol_version); + DEF_FIELD(symval, tb_service_id, protocol_revision); + + strcpy(alias, "tbsvc:"); + if (match_flags & TBSVC_MATCH_PROTOCOL_KEY) + sprintf(alias + strlen(alias), "k%s", *protocol_key); + else + strcat(alias + strlen(alias), "k*"); + ADD(alias, "p", match_flags & TBSVC_MATCH_PROTOCOL_ID, protocol_id); + ADD(alias, "v", match_flags & TBSVC_MATCH_PROTOCOL_VERSION, + protocol_version); + ADD(alias, "r", match_flags & TBSVC_MATCH_PROTOCOL_REVISION, + protocol_revision); + + add_wildcard(alias); + return 1; +} +ADD_TO_DEVTABLE("tbsvc", tb_service_id, do_tbsvc_entry); + /* Does namelen bytes of name exactly match the symbol? */ static bool sym_is(const char *name, unsigned namelen, const char *symbol) { -- cgit v1.2.3 From 8c6bba10fb9262d7b3a11e86a40621d5b37810a6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:35 +0300 Subject: thunderbolt: Configure interrupt throttling for all interrupts This will keep the interrupt delivery rate reasonable. The value used here (128 us) is a recommendation from the hardware people. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 23 ++++++++++++++++++++--- drivers/thunderbolt/nhi_regs.h | 2 ++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 05af126a2435..8a7a3d0133f9 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -651,6 +651,22 @@ static int nhi_suspend_noirq(struct device *dev) return tb_domain_suspend_noirq(tb); } +static void nhi_enable_int_throttling(struct tb_nhi *nhi) +{ + /* Throttling is specified in 256ns increments */ + u32 throttle = DIV_ROUND_UP(128 * NSEC_PER_USEC, 256); + unsigned int i; + + /* + * Configure interrupt throttling for all vectors even if we + * only use few. + */ + for (i = 0; i < MSIX_MAX_VECS; i++) { + u32 reg = REG_INT_THROTTLING_RATE + i * 4; + iowrite32(throttle, nhi->iobase + reg); + } +} + static int nhi_resume_noirq(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -663,6 +679,8 @@ static int nhi_resume_noirq(struct device *dev) */ if (!pci_device_is_present(pdev)) tb->nhi->going_away = true; + else + nhi_enable_int_throttling(tb->nhi); return tb_domain_resume_noirq(tb); } @@ -717,6 +735,8 @@ static int nhi_init_msi(struct tb_nhi *nhi) /* In case someone left them on. */ nhi_disable_interrupts(nhi); + nhi_enable_int_throttling(nhi); + ida_init(&nhi->msix_ida); /* @@ -796,9 +816,6 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); - /* magic value - clock related? */ - iowrite32(3906250 / 10000, nhi->iobase + 0x38c00); - tb = icm_probe(nhi); if (!tb) tb = tb_probe(nhi); diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 09ed574e92ff..46eff69b19ad 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -95,6 +95,8 @@ struct ring_desc { #define REG_RING_INTERRUPT_BASE 0x38200 #define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32) +#define REG_INT_THROTTLING_RATE 0x38c00 + /* Interrupt Vector Allocation */ #define REG_INT_VEC_ALLOC_BASE 0x38c40 #define REG_INT_VEC_ALLOC_BITS 4 -- cgit v1.2.3 From 9fb1e654dcf781e71a0ea7c5bdfea3ba85d1d06d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:36 +0300 Subject: thunderbolt: Add support for frame mode When high-speed DMA paths are used to transfer arbitrary data over a Thunderbolt link, DMA rings should be in frame mode instead of raw mode. The latter is used by the control channel (ring 0). In frame mode each data frame can hold up to 4kB payload. This patch modifies the DMA ring code to allow configuring a ring to be in frame mode by passing a new flag (RING_FLAG_FRAME) to the ring when it is allocated. In addition there might be need to enable end-to-end (E2E) workaround for the ring to prevent losing Rx frames in certain situations. We add another flag (RING_FLAG_E2E) that can be used for this purpose. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 3 +- drivers/thunderbolt/nhi.c | 76 ++++++++++++++++++++++++++---------------- drivers/thunderbolt/nhi.h | 10 +++++- drivers/thunderbolt/nhi_regs.h | 2 ++ 4 files changed, 61 insertions(+), 30 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index 46e393c5fd1d..05400b77dcd7 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -618,7 +618,8 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) if (!ctl->tx) goto err; - ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); + ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, + 0xffff); if (!ctl->rx) goto err; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 8a7a3d0133f9..bebcad3d2c1f 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -21,6 +21,12 @@ #define RING_TYPE(ring) ((ring)->is_tx ? "TX ring" : "RX ring") +/* + * Used to enable end-to-end workaround for missing RX packets. Do not + * use this ring for anything else. + */ +#define RING_E2E_UNUSED_HOPID 2 + /* * Minimal number of vectors when we use MSI-X. Two for control channel * Rx/Tx and the rest four are for cross domain DMA paths. @@ -229,23 +235,6 @@ static void ring_work(struct work_struct *work) frame->eof = ring->descriptors[ring->tail].eof; frame->sof = ring->descriptors[ring->tail].sof; frame->flags = ring->descriptors[ring->tail].flags; - if (frame->sof != 0) - dev_WARN(&ring->nhi->pdev->dev, - "%s %d got unexpected SOF: %#x\n", - RING_TYPE(ring), ring->hop, - frame->sof); - /* - * known flags: - * raw not enabled, interupt not set: 0x2=0010 - * raw enabled: 0xa=1010 - * raw not enabled: 0xb=1011 - * partial frame (>MAX_FRAME_SIZE): 0xe=1110 - */ - if (frame->flags != 0xa) - dev_WARN(&ring->nhi->pdev->dev, - "%s %d got unexpected flags: %#x\n", - RING_TYPE(ring), ring->hop, - frame->flags); } ring->tail = (ring->tail + 1) % ring->size; } @@ -321,12 +310,17 @@ static void ring_release_msix(struct tb_ring *ring) } static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, - bool transmit, unsigned int flags) + bool transmit, unsigned int flags, + u16 sof_mask, u16 eof_mask) { struct tb_ring *ring = NULL; dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", transmit ? "TX" : "RX", hop, size); + /* Tx Ring 2 is reserved for E2E workaround */ + if (transmit && hop == RING_E2E_UNUSED_HOPID) + return NULL; + mutex_lock(&nhi->lock); if (hop >= nhi->hop_count) { dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); @@ -353,6 +347,8 @@ static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->is_tx = transmit; ring->size = size; ring->flags = flags; + ring->sof_mask = sof_mask; + ring->eof_mask = eof_mask; ring->head = 0; ring->tail = 0; ring->running = false; @@ -384,13 +380,13 @@ err: struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags) { - return ring_alloc(nhi, hop, size, true, flags); + return ring_alloc(nhi, hop, size, true, flags, 0, 0); } struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags) + unsigned int flags, u16 sof_mask, u16 eof_mask) { - return ring_alloc(nhi, hop, size, false, flags); + return ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); } /** @@ -400,6 +396,9 @@ struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, */ void ring_start(struct tb_ring *ring) { + u16 frame_size; + u32 flags; + mutex_lock(&ring->nhi->lock); mutex_lock(&ring->lock); if (ring->nhi->going_away) @@ -411,18 +410,39 @@ void ring_start(struct tb_ring *ring) dev_info(&ring->nhi->pdev->dev, "starting %s %d\n", RING_TYPE(ring), ring->hop); + if (ring->flags & RING_FLAG_FRAME) { + /* Means 4096 */ + frame_size = 0; + flags = RING_FLAG_ENABLE; + } else { + frame_size = TB_FRAME_SIZE; + flags = RING_FLAG_ENABLE | RING_FLAG_RAW; + } + + if (ring->flags & RING_FLAG_E2E && !ring->is_tx) { + u32 hop; + + /* + * In order not to lose Rx packets we enable end-to-end + * workaround which transfers Rx credits to an unused Tx + * HopID. + */ + hop = RING_E2E_UNUSED_HOPID << REG_RX_OPTIONS_E2E_HOP_SHIFT; + hop &= REG_RX_OPTIONS_E2E_HOP_MASK; + flags |= hop | RING_FLAG_E2E_FLOW_CONTROL; + } + ring_iowrite64desc(ring, ring->descriptors_dma, 0); if (ring->is_tx) { ring_iowrite32desc(ring, ring->size, 12); ring_iowrite32options(ring, 0, 4); /* time releated ? */ - ring_iowrite32options(ring, - RING_FLAG_ENABLE | RING_FLAG_RAW, 0); + ring_iowrite32options(ring, flags, 0); } else { - ring_iowrite32desc(ring, - (TB_FRAME_SIZE << 16) | ring->size, 12); - ring_iowrite32options(ring, 0xffffffff, 4); /* SOF EOF mask */ - ring_iowrite32options(ring, - RING_FLAG_ENABLE | RING_FLAG_RAW, 0); + u32 sof_eof_mask = ring->sof_mask << 16 | ring->eof_mask; + + ring_iowrite32desc(ring, (frame_size << 16) | ring->size, 12); + ring_iowrite32options(ring, sof_eof_mask, 4); + ring_iowrite32options(ring, flags, 0); } ring_interrupt_active(ring, true); ring->running = true; diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 0e05828983db..4503ddbeccb3 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -56,6 +56,8 @@ struct tb_nhi { * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) * @flags: Ring specific flags + * @sof_mask: Bit mask used to detect start of frame PDF + * @eof_mask: Bit mask used to detect end of frame PDF */ struct tb_ring { struct mutex lock; @@ -74,10 +76,16 @@ struct tb_ring { int irq; u8 vector; unsigned int flags; + u16 sof_mask; + u16 eof_mask; }; /* Leave ring interrupt enabled on suspend */ #define RING_FLAG_NO_SUSPEND BIT(0) +/* Configure the ring to be in frame mode */ +#define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) struct ring_frame; typedef void (*ring_cb)(struct tb_ring*, struct ring_frame*, bool canceled); @@ -100,7 +108,7 @@ struct ring_frame { struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags); + unsigned int flags, u16 sof_mask, u16 eof_mask); void ring_start(struct tb_ring *ring); void ring_stop(struct tb_ring *ring); void ring_free(struct tb_ring *ring); diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 46eff69b19ad..491a4c0c18fc 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -77,6 +77,8 @@ struct ring_desc { * ..: unknown */ #define REG_RX_OPTIONS_BASE 0x29800 +#define REG_RX_OPTIONS_E2E_HOP_MASK GENMASK(22, 12) +#define REG_RX_OPTIONS_E2E_HOP_SHIFT 12 /* * three bitfields: tx, rx, rx overflow -- cgit v1.2.3 From 3b3d9f4da96493e4f68d0a80ab210763a24f8b33 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:37 +0300 Subject: thunderbolt: Export ring handling functions to modules These are used by Thunderbolt services to send and receive frames over the high-speed DMA rings. We also put the functions to tb_ namespace to make sure we do not collide with others and add missing kernel-doc comments for the exported functions. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 20 +++--- drivers/thunderbolt/nhi.c | 62 +++++++++++------ drivers/thunderbolt/nhi.h | 147 +---------------------------------------- include/linux/thunderbolt.h | 158 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 176 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index 05400b77dcd7..dd10789e1dbb 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -359,7 +359,7 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len, cpu_to_be32_array(pkg->buffer, data, len / 4); *(__be32 *) (pkg->buffer + len) = tb_crc(pkg->buffer, len); - res = ring_tx(ctl->tx, &pkg->frame); + res = tb_ring_tx(ctl->tx, &pkg->frame); if (res) /* ring is stopped */ tb_ctl_pkg_free(pkg); return res; @@ -376,7 +376,7 @@ static bool tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type, static void tb_ctl_rx_submit(struct ctl_pkg *pkg) { - ring_rx(pkg->ctl->rx, &pkg->frame); /* + tb_ring_rx(pkg->ctl->rx, &pkg->frame); /* * We ignore failures during stop. * All rx packets are referenced * from ctl->rx_packets, so we do @@ -614,11 +614,11 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) if (!ctl->frame_pool) goto err; - ctl->tx = ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); + ctl->tx = tb_ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND); if (!ctl->tx) goto err; - ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, + ctl->rx = tb_ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, 0xffff); if (!ctl->rx) goto err; @@ -652,9 +652,9 @@ void tb_ctl_free(struct tb_ctl *ctl) return; if (ctl->rx) - ring_free(ctl->rx); + tb_ring_free(ctl->rx); if (ctl->tx) - ring_free(ctl->tx); + tb_ring_free(ctl->tx); /* free RX packets */ for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++) @@ -673,8 +673,8 @@ void tb_ctl_start(struct tb_ctl *ctl) { int i; tb_ctl_info(ctl, "control channel starting...\n"); - ring_start(ctl->tx); /* is used to ack hotplug packets, start first */ - ring_start(ctl->rx); + tb_ring_start(ctl->tx); /* is used to ack hotplug packets, start first */ + tb_ring_start(ctl->rx); for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++) tb_ctl_rx_submit(ctl->rx_packets[i]); @@ -695,8 +695,8 @@ void tb_ctl_stop(struct tb_ctl *ctl) ctl->running = false; mutex_unlock(&ctl->request_queue_lock); - ring_stop(ctl->rx); - ring_stop(ctl->tx); + tb_ring_stop(ctl->rx); + tb_ring_stop(ctl->tx); if (!list_empty(&ctl->request_queue)) tb_ctl_WARN(ctl, "dangling request in request_queue\n"); diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index bebcad3d2c1f..e0a47f7581cb 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -253,7 +253,7 @@ invoke_callback: } } -int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) +int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) { int ret = 0; mutex_lock(&ring->lock); @@ -266,6 +266,7 @@ int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) mutex_unlock(&ring->lock); return ret; } +EXPORT_SYMBOL_GPL(__tb_ring_enqueue); static irqreturn_t ring_msix(int irq, void *data) { @@ -309,9 +310,9 @@ static void ring_release_msix(struct tb_ring *ring) ring->irq = 0; } -static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size, - bool transmit, unsigned int flags, - u16 sof_mask, u16 eof_mask) +static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, + bool transmit, unsigned int flags, + u16 sof_mask, u16 eof_mask) { struct tb_ring *ring = NULL; dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", @@ -377,24 +378,42 @@ err: return NULL; } -struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags) +/** + * tb_ring_alloc_tx() - Allocate DMA ring for transmit + * @nhi: Pointer to the NHI the ring is to be allocated + * @hop: HopID (ring) to allocate + * @size: Number of entries in the ring + * @flags: Flags for the ring + */ +struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags) { - return ring_alloc(nhi, hop, size, true, flags, 0, 0); + return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0); } +EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); -struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask) +/** + * tb_ring_alloc_rx() - Allocate DMA ring for receive + * @nhi: Pointer to the NHI the ring is to be allocated + * @hop: HopID (ring) to allocate + * @size: Number of entries in the ring + * @flags: Flags for the ring + * @sof_mask: Mask of PDF values that start a frame + * @eof_mask: Mask of PDF values that end a frame + */ +struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags, u16 sof_mask, u16 eof_mask) { - return ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); + return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); } +EXPORT_SYMBOL_GPL(tb_ring_alloc_rx); /** - * ring_start() - enable a ring + * tb_ring_start() - enable a ring * - * Must not be invoked in parallel with ring_stop(). + * Must not be invoked in parallel with tb_ring_stop(). */ -void ring_start(struct tb_ring *ring) +void tb_ring_start(struct tb_ring *ring) { u16 frame_size; u32 flags; @@ -450,21 +469,22 @@ err: mutex_unlock(&ring->lock); mutex_unlock(&ring->nhi->lock); } - +EXPORT_SYMBOL_GPL(tb_ring_start); /** - * ring_stop() - shutdown a ring + * tb_ring_stop() - shutdown a ring * * Must not be invoked from a callback. * - * This method will disable the ring. Further calls to ring_tx/ring_rx will - * return -ESHUTDOWN until ring_stop has been called. + * This method will disable the ring. Further calls to + * tb_ring_tx/tb_ring_rx will return -ESHUTDOWN until ring_stop has been + * called. * * All enqueued frames will be canceled and their callbacks will be executed * with frame->canceled set to true (on the callback thread). This method * returns only after all callback invocations have finished. */ -void ring_stop(struct tb_ring *ring) +void tb_ring_stop(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); mutex_lock(&ring->lock); @@ -497,9 +517,10 @@ err: schedule_work(&ring->work); flush_work(&ring->work); } +EXPORT_SYMBOL_GPL(tb_ring_stop); /* - * ring_free() - free ring + * tb_ring_free() - free ring * * When this method returns all invocations of ring->callback will have * finished. @@ -508,7 +529,7 @@ err: * * Must NOT be called from ring_frame->callback! */ -void ring_free(struct tb_ring *ring) +void tb_ring_free(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); /* @@ -550,6 +571,7 @@ void ring_free(struct tb_ring *ring) mutex_destroy(&ring->lock); kfree(ring); } +EXPORT_SYMBOL_GPL(tb_ring_free); /** * nhi_mailbox_cmd() - Send a command through NHI mailbox diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 4503ddbeccb3..771d09ca5dc5 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -7,152 +7,7 @@ #ifndef DSL3510_H_ #define DSL3510_H_ -#include -#include -#include - -/** - * struct tb_nhi - thunderbolt native host interface - * @lock: Must be held during ring creation/destruction. Is acquired by - * interrupt_work when dispatching interrupts to individual rings. - * @pdev: Pointer to the PCI device - * @iobase: MMIO space of the NHI - * @tx_rings: All Tx rings available on this host controller - * @rx_rings: All Rx rings available on this host controller - * @msix_ida: Used to allocate MSI-X vectors for rings - * @going_away: The host controller device is about to disappear so when - * this flag is set, avoid touching the hardware anymore. - * @interrupt_work: Work scheduled to handle ring interrupt when no - * MSI-X is used. - * @hop_count: Number of rings (end point hops) supported by NHI. - */ -struct tb_nhi { - struct mutex lock; - struct pci_dev *pdev; - void __iomem *iobase; - struct tb_ring **tx_rings; - struct tb_ring **rx_rings; - struct ida msix_ida; - bool going_away; - struct work_struct interrupt_work; - u32 hop_count; -}; - -/** - * struct tb_ring - thunderbolt TX or RX ring associated with a NHI - * @lock: Lock serializing actions to this ring. Must be acquired after - * nhi->lock. - * @nhi: Pointer to the native host controller interface - * @size: Size of the ring - * @hop: Hop (DMA channel) associated with this ring - * @head: Head of the ring (write next descriptor here) - * @tail: Tail of the ring (complete next descriptor here) - * @descriptors: Allocated descriptors for this ring - * @queue: Queue holding frames to be transferred over this ring - * @in_flight: Queue holding frames that are currently in flight - * @work: Interrupt work structure - * @is_tx: Is the ring Tx or Rx - * @running: Is the ring running - * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. - * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) - * @flags: Ring specific flags - * @sof_mask: Bit mask used to detect start of frame PDF - * @eof_mask: Bit mask used to detect end of frame PDF - */ -struct tb_ring { - struct mutex lock; - struct tb_nhi *nhi; - int size; - int hop; - int head; - int tail; - struct ring_desc *descriptors; - dma_addr_t descriptors_dma; - struct list_head queue; - struct list_head in_flight; - struct work_struct work; - bool is_tx:1; - bool running:1; - int irq; - u8 vector; - unsigned int flags; - u16 sof_mask; - u16 eof_mask; -}; - -/* Leave ring interrupt enabled on suspend */ -#define RING_FLAG_NO_SUSPEND BIT(0) -/* Configure the ring to be in frame mode */ -#define RING_FLAG_FRAME BIT(1) -/* Enable end-to-end flow control */ -#define RING_FLAG_E2E BIT(2) - -struct ring_frame; -typedef void (*ring_cb)(struct tb_ring*, struct ring_frame*, bool canceled); - -/** - * struct ring_frame - for use with ring_rx/ring_tx - */ -struct ring_frame { - dma_addr_t buffer_phy; - ring_cb callback; - struct list_head list; - u32 size:12; /* TX: in, RX: out*/ - u32 flags:12; /* RX: out */ - u32 eof:4; /* TX:in, RX: out */ - u32 sof:4; /* TX:in, RX: out */ -}; - -#define TB_FRAME_SIZE 0x100 /* minimum size for ring_rx */ - -struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags); -struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask); -void ring_start(struct tb_ring *ring); -void ring_stop(struct tb_ring *ring); -void ring_free(struct tb_ring *ring); - -int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); - -/** - * ring_rx() - enqueue a frame on an RX ring - * - * frame->buffer, frame->buffer_phy and frame->callback have to be set. The - * buffer must contain at least TB_FRAME_SIZE bytes. - * - * frame->callback will be invoked with frame->size, frame->flags, frame->eof, - * frame->sof set once the frame has been received. - * - * If ring_stop is called after the packet has been enqueued frame->callback - * will be called with canceled set to true. - * - * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise. - */ -static inline int ring_rx(struct tb_ring *ring, struct ring_frame *frame) -{ - WARN_ON(ring->is_tx); - return __ring_enqueue(ring, frame); -} - -/** - * ring_tx() - enqueue a frame on an TX ring - * - * frame->buffer, frame->buffer_phy, frame->callback, frame->size, frame->eof - * and frame->sof have to be set. - * - * frame->callback will be invoked with once the frame has been transmitted. - * - * If ring_stop is called after the packet has been enqueued frame->callback - * will be called with canceled set to true. - * - * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise. - */ -static inline int ring_tx(struct tb_ring *ring, struct ring_frame *frame) -{ - WARN_ON(!ring->is_tx); - return __ring_enqueue(ring, frame); -} +#include enum nhi_fw_mode { NHI_FW_SAFE_MODE, diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 18c0e3d5e85c..9ddb83ad890f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -15,10 +15,12 @@ #define THUNDERBOLT_H_ #include +#include #include #include #include #include +#include enum tb_cfg_pkg_type { TB_CFG_PKG_READ = 1, @@ -397,4 +399,160 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) return tb_to_xdomain(svc->dev.parent); } +/** + * struct tb_nhi - thunderbolt native host interface + * @lock: Must be held during ring creation/destruction. Is acquired by + * interrupt_work when dispatching interrupts to individual rings. + * @pdev: Pointer to the PCI device + * @iobase: MMIO space of the NHI + * @tx_rings: All Tx rings available on this host controller + * @rx_rings: All Rx rings available on this host controller + * @msix_ida: Used to allocate MSI-X vectors for rings + * @going_away: The host controller device is about to disappear so when + * this flag is set, avoid touching the hardware anymore. + * @interrupt_work: Work scheduled to handle ring interrupt when no + * MSI-X is used. + * @hop_count: Number of rings (end point hops) supported by NHI. + */ +struct tb_nhi { + struct mutex lock; + struct pci_dev *pdev; + void __iomem *iobase; + struct tb_ring **tx_rings; + struct tb_ring **rx_rings; + struct ida msix_ida; + bool going_away; + struct work_struct interrupt_work; + u32 hop_count; +}; + +/** + * struct tb_ring - thunderbolt TX or RX ring associated with a NHI + * @lock: Lock serializing actions to this ring. Must be acquired after + * nhi->lock. + * @nhi: Pointer to the native host controller interface + * @size: Size of the ring + * @hop: Hop (DMA channel) associated with this ring + * @head: Head of the ring (write next descriptor here) + * @tail: Tail of the ring (complete next descriptor here) + * @descriptors: Allocated descriptors for this ring + * @queue: Queue holding frames to be transferred over this ring + * @in_flight: Queue holding frames that are currently in flight + * @work: Interrupt work structure + * @is_tx: Is the ring Tx or Rx + * @running: Is the ring running + * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. + * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) + * @flags: Ring specific flags + * @sof_mask: Bit mask used to detect start of frame PDF + * @eof_mask: Bit mask used to detect end of frame PDF + */ +struct tb_ring { + struct mutex lock; + struct tb_nhi *nhi; + int size; + int hop; + int head; + int tail; + struct ring_desc *descriptors; + dma_addr_t descriptors_dma; + struct list_head queue; + struct list_head in_flight; + struct work_struct work; + bool is_tx:1; + bool running:1; + int irq; + u8 vector; + unsigned int flags; + u16 sof_mask; + u16 eof_mask; +}; + +/* Leave ring interrupt enabled on suspend */ +#define RING_FLAG_NO_SUSPEND BIT(0) +/* Configure the ring to be in frame mode */ +#define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) + +struct ring_frame; +typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); + +/** + * struct ring_frame - For use with ring_rx/ring_tx + * @buffer_phy: DMA mapped address of the frame + * @callback: Callback called when the frame is finished + * @list: Frame is linked to a queue using this + * @size: Size of the frame in bytes (%0 means %4096) + * @flags: Flags for the frame (see &enum ring_desc_flags) + * @eof: End of frame protocol defined field + * @sof: Start of frame protocol defined field + */ +struct ring_frame { + dma_addr_t buffer_phy; + ring_cb callback; + struct list_head list; + u32 size:12; + u32 flags:12; + u32 eof:4; + u32 sof:4; +}; + +/* Minimum size for ring_rx */ +#define TB_FRAME_SIZE 0x100 + +struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags); +struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags, u16 sof_mask, + u16 eof_mask); +void tb_ring_start(struct tb_ring *ring); +void tb_ring_stop(struct tb_ring *ring); +void tb_ring_free(struct tb_ring *ring); + +int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); + +/** + * tb_ring_rx() - enqueue a frame on an RX ring + * @ring: Ring to enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The + * buffer must contain at least %TB_FRAME_SIZE bytes. + * + * @frame->callback will be invoked with @frame->size, @frame->flags, + * @frame->eof, @frame->sof set once the frame has been received. + * + * If ring_stop() is called after the packet has been enqueued + * @frame->callback will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + */ +static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + +/** + * tb_ring_tx() - enqueue a frame on an TX ring + * @ring: Ring the enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, + * @frame->eof and @frame->sof have to be set. + * + * @frame->callback will be invoked with once the frame has been transmitted. + * + * If ring_stop() is called after the packet has been enqueued @frame->callback + * will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + */ +static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(!ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 2a91ec63f8a11e70d4b958dd4df867fec0247179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:38 +0300 Subject: thunderbolt: Move ring descriptor flags to thunderbolt.h A Thunderbolt service driver might need to check if there was an error with the descriptor when in frame mode. We also add two Rx specific error flags RING_DESC_CRC_ERROR and RING_DESC_BUFFER_OVERRUN. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi_regs.h | 7 ------- include/linux/thunderbolt.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h index 491a4c0c18fc..5ed6934e31e7 100644 --- a/drivers/thunderbolt/nhi_regs.h +++ b/drivers/thunderbolt/nhi_regs.h @@ -17,13 +17,6 @@ enum ring_flags { RING_FLAG_ENABLE = 1 << 31, }; -enum ring_desc_flags { - RING_DESC_ISOCH = 0x1, /* TX only? */ - RING_DESC_COMPLETED = 0x2, /* set by NHI */ - RING_DESC_POSTED = 0x4, /* always set this */ - RING_DESC_INTERRUPT = 0x8, /* request an interrupt on completion */ -}; - /** * struct ring_desc - TX/RX ring entry * diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 9ddb83ad890f..e3b9af7be0ad 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -478,6 +478,24 @@ struct tb_ring { struct ring_frame; typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); +/** + * enum ring_desc_flags - Flags for DMA ring descriptor + * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * %RING_DESC_POSTED: Always set this + * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * %RING_DESC_INTERRUPT: Request an interrupt on completion + */ +enum ring_desc_flags { + RING_DESC_ISOCH = 0x1, + RING_DESC_CRC_ERROR = 0x1, + RING_DESC_COMPLETED = 0x2, + RING_DESC_POSTED = 0x4, + RING_DESC_BUFFER_OVERRUN = 0x04, + RING_DESC_INTERRUPT = 0x8, +}; + /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame -- cgit v1.2.3 From 22b7de1000e66d739c431d6be4e7e97c69fa7c98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:39 +0300 Subject: thunderbolt: Use spinlock in ring serialization This makes it possible to enqueue frames also from atomic context which is needed for example, when networking packets are sent over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 26 ++++++++++++++------------ include/linux/thunderbolt.h | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index e0a47f7581cb..7d1891ec3c47 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -212,8 +212,10 @@ static void ring_work(struct work_struct *work) struct tb_ring *ring = container_of(work, typeof(*ring), work); struct ring_frame *frame; bool canceled = false; + unsigned long flags; LIST_HEAD(done); - mutex_lock(&ring->lock); + + spin_lock_irqsave(&ring->lock, flags); if (!ring->running) { /* Move all frames to done and mark them as canceled. */ @@ -241,7 +243,8 @@ static void ring_work(struct work_struct *work) ring_write_descriptors(ring); invoke_callback: - mutex_unlock(&ring->lock); /* allow callbacks to schedule new work */ + /* allow callbacks to schedule new work */ + spin_unlock_irqrestore(&ring->lock, flags); while (!list_empty(&done)) { frame = list_first_entry(&done, typeof(*frame), list); /* @@ -255,15 +258,17 @@ invoke_callback: int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) { + unsigned long flags; int ret = 0; - mutex_lock(&ring->lock); + + spin_lock_irqsave(&ring->lock, flags); if (ring->running) { list_add_tail(&frame->list, &ring->queue); ring_write_descriptors(ring); } else { ret = -ESHUTDOWN; } - mutex_unlock(&ring->lock); + spin_unlock_irqrestore(&ring->lock, flags); return ret; } EXPORT_SYMBOL_GPL(__tb_ring_enqueue); @@ -338,7 +343,7 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (!ring) goto err; - mutex_init(&ring->lock); + spin_lock_init(&ring->lock); INIT_LIST_HEAD(&ring->queue); INIT_LIST_HEAD(&ring->in_flight); INIT_WORK(&ring->work, ring_work); @@ -371,8 +376,6 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, return ring; err: - if (ring) - mutex_destroy(&ring->lock); kfree(ring); mutex_unlock(&nhi->lock); return NULL; @@ -419,7 +422,7 @@ void tb_ring_start(struct tb_ring *ring) u32 flags; mutex_lock(&ring->nhi->lock); - mutex_lock(&ring->lock); + spin_lock_irq(&ring->lock); if (ring->nhi->going_away) goto err; if (ring->running) { @@ -466,7 +469,7 @@ void tb_ring_start(struct tb_ring *ring) ring_interrupt_active(ring, true); ring->running = true; err: - mutex_unlock(&ring->lock); + spin_unlock_irq(&ring->lock); mutex_unlock(&ring->nhi->lock); } EXPORT_SYMBOL_GPL(tb_ring_start); @@ -487,7 +490,7 @@ EXPORT_SYMBOL_GPL(tb_ring_start); void tb_ring_stop(struct tb_ring *ring) { mutex_lock(&ring->nhi->lock); - mutex_lock(&ring->lock); + spin_lock_irq(&ring->lock); dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n", RING_TYPE(ring), ring->hop); if (ring->nhi->going_away) @@ -508,7 +511,7 @@ void tb_ring_stop(struct tb_ring *ring) ring->running = false; err: - mutex_unlock(&ring->lock); + spin_unlock_irq(&ring->lock); mutex_unlock(&ring->nhi->lock); /* @@ -568,7 +571,6 @@ void tb_ring_free(struct tb_ring *ring) * to finish before freeing the ring. */ flush_work(&ring->work); - mutex_destroy(&ring->lock); kfree(ring); } EXPORT_SYMBOL_GPL(tb_ring_free); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index e3b9af7be0ad..cf9e42db780f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -448,7 +448,7 @@ struct tb_nhi { * @eof_mask: Bit mask used to detect end of frame PDF */ struct tb_ring { - struct mutex lock; + spinlock_t lock; struct tb_nhi *nhi; int size; int hop; -- cgit v1.2.3 From 59120e06101db72442acf4c8b364a0c76d8faa68 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:40 +0300 Subject: thunderbolt: Use spinlock in NHI serialization This is needed because ring polling functionality can be called from atomic contexts when networking and other high-speed traffic is transferred over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 75 +++++++++++++++++++++++++-------------------- include/linux/thunderbolt.h | 2 +- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 7d1891ec3c47..0b3c0640048b 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -327,21 +327,9 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (transmit && hop == RING_E2E_UNUSED_HOPID) return NULL; - mutex_lock(&nhi->lock); - if (hop >= nhi->hop_count) { - dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); - goto err; - } - if (transmit && nhi->tx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); - goto err; - } else if (!transmit && nhi->rx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); - goto err; - } ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) - goto err; + return NULL; spin_lock_init(&ring->lock); INIT_LIST_HEAD(&ring->queue); @@ -359,25 +347,45 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->tail = 0; ring->running = false; - if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) - goto err; - ring->descriptors = dma_alloc_coherent(&ring->nhi->pdev->dev, size * sizeof(*ring->descriptors), &ring->descriptors_dma, GFP_KERNEL | __GFP_ZERO); if (!ring->descriptors) - goto err; + goto err_free_ring; + if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) + goto err_free_descs; + + spin_lock_irq(&nhi->lock); + if (hop >= nhi->hop_count) { + dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); + goto err_release_msix; + } + if (transmit && nhi->tx_rings[hop]) { + dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); + goto err_release_msix; + } else if (!transmit && nhi->rx_rings[hop]) { + dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); + goto err_release_msix; + } if (transmit) nhi->tx_rings[hop] = ring; else nhi->rx_rings[hop] = ring; - mutex_unlock(&nhi->lock); + spin_unlock_irq(&nhi->lock); + return ring; -err: +err_release_msix: + spin_unlock_irq(&nhi->lock); + ring_release_msix(ring); +err_free_descs: + dma_free_coherent(&ring->nhi->pdev->dev, + ring->size * sizeof(*ring->descriptors), + ring->descriptors, ring->descriptors_dma); +err_free_ring: kfree(ring); - mutex_unlock(&nhi->lock); + return NULL; } @@ -421,8 +429,8 @@ void tb_ring_start(struct tb_ring *ring) u16 frame_size; u32 flags; - mutex_lock(&ring->nhi->lock); - spin_lock_irq(&ring->lock); + spin_lock_irq(&ring->nhi->lock); + spin_lock(&ring->lock); if (ring->nhi->going_away) goto err; if (ring->running) { @@ -469,8 +477,8 @@ void tb_ring_start(struct tb_ring *ring) ring_interrupt_active(ring, true); ring->running = true; err: - spin_unlock_irq(&ring->lock); - mutex_unlock(&ring->nhi->lock); + spin_unlock(&ring->lock); + spin_unlock_irq(&ring->nhi->lock); } EXPORT_SYMBOL_GPL(tb_ring_start); @@ -489,8 +497,8 @@ EXPORT_SYMBOL_GPL(tb_ring_start); */ void tb_ring_stop(struct tb_ring *ring) { - mutex_lock(&ring->nhi->lock); - spin_lock_irq(&ring->lock); + spin_lock_irq(&ring->nhi->lock); + spin_lock(&ring->lock); dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n", RING_TYPE(ring), ring->hop); if (ring->nhi->going_away) @@ -511,8 +519,8 @@ void tb_ring_stop(struct tb_ring *ring) ring->running = false; err: - spin_unlock_irq(&ring->lock); - mutex_unlock(&ring->nhi->lock); + spin_unlock(&ring->lock); + spin_unlock_irq(&ring->nhi->lock); /* * schedule ring->work to invoke callbacks on all remaining frames. @@ -534,7 +542,7 @@ EXPORT_SYMBOL_GPL(tb_ring_stop); */ void tb_ring_free(struct tb_ring *ring) { - mutex_lock(&ring->nhi->lock); + spin_lock_irq(&ring->nhi->lock); /* * Dissociate the ring from the NHI. This also ensures that * nhi_interrupt_work cannot reschedule ring->work. @@ -564,7 +572,7 @@ void tb_ring_free(struct tb_ring *ring) RING_TYPE(ring), ring->hop); - mutex_unlock(&ring->nhi->lock); + spin_unlock_irq(&ring->nhi->lock); /** * ring->work can no longer be scheduled (it is scheduled only * by nhi_interrupt_work, ring_stop and ring_msix). Wait for it @@ -639,7 +647,7 @@ static void nhi_interrupt_work(struct work_struct *work) int type = 0; /* current interrupt type 0: TX, 1: RX, 2: RX overflow */ struct tb_ring *ring; - mutex_lock(&nhi->lock); + spin_lock_irq(&nhi->lock); /* * Starting at REG_RING_NOTIFY_BASE there are three status bitfields @@ -677,7 +685,7 @@ static void nhi_interrupt_work(struct work_struct *work) /* we do not check ring->running, this is done in ring->work */ schedule_work(&ring->work); } - mutex_unlock(&nhi->lock); + spin_unlock_irq(&nhi->lock); } static irqreturn_t nhi_msi(int irq, void *data) @@ -767,7 +775,6 @@ static void nhi_shutdown(struct tb_nhi *nhi) devm_free_irq(&nhi->pdev->dev, nhi->pdev->irq, nhi); flush_work(&nhi->interrupt_work); } - mutex_destroy(&nhi->lock); ida_destroy(&nhi->msix_ida); } @@ -856,7 +863,7 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id) return res; } - mutex_init(&nhi->lock); + spin_lock_init(&nhi->lock); pci_set_master(pdev); diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index cf9e42db780f..d59e3f9a35c4 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -415,7 +415,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) * @hop_count: Number of rings (end point hops) supported by NHI. */ struct tb_nhi { - struct mutex lock; + spinlock_t lock; struct pci_dev *pdev; void __iomem *iobase; struct tb_ring **tx_rings; -- cgit v1.2.3 From 4ffe722eefcb07c76701f03e0d759fbaecedf79f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:41 +0300 Subject: thunderbolt: Add polling mode for rings In order to support things like networking over Thunderbolt cable, there needs to be a way to switch the ring to a mode where it can be polled with the interrupt masked. We implement such mode so that the caller can allocate a ring by passing pointer to a function that is then called when an interrupt is triggered. Completed frames can be fetched using tb_ring_poll() and the interrupt can be re-enabled when the caller is finished with polling by using tb_ring_poll_complete(). Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/ctl.c | 2 +- drivers/thunderbolt/nhi.c | 126 ++++++++++++++++++++++++++++++++++++++++---- include/linux/thunderbolt.h | 23 +++++--- 3 files changed, 134 insertions(+), 17 deletions(-) diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c index dd10789e1dbb..d079dbba2c03 100644 --- a/drivers/thunderbolt/ctl.c +++ b/drivers/thunderbolt/ctl.c @@ -619,7 +619,7 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data) goto err; ctl->rx = tb_ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff, - 0xffff); + 0xffff, NULL, NULL); if (!ctl->rx) goto err; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 0b3c0640048b..af0a80ddf594 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -252,7 +252,8 @@ invoke_callback: * Do not hold on to it. */ list_del_init(&frame->list); - frame->callback(ring, frame, canceled); + if (frame->callback) + frame->callback(ring, frame, canceled); } } @@ -273,11 +274,106 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame) } EXPORT_SYMBOL_GPL(__tb_ring_enqueue); +/** + * tb_ring_poll() - Poll one completed frame from the ring + * @ring: Ring to poll + * + * This function can be called when @start_poll callback of the @ring + * has been called. It will read one completed frame from the ring and + * return it to the caller. Returns %NULL if there is no more completed + * frames. + */ +struct ring_frame *tb_ring_poll(struct tb_ring *ring) +{ + struct ring_frame *frame = NULL; + unsigned long flags; + + spin_lock_irqsave(&ring->lock, flags); + if (!ring->running) + goto unlock; + if (ring_empty(ring)) + goto unlock; + + if (ring->descriptors[ring->tail].flags & RING_DESC_COMPLETED) { + frame = list_first_entry(&ring->in_flight, typeof(*frame), + list); + list_del_init(&frame->list); + + if (!ring->is_tx) { + frame->size = ring->descriptors[ring->tail].length; + frame->eof = ring->descriptors[ring->tail].eof; + frame->sof = ring->descriptors[ring->tail].sof; + frame->flags = ring->descriptors[ring->tail].flags; + } + + ring->tail = (ring->tail + 1) % ring->size; + } + +unlock: + spin_unlock_irqrestore(&ring->lock, flags); + return frame; +} +EXPORT_SYMBOL_GPL(tb_ring_poll); + +static void __ring_interrupt_mask(struct tb_ring *ring, bool mask) +{ + int idx = ring_interrupt_index(ring); + int reg = REG_RING_INTERRUPT_BASE + idx / 32 * 4; + int bit = idx % 32; + u32 val; + + val = ioread32(ring->nhi->iobase + reg); + if (mask) + val &= ~BIT(bit); + else + val |= BIT(bit); + iowrite32(val, ring->nhi->iobase + reg); +} + +/* Both @nhi->lock and @ring->lock should be held */ +static void __ring_interrupt(struct tb_ring *ring) +{ + if (!ring->running) + return; + + if (ring->start_poll) { + __ring_interrupt_mask(ring, false); + ring->start_poll(ring->poll_data); + } else { + schedule_work(&ring->work); + } +} + +/** + * tb_ring_poll_complete() - Re-start interrupt for the ring + * @ring: Ring to re-start the interrupt + * + * This will re-start (unmask) the ring interrupt once the user is done + * with polling. + */ +void tb_ring_poll_complete(struct tb_ring *ring) +{ + unsigned long flags; + + spin_lock_irqsave(&ring->nhi->lock, flags); + spin_lock(&ring->lock); + if (ring->start_poll) + __ring_interrupt_mask(ring, false); + spin_unlock(&ring->lock); + spin_unlock_irqrestore(&ring->nhi->lock, flags); +} +EXPORT_SYMBOL_GPL(tb_ring_poll_complete); + static irqreturn_t ring_msix(int irq, void *data) { struct tb_ring *ring = data; - schedule_work(&ring->work); + spin_lock(&ring->nhi->lock); + spin_lock(&ring->lock); + __ring_interrupt(ring); + spin_unlock(&ring->lock); + spin_unlock(&ring->nhi->lock); + return IRQ_HANDLED; } @@ -317,7 +413,9 @@ static void ring_release_msix(struct tb_ring *ring) static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, bool transmit, unsigned int flags, - u16 sof_mask, u16 eof_mask) + u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), + void *poll_data) { struct tb_ring *ring = NULL; dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n", @@ -346,6 +444,8 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, ring->head = 0; ring->tail = 0; ring->running = false; + ring->start_poll = start_poll; + ring->poll_data = poll_data; ring->descriptors = dma_alloc_coherent(&ring->nhi->pdev->dev, size * sizeof(*ring->descriptors), @@ -399,7 +499,7 @@ err_free_ring: struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags) { - return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0); + return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0, NULL, NULL); } EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); @@ -411,11 +511,17 @@ EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); * @flags: Flags for the ring * @sof_mask: Mask of PDF values that start a frame * @eof_mask: Mask of PDF values that end a frame + * @start_poll: If not %NULL the ring will call this function when an + * interrupt is triggered and masked, instead of callback + * in each Rx frame. + * @poll_data: Optional data passed to @start_poll */ struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, u16 eof_mask) + unsigned int flags, u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), void *poll_data) { - return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask); + return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask, + start_poll, poll_data); } EXPORT_SYMBOL_GPL(tb_ring_alloc_rx); @@ -556,6 +662,7 @@ void tb_ring_free(struct tb_ring *ring) dev_WARN(&ring->nhi->pdev->dev, "%s %d still running\n", RING_TYPE(ring), ring->hop); } + spin_unlock_irq(&ring->nhi->lock); ring_release_msix(ring); @@ -572,7 +679,6 @@ void tb_ring_free(struct tb_ring *ring) RING_TYPE(ring), ring->hop); - spin_unlock_irq(&ring->nhi->lock); /** * ring->work can no longer be scheduled (it is scheduled only * by nhi_interrupt_work, ring_stop and ring_msix). Wait for it @@ -682,8 +788,10 @@ static void nhi_interrupt_work(struct work_struct *work) hop); continue; } - /* we do not check ring->running, this is done in ring->work */ - schedule_work(&ring->work); + + spin_lock(&ring->lock); + __ring_interrupt(ring); + spin_unlock(&ring->lock); } spin_unlock_irq(&nhi->lock); } diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index d59e3f9a35c4..36925e3aec7c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -446,6 +446,9 @@ struct tb_nhi { * @flags: Ring specific flags * @sof_mask: Bit mask used to detect start of frame PDF * @eof_mask: Bit mask used to detect end of frame PDF + * @start_poll: Called when ring interrupt is triggered to start + * polling. Passing %NULL keeps the ring in interrupt mode. + * @poll_data: Data passed to @start_poll */ struct tb_ring { spinlock_t lock; @@ -466,6 +469,8 @@ struct tb_ring { unsigned int flags; u16 sof_mask; u16 eof_mask; + void (*start_poll)(void *data); + void *poll_data; }; /* Leave ring interrupt enabled on suspend */ @@ -499,7 +504,7 @@ enum ring_desc_flags { /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame - * @callback: Callback called when the frame is finished + * @callback: Callback called when the frame is finished (optional) * @list: Frame is linked to a queue using this * @size: Size of the frame in bytes (%0 means %4096) * @flags: Flags for the frame (see &enum ring_desc_flags) @@ -522,8 +527,8 @@ struct ring_frame { struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, - u16 eof_mask); + unsigned int flags, u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), void *poll_data); void tb_ring_start(struct tb_ring *ring); void tb_ring_stop(struct tb_ring *ring); void tb_ring_free(struct tb_ring *ring); @@ -535,8 +540,8 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * @ring: Ring to enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The - * buffer must contain at least %TB_FRAME_SIZE bytes. + * @frame->buffer, @frame->buffer_phy have to be set. The buffer must + * contain at least %TB_FRAME_SIZE bytes. * * @frame->callback will be invoked with @frame->size, @frame->flags, * @frame->eof, @frame->sof set once the frame has been received. @@ -557,8 +562,8 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * @ring: Ring the enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, - * @frame->eof and @frame->sof have to be set. + * @frame->buffer, @frame->buffer_phy, @frame->size, @frame->eof and + * @frame->sof have to be set. * * @frame->callback will be invoked with once the frame has been transmitted. * @@ -573,4 +578,8 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) return __tb_ring_enqueue(ring, frame); } +/* Used only when the ring is in polling mode */ +struct ring_frame *tb_ring_poll(struct tb_ring *ring); +void tb_ring_poll_complete(struct tb_ring *ring); + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 3304559e353f098d7e0ed5ca981e26c406513e12 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:42 +0300 Subject: thunderbolt: Add function to retrieve DMA device for the ring This is needed when Thunderbolt service drivers need to DMA map memory before it is passed down to the ring. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 36925e3aec7c..7b69853188b1 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -582,4 +583,16 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) struct ring_frame *tb_ring_poll(struct tb_ring *ring); void tb_ring_poll_complete(struct tb_ring *ring); +/** + * tb_ring_dma_device() - Return device used for DMA mapping + * @ring: Ring whose DMA device is retrieved + * + * Use this function when you are mapping DMA for buffers that are + * passed to the ring for sending/receiving. + */ +static inline struct device *tb_ring_dma_device(struct tb_ring *ring) +{ + return &ring->nhi->pdev->dev; +} + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 9a01c7c26cf7cbd1f58d06319e798833e85ff550 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:43 +0300 Subject: thunderbolt: Allocate ring HopID automatically if requested Thunderbolt services should not care which HopID (ring) they use for sending and receiving packets over the high-speed DMA path, so make tb_ring_alloc_rx() and tb_ring_alloc_tx() accept negative HopID. This means that the NHI will allocate next available HopID for the caller automatically. These HopIDs will be allocated from the range which is not reserved for the Thunderbolt protocol (8 .. hop_count - 1). The allocated HopID can be retrieved from ring->hop field after the ring has been allocated successfully if needed. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/thunderbolt/nhi.c | 78 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index af0a80ddf594..0e79eebfcbb7 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -26,6 +26,8 @@ * use this ring for anything else. */ #define RING_E2E_UNUSED_HOPID 2 +/* HopIDs 0-7 are reserved by the Thunderbolt protocol */ +#define RING_FIRST_USABLE_HOPID 8 /* * Minimal number of vectors when we use MSI-X. Two for control channel @@ -411,6 +413,62 @@ static void ring_release_msix(struct tb_ring *ring) ring->irq = 0; } +static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring) +{ + int ret = 0; + + spin_lock_irq(&nhi->lock); + + if (ring->hop < 0) { + unsigned int i; + + /* + * Automatically allocate HopID from the non-reserved + * range 8 .. hop_count - 1. + */ + for (i = RING_FIRST_USABLE_HOPID; i < nhi->hop_count; i++) { + if (ring->is_tx) { + if (!nhi->tx_rings[i]) { + ring->hop = i; + break; + } + } else { + if (!nhi->rx_rings[i]) { + ring->hop = i; + break; + } + } + } + } + + if (ring->hop < 0 || ring->hop >= nhi->hop_count) { + dev_warn(&nhi->pdev->dev, "invalid hop: %d\n", ring->hop); + ret = -EINVAL; + goto err_unlock; + } + if (ring->is_tx && nhi->tx_rings[ring->hop]) { + dev_warn(&nhi->pdev->dev, "TX hop %d already allocated\n", + ring->hop); + ret = -EBUSY; + goto err_unlock; + } else if (!ring->is_tx && nhi->rx_rings[ring->hop]) { + dev_warn(&nhi->pdev->dev, "RX hop %d already allocated\n", + ring->hop); + ret = -EBUSY; + goto err_unlock; + } + + if (ring->is_tx) + nhi->tx_rings[ring->hop] = ring; + else + nhi->rx_rings[ring->hop] = ring; + +err_unlock: + spin_unlock_irq(&nhi->lock); + + return ret; +} + static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, bool transmit, unsigned int flags, u16 sof_mask, u16 eof_mask, @@ -456,28 +514,12 @@ static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size, if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND)) goto err_free_descs; - spin_lock_irq(&nhi->lock); - if (hop >= nhi->hop_count) { - dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop); + if (nhi_alloc_hop(nhi, ring)) goto err_release_msix; - } - if (transmit && nhi->tx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop); - goto err_release_msix; - } else if (!transmit && nhi->rx_rings[hop]) { - dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop); - goto err_release_msix; - } - if (transmit) - nhi->tx_rings[hop] = ring; - else - nhi->rx_rings[hop] = ring; - spin_unlock_irq(&nhi->lock); return ring; err_release_msix: - spin_unlock_irq(&nhi->lock); ring_release_msix(ring); err_free_descs: dma_free_coherent(&ring->nhi->pdev->dev, @@ -506,7 +548,7 @@ EXPORT_SYMBOL_GPL(tb_ring_alloc_tx); /** * tb_ring_alloc_rx() - Allocate DMA ring for receive * @nhi: Pointer to the NHI the ring is to be allocated - * @hop: HopID (ring) to allocate + * @hop: HopID (ring) to allocate. Pass %-1 for automatic allocation. * @size: Number of entries in the ring * @flags: Flags for the ring * @sof_mask: Mask of PDF values that start a frame -- cgit v1.2.3 From 467cd25bf2aec14c2e2990512248b72f46f94c53 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:44 +0300 Subject: MAINTAINERS: Add thunderbolt.h to the Thunderbolt driver entry The new API header (include/linux/thunderbolt.h) is maintained by the Thunderbolt driver maintainers. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6671f375f7fc..c1c90d962012 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13278,6 +13278,7 @@ M: Mika Westerberg M: Yehezkel Bernat S: Maintained F: drivers/thunderbolt/ +F: include/linux/thunderbolt.h THUNDERX GPIO DRIVER M: David Daney -- cgit v1.2.3 From e69b6c02b4c3b8d03be7136f90dd9551ad5a5a5e Mon Sep 17 00:00:00 2001 From: Amir Levy Date: Mon, 2 Oct 2017 13:38:45 +0300 Subject: net: Add support for networking over Thunderbolt cable ThunderboltIP is a protocol created by Apple to tunnel IP/ethernet traffic over a Thunderbolt cable. The protocol consists of configuration phase where each side sends ThunderboltIP login packets (the protocol is determined by UUID in the XDomain packet header) over the configuration channel. Once both sides get positive acknowledgment to their login packet, they configure high-speed DMA path accordingly. This DMA path is then used to transmit and receive networking traffic. This patch creates a virtual ethernet interface the host software can use in the same way as any other networking interface. Once the interface is brought up successfully network packets get tunneled over the Thunderbolt cable to the remote host and back. The connection is terminated by sending a ThunderboltIP logout packet over the configuration channel. We do this when the network interface is brought down by user or the driver is unloaded. Signed-off-by: Amir Levy Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- Documentation/admin-guide/thunderbolt.rst | 24 + drivers/net/Kconfig | 12 + drivers/net/Makefile | 3 + drivers/net/thunderbolt.c | 1362 +++++++++++++++++++++++++++++ 4 files changed, 1401 insertions(+) create mode 100644 drivers/net/thunderbolt.c diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst index 6a4cd1f159ca..5c62d11d77e8 100644 --- a/Documentation/admin-guide/thunderbolt.rst +++ b/Documentation/admin-guide/thunderbolt.rst @@ -197,3 +197,27 @@ information is missing. To recover from this mode, one needs to flash a valid NVM image to the host host controller in the same way it is done in the previous chapter. + +Networking over Thunderbolt cable +--------------------------------- +Thunderbolt technology allows software communication across two hosts +connected by a Thunderbolt cable. + +It is possible to tunnel any kind of traffic over Thunderbolt link but +currently we only support Apple ThunderboltIP protocol. + +If the other host is running Windows or macOS only thing you need to +do is to connect Thunderbolt cable between the two hosts, the +``thunderbolt-net`` is loaded automatically. If the other host is also +Linux you should load ``thunderbolt-net`` manually on one host (it does +not matter which one):: + + # modprobe thunderbolt-net + +This triggers module load on the other host automatically. If the driver +is built-in to the kernel image, there is no need to do anything. + +The driver will create one virtual ethernet interface per Thunderbolt +port which are named like ``thunderbolt0`` and so on. From this point +you can either use standard userspace tools like ``ifconfig`` to +configure the interface or let your GUI to handle it automatically. diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index aba0d652095b..0936da592e12 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -483,6 +483,18 @@ config FUJITSU_ES This driver provides support for Extended Socket network device on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series. +config THUNDERBOLT_NET + tristate "Networking over Thunderbolt cable" + depends on THUNDERBOLT && INET + help + Select this if you want to create network between two + computers over a Thunderbolt cable. The driver supports Apple + ThunderboltIP protocol and allows communication with any host + supporting the same protocol including Windows and macOS. + + To compile this driver a module, choose M here. The module will be + called thunderbolt-net. + source "drivers/net/hyperv/Kconfig" endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 8dff900085d6..7c8f4dd3a7c5 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HYPERV_NET) += hyperv/ obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o obj-$(CONFIG_FUJITSU_ES) += fjes/ + +thunderbolt-net-y += thunderbolt.o +obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c new file mode 100644 index 000000000000..1a7bc0bf4598 --- /dev/null +++ b/drivers/net/thunderbolt.c @@ -0,0 +1,1362 @@ +/* + * Networking over Thunderbolt cable using Apple ThunderboltIP protocol + * + * Copyright (C) 2017, Intel Corporation + * Authors: Amir Levy + * Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Protocol timeouts in ms */ +#define TBNET_LOGIN_DELAY 4500 +#define TBNET_LOGIN_TIMEOUT 500 +#define TBNET_LOGOUT_TIMEOUT 100 + +#define TBNET_RING_SIZE 256 +#define TBNET_LOCAL_PATH 0xf +#define TBNET_LOGIN_RETRIES 60 +#define TBNET_LOGOUT_RETRIES 5 +#define TBNET_MATCH_FRAGS_ID BIT(1) +#define TBNET_MAX_MTU SZ_64K +#define TBNET_FRAME_SIZE SZ_4K +#define TBNET_MAX_PAYLOAD_SIZE \ + (TBNET_FRAME_SIZE - sizeof(struct thunderbolt_ip_frame_header)) +/* Rx packets need to hold space for skb_shared_info */ +#define TBNET_RX_MAX_SIZE \ + (TBNET_FRAME_SIZE + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define TBNET_RX_PAGE_ORDER get_order(TBNET_RX_MAX_SIZE) +#define TBNET_RX_PAGE_SIZE (PAGE_SIZE << TBNET_RX_PAGE_ORDER) + +#define TBNET_L0_PORT_NUM(route) ((route) & GENMASK(5, 0)) + +/** + * struct thunderbolt_ip_frame_header - Header for each Thunderbolt frame + * @frame_size: size of the data with the frame + * @frame_index: running index on the frames + * @frame_id: ID of the frame to match frames to specific packet + * @frame_count: how many frames assembles a full packet + * + * Each data frame passed to the high-speed DMA ring has this header. If + * the XDomain network directory announces that %TBNET_MATCH_FRAGS_ID is + * supported then @frame_id is filled, otherwise it stays %0. + */ +struct thunderbolt_ip_frame_header { + u32 frame_size; + u16 frame_index; + u16 frame_id; + u32 frame_count; +}; + +enum thunderbolt_ip_frame_pdf { + TBIP_PDF_FRAME_START = 1, + TBIP_PDF_FRAME_END, +}; + +enum thunderbolt_ip_type { + TBIP_LOGIN, + TBIP_LOGIN_RESPONSE, + TBIP_LOGOUT, + TBIP_STATUS, +}; + +struct thunderbolt_ip_header { + u32 route_hi; + u32 route_lo; + u32 length_sn; + uuid_t uuid; + uuid_t initiator_uuid; + uuid_t target_uuid; + u32 type; + u32 command_id; +}; + +#define TBIP_HDR_LENGTH_MASK GENMASK(5, 0) +#define TBIP_HDR_SN_MASK GENMASK(28, 27) +#define TBIP_HDR_SN_SHIFT 27 + +struct thunderbolt_ip_login { + struct thunderbolt_ip_header hdr; + u32 proto_version; + u32 transmit_path; + u32 reserved[4]; +}; + +#define TBIP_LOGIN_PROTO_VERSION 1 + +struct thunderbolt_ip_login_response { + struct thunderbolt_ip_header hdr; + u32 status; + u32 receiver_mac[2]; + u32 receiver_mac_len; + u32 reserved[4]; +}; + +struct thunderbolt_ip_logout { + struct thunderbolt_ip_header hdr; +}; + +struct thunderbolt_ip_status { + struct thunderbolt_ip_header hdr; + u32 status; +}; + +struct tbnet_stats { + u64 tx_packets; + u64 rx_packets; + u64 tx_bytes; + u64 rx_bytes; + u64 rx_errors; + u64 tx_errors; + u64 rx_length_errors; + u64 rx_over_errors; + u64 rx_crc_errors; + u64 rx_missed_errors; +}; + +struct tbnet_frame { + struct net_device *dev; + struct page *page; + struct ring_frame frame; +}; + +struct tbnet_ring { + struct tbnet_frame frames[TBNET_RING_SIZE]; + unsigned int cons; + unsigned int prod; + struct tb_ring *ring; +}; + +/** + * struct tbnet - ThunderboltIP network driver private data + * @svc: XDomain service the driver is bound to + * @xd: XDomain the service blongs to + * @handler: ThunderboltIP configuration protocol handler + * @dev: Networking device + * @napi: NAPI structure for Rx polling + * @stats: Network statistics + * @skb: Network packet that is currently processed on Rx path + * @command_id: ID used for next configuration protocol packet + * @login_sent: ThunderboltIP login message successfully sent + * @login_received: ThunderboltIP login message received from the remote + * host + * @transmit_path: HopID the other end needs to use building the + * opposite side path. + * @connection_lock: Lock serializing access to @login_sent, + * @login_received and @transmit_path. + * @login_retries: Number of login retries currently done + * @login_work: Worker to send ThunderboltIP login packets + * @connected_work: Worker that finalizes the ThunderboltIP connection + * setup and enables DMA paths for high speed data + * transfers + * @rx_hdr: Copy of the currently processed Rx frame. Used when a + * network packet consists of multiple Thunderbolt frames. + * In host byte order. + * @rx_ring: Software ring holding Rx frames + * @frame_id: Frame ID use for next Tx packet + * (if %TBNET_MATCH_FRAGS_ID is supported in both ends) + * @tx_ring: Software ring holding Tx frames + */ +struct tbnet { + const struct tb_service *svc; + struct tb_xdomain *xd; + struct tb_protocol_handler handler; + struct net_device *dev; + struct napi_struct napi; + struct tbnet_stats stats; + struct sk_buff *skb; + atomic_t command_id; + bool login_sent; + bool login_received; + u32 transmit_path; + struct mutex connection_lock; + int login_retries; + struct delayed_work login_work; + struct work_struct connected_work; + struct thunderbolt_ip_frame_header rx_hdr; + struct tbnet_ring rx_ring; + atomic_t frame_id; + struct tbnet_ring tx_ring; +}; + +/* Network property directory UUID: c66189ca-1cce-4195-bdb8-49592e5f5a4f */ +static const uuid_t tbnet_dir_uuid = + UUID_INIT(0xc66189ca, 0x1cce, 0x4195, + 0xbd, 0xb8, 0x49, 0x59, 0x2e, 0x5f, 0x5a, 0x4f); + +/* ThunderboltIP protocol UUID: 798f589e-3616-8a47-97c6-5664a920c8dd */ +static const uuid_t tbnet_svc_uuid = + UUID_INIT(0x798f589e, 0x3616, 0x8a47, + 0x97, 0xc6, 0x56, 0x64, 0xa9, 0x20, 0xc8, 0xdd); + +static struct tb_property_dir *tbnet_dir; + +static void tbnet_fill_header(struct thunderbolt_ip_header *hdr, u64 route, + u8 sequence, const uuid_t *initiator_uuid, const uuid_t *target_uuid, + enum thunderbolt_ip_type type, size_t size, u32 command_id) +{ + u32 length_sn; + + /* Length does not include route_hi/lo and length_sn fields */ + length_sn = (size - 3 * 4) / 4; + length_sn |= (sequence << TBIP_HDR_SN_SHIFT) & TBIP_HDR_SN_MASK; + + hdr->route_hi = upper_32_bits(route); + hdr->route_lo = lower_32_bits(route); + hdr->length_sn = length_sn; + uuid_copy(&hdr->uuid, &tbnet_svc_uuid); + uuid_copy(&hdr->initiator_uuid, initiator_uuid); + uuid_copy(&hdr->target_uuid, target_uuid); + hdr->type = type; + hdr->command_id = command_id; +} + +static int tbnet_login_response(struct tbnet *net, u64 route, u8 sequence, + u32 command_id) +{ + struct thunderbolt_ip_login_response reply; + struct tb_xdomain *xd = net->xd; + + memset(&reply, 0, sizeof(reply)); + tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_LOGIN_RESPONSE, sizeof(reply), + command_id); + memcpy(reply.receiver_mac, net->dev->dev_addr, ETH_ALEN); + reply.receiver_mac_len = ETH_ALEN; + + return tb_xdomain_response(xd, &reply, sizeof(reply), + TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tbnet_login_request(struct tbnet *net, u8 sequence) +{ + struct thunderbolt_ip_login_response reply; + struct thunderbolt_ip_login request; + struct tb_xdomain *xd = net->xd; + + memset(&request, 0, sizeof(request)); + tbnet_fill_header(&request.hdr, xd->route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_LOGIN, sizeof(request), + atomic_inc_return(&net->command_id)); + + request.proto_version = TBIP_LOGIN_PROTO_VERSION; + request.transmit_path = TBNET_LOCAL_PATH; + + return tb_xdomain_request(xd, &request, sizeof(request), + TB_CFG_PKG_XDOMAIN_RESP, &reply, + sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP, + TBNET_LOGIN_TIMEOUT); +} + +static int tbnet_logout_response(struct tbnet *net, u64 route, u8 sequence, + u32 command_id) +{ + struct thunderbolt_ip_status reply; + struct tb_xdomain *xd = net->xd; + + memset(&reply, 0, sizeof(reply)); + tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid, + xd->remote_uuid, TBIP_STATUS, sizeof(reply), + atomic_inc_return(&net->command_id)); + return tb_xdomain_response(xd, &reply, sizeof(reply), + TB_CFG_PKG_XDOMAIN_RESP); +} + +static int tbnet_logout_request(struct tbnet *net) +{ + struct thunderbolt_ip_logout request; + struct thunderbolt_ip_status reply; + struct tb_xdomain *xd = net->xd; + + memset(&request, 0, sizeof(request)); + tbnet_fill_header(&request.hdr, xd->route, 0, xd->local_uuid, + xd->remote_uuid, TBIP_LOGOUT, sizeof(request), + atomic_inc_return(&net->command_id)); + + return tb_xdomain_request(xd, &request, sizeof(request), + TB_CFG_PKG_XDOMAIN_RESP, &reply, + sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP, + TBNET_LOGOUT_TIMEOUT); +} + +static void start_login(struct tbnet *net) +{ + mutex_lock(&net->connection_lock); + net->login_sent = false; + net->login_received = false; + mutex_unlock(&net->connection_lock); + + queue_delayed_work(system_long_wq, &net->login_work, + msecs_to_jiffies(1000)); +} + +static void stop_login(struct tbnet *net) +{ + cancel_delayed_work_sync(&net->login_work); + cancel_work_sync(&net->connected_work); +} + +static inline unsigned int tbnet_frame_size(const struct tbnet_frame *tf) +{ + return tf->frame.size ? : TBNET_FRAME_SIZE; +} + +static void tbnet_free_buffers(struct tbnet_ring *ring) +{ + unsigned int i; + + for (i = 0; i < TBNET_RING_SIZE; i++) { + struct device *dma_dev = tb_ring_dma_device(ring->ring); + struct tbnet_frame *tf = &ring->frames[i]; + enum dma_data_direction dir; + unsigned int order; + size_t size; + + if (!tf->page) + continue; + + if (ring->ring->is_tx) { + dir = DMA_TO_DEVICE; + order = 0; + size = tbnet_frame_size(tf); + } else { + dir = DMA_FROM_DEVICE; + order = TBNET_RX_PAGE_ORDER; + size = TBNET_RX_PAGE_SIZE; + } + + if (tf->frame.buffer_phy) + dma_unmap_page(dma_dev, tf->frame.buffer_phy, size, + dir); + + __free_pages(tf->page, order); + tf->page = NULL; + } + + ring->cons = 0; + ring->prod = 0; +} + +static void tbnet_tear_down(struct tbnet *net, bool send_logout) +{ + netif_carrier_off(net->dev); + netif_stop_queue(net->dev); + + stop_login(net); + + mutex_lock(&net->connection_lock); + + if (net->login_sent && net->login_received) { + int retries = TBNET_LOGOUT_RETRIES; + + while (send_logout && retries-- > 0) { + int ret = tbnet_logout_request(net); + if (ret != -ETIMEDOUT) + break; + } + + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); + tbnet_free_buffers(&net->rx_ring); + tbnet_free_buffers(&net->tx_ring); + + if (tb_xdomain_disable_paths(net->xd)) + netdev_warn(net->dev, "failed to disable DMA paths\n"); + } + + net->login_retries = 0; + net->login_sent = false; + net->login_received = false; + + mutex_unlock(&net->connection_lock); +} + +static int tbnet_handle_packet(const void *buf, size_t size, void *data) +{ + const struct thunderbolt_ip_login *pkg = buf; + struct tbnet *net = data; + u32 command_id; + int ret = 0; + u8 sequence; + u64 route; + + /* Make sure the packet is for us */ + if (size < sizeof(struct thunderbolt_ip_header)) + return 0; + if (!uuid_equal(&pkg->hdr.initiator_uuid, net->xd->remote_uuid)) + return 0; + if (!uuid_equal(&pkg->hdr.target_uuid, net->xd->local_uuid)) + return 0; + + route = ((u64)pkg->hdr.route_hi << 32) | pkg->hdr.route_lo; + route &= ~BIT_ULL(63); + if (route != net->xd->route) + return 0; + + sequence = pkg->hdr.length_sn & TBIP_HDR_SN_MASK; + sequence >>= TBIP_HDR_SN_SHIFT; + command_id = pkg->hdr.command_id; + + switch (pkg->hdr.type) { + case TBIP_LOGIN: + if (!netif_running(net->dev)) + break; + + ret = tbnet_login_response(net, route, sequence, + pkg->hdr.command_id); + if (!ret) { + mutex_lock(&net->connection_lock); + net->login_received = true; + net->transmit_path = pkg->transmit_path; + + /* If we reached the number of max retries or + * previous logout, schedule another round of + * login retries + */ + if (net->login_retries >= TBNET_LOGIN_RETRIES || + !net->login_sent) { + net->login_retries = 0; + queue_delayed_work(system_long_wq, + &net->login_work, 0); + } + mutex_unlock(&net->connection_lock); + + queue_work(system_long_wq, &net->connected_work); + } + break; + + case TBIP_LOGOUT: + ret = tbnet_logout_response(net, route, sequence, command_id); + if (!ret) + tbnet_tear_down(net, false); + break; + + default: + return 0; + } + + if (ret) + netdev_warn(net->dev, "failed to send ThunderboltIP response\n"); + + return 1; +} + +static unsigned int tbnet_available_buffers(const struct tbnet_ring *ring) +{ + return ring->prod - ring->cons; +} + +static int tbnet_alloc_rx_buffers(struct tbnet *net, unsigned int nbuffers) +{ + struct tbnet_ring *ring = &net->rx_ring; + int ret; + + while (nbuffers--) { + struct device *dma_dev = tb_ring_dma_device(ring->ring); + unsigned int index = ring->prod & (TBNET_RING_SIZE - 1); + struct tbnet_frame *tf = &ring->frames[index]; + dma_addr_t dma_addr; + + if (tf->page) + break; + + /* Allocate page (order > 0) so that it can hold maximum + * ThunderboltIP frame (4kB) and the additional room for + * SKB shared info required by build_skb(). + */ + tf->page = dev_alloc_pages(TBNET_RX_PAGE_ORDER); + if (!tf->page) { + ret = -ENOMEM; + goto err_free; + } + + dma_addr = dma_map_page(dma_dev, tf->page, 0, + TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { + ret = -ENOMEM; + goto err_free; + } + + tf->frame.buffer_phy = dma_addr; + tf->dev = net->dev; + + tb_ring_rx(ring->ring, &tf->frame); + + ring->prod++; + } + + return 0; + +err_free: + tbnet_free_buffers(ring); + return ret; +} + +static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net) +{ + struct tbnet_ring *ring = &net->tx_ring; + struct tbnet_frame *tf; + unsigned int index; + + if (!tbnet_available_buffers(ring)) + return NULL; + + index = ring->cons++ & (TBNET_RING_SIZE - 1); + + tf = &ring->frames[index]; + tf->frame.size = 0; + tf->frame.buffer_phy = 0; + + return tf; +} + +static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame, + bool canceled) +{ + struct tbnet_frame *tf = container_of(frame, typeof(*tf), frame); + struct device *dma_dev = tb_ring_dma_device(ring); + struct tbnet *net = netdev_priv(tf->dev); + + dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf), + DMA_TO_DEVICE); + + /* Return buffer to the ring */ + net->tx_ring.prod++; + + if (tbnet_available_buffers(&net->tx_ring) >= TBNET_RING_SIZE / 2) + netif_wake_queue(net->dev); +} + +static int tbnet_alloc_tx_buffers(struct tbnet *net) +{ + struct tbnet_ring *ring = &net->tx_ring; + unsigned int i; + + for (i = 0; i < TBNET_RING_SIZE; i++) { + struct tbnet_frame *tf = &ring->frames[i]; + + tf->page = alloc_page(GFP_KERNEL); + if (!tf->page) { + tbnet_free_buffers(ring); + return -ENOMEM; + } + + tf->dev = net->dev; + tf->frame.callback = tbnet_tx_callback; + tf->frame.sof = TBIP_PDF_FRAME_START; + tf->frame.eof = TBIP_PDF_FRAME_END; + } + + ring->cons = 0; + ring->prod = TBNET_RING_SIZE - 1; + + return 0; +} + +static void tbnet_connected_work(struct work_struct *work) +{ + struct tbnet *net = container_of(work, typeof(*net), connected_work); + bool connected; + int ret; + + if (netif_carrier_ok(net->dev)) + return; + + mutex_lock(&net->connection_lock); + connected = net->login_sent && net->login_received; + mutex_unlock(&net->connection_lock); + + if (!connected) + return; + + /* Both logins successful so enable the high-speed DMA paths and + * start the network device queue. + */ + ret = tb_xdomain_enable_paths(net->xd, TBNET_LOCAL_PATH, + net->rx_ring.ring->hop, + net->transmit_path, + net->tx_ring.ring->hop); + if (ret) { + netdev_err(net->dev, "failed to enable DMA paths\n"); + return; + } + + tb_ring_start(net->tx_ring.ring); + tb_ring_start(net->rx_ring.ring); + + ret = tbnet_alloc_rx_buffers(net, TBNET_RING_SIZE); + if (ret) + goto err_stop_rings; + + ret = tbnet_alloc_tx_buffers(net); + if (ret) + goto err_free_rx_buffers; + + netif_carrier_on(net->dev); + netif_start_queue(net->dev); + return; + +err_free_rx_buffers: + tbnet_free_buffers(&net->rx_ring); +err_stop_rings: + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); +} + +static void tbnet_login_work(struct work_struct *work) +{ + struct tbnet *net = container_of(work, typeof(*net), login_work.work); + unsigned long delay = msecs_to_jiffies(TBNET_LOGIN_DELAY); + int ret; + + if (netif_carrier_ok(net->dev)) + return; + + ret = tbnet_login_request(net, net->login_retries % 4); + if (ret) { + if (net->login_retries++ < TBNET_LOGIN_RETRIES) { + queue_delayed_work(system_long_wq, &net->login_work, + delay); + } else { + netdev_info(net->dev, "ThunderboltIP login timed out\n"); + } + } else { + net->login_retries = 0; + + mutex_lock(&net->connection_lock); + net->login_sent = true; + mutex_unlock(&net->connection_lock); + + queue_work(system_long_wq, &net->connected_work); + } +} + +static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf, + const struct thunderbolt_ip_frame_header *hdr) +{ + u32 frame_id, frame_count, frame_size, frame_index; + unsigned int size; + + if (tf->frame.flags & RING_DESC_CRC_ERROR) { + net->stats.rx_crc_errors++; + return false; + } else if (tf->frame.flags & RING_DESC_BUFFER_OVERRUN) { + net->stats.rx_over_errors++; + return false; + } + + /* Should be greater than just header i.e. contains data */ + size = tbnet_frame_size(tf); + if (size <= sizeof(*hdr)) { + net->stats.rx_length_errors++; + return false; + } + + frame_count = le32_to_cpu(hdr->frame_count); + frame_size = le32_to_cpu(hdr->frame_size); + frame_index = le16_to_cpu(hdr->frame_index); + frame_id = le16_to_cpu(hdr->frame_id); + + if ((frame_size > size - sizeof(*hdr)) || !frame_size) { + net->stats.rx_length_errors++; + return false; + } + + /* In case we're in the middle of packet, validate the frame + * header based on first fragment of the packet. + */ + if (net->skb && net->rx_hdr.frame_count) { + /* Check the frame count fits the count field */ + if (frame_count != net->rx_hdr.frame_count) { + net->stats.rx_length_errors++; + return false; + } + + /* Check the frame identifiers are incremented correctly, + * and id is matching. + */ + if (frame_index != net->rx_hdr.frame_index + 1 || + frame_id != net->rx_hdr.frame_id) { + net->stats.rx_missed_errors++; + return false; + } + + if (net->skb->len + frame_size > TBNET_MAX_MTU) { + net->stats.rx_length_errors++; + return false; + } + + return true; + } + + /* Start of packet, validate the frame header */ + if (frame_count == 0 || frame_count > TBNET_RING_SIZE / 4) { + net->stats.rx_length_errors++; + return false; + } + if (frame_index != 0) { + net->stats.rx_missed_errors++; + return false; + } + + return true; +} + +static int tbnet_poll(struct napi_struct *napi, int budget) +{ + struct tbnet *net = container_of(napi, struct tbnet, napi); + unsigned int cleaned_count = tbnet_available_buffers(&net->rx_ring); + struct device *dma_dev = tb_ring_dma_device(net->rx_ring.ring); + unsigned int rx_packets = 0; + + while (rx_packets < budget) { + const struct thunderbolt_ip_frame_header *hdr; + unsigned int hdr_size = sizeof(*hdr); + struct sk_buff *skb = NULL; + struct ring_frame *frame; + struct tbnet_frame *tf; + struct page *page; + bool last = true; + u32 frame_size; + + /* Return some buffers to hardware, one at a time is too + * slow so allocate MAX_SKB_FRAGS buffers at the same + * time. + */ + if (cleaned_count >= MAX_SKB_FRAGS) { + tbnet_alloc_rx_buffers(net, cleaned_count); + cleaned_count = 0; + } + + frame = tb_ring_poll(net->rx_ring.ring); + if (!frame) + break; + + dma_unmap_page(dma_dev, frame->buffer_phy, + TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE); + + tf = container_of(frame, typeof(*tf), frame); + + page = tf->page; + tf->page = NULL; + net->rx_ring.cons++; + cleaned_count++; + + hdr = page_address(page); + if (!tbnet_check_frame(net, tf, hdr)) { + __free_pages(page, TBNET_RX_PAGE_ORDER); + dev_kfree_skb_any(net->skb); + net->skb = NULL; + continue; + } + + frame_size = le32_to_cpu(hdr->frame_size); + + skb = net->skb; + if (!skb) { + skb = build_skb(page_address(page), + TBNET_RX_PAGE_SIZE); + if (!skb) { + __free_pages(page, TBNET_RX_PAGE_ORDER); + net->stats.rx_errors++; + break; + } + + skb_reserve(skb, hdr_size); + skb_put(skb, frame_size); + + net->skb = skb; + } else { + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + page, hdr_size, frame_size, + TBNET_RX_PAGE_SIZE - hdr_size); + } + + net->rx_hdr.frame_size = frame_size; + net->rx_hdr.frame_count = le32_to_cpu(hdr->frame_count); + net->rx_hdr.frame_index = le16_to_cpu(hdr->frame_index); + net->rx_hdr.frame_id = le16_to_cpu(hdr->frame_id); + last = net->rx_hdr.frame_index == net->rx_hdr.frame_count - 1; + + rx_packets++; + net->stats.rx_bytes += frame_size; + + if (last) { + skb->protocol = eth_type_trans(skb, net->dev); + napi_gro_receive(&net->napi, skb); + net->skb = NULL; + } + } + + net->stats.rx_packets += rx_packets; + + if (cleaned_count) + tbnet_alloc_rx_buffers(net, cleaned_count); + + if (rx_packets >= budget) + return budget; + + napi_complete_done(napi, rx_packets); + /* Re-enable the ring interrupt */ + tb_ring_poll_complete(net->rx_ring.ring); + + return rx_packets; +} + +static void tbnet_start_poll(void *data) +{ + struct tbnet *net = data; + + napi_schedule(&net->napi); +} + +static int tbnet_open(struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + struct tb_xdomain *xd = net->xd; + u16 sof_mask, eof_mask; + struct tb_ring *ring; + + netif_carrier_off(dev); + + ring = tb_ring_alloc_tx(xd->tb->nhi, -1, TBNET_RING_SIZE, + RING_FLAG_FRAME); + if (!ring) { + netdev_err(dev, "failed to allocate Tx ring\n"); + return -ENOMEM; + } + net->tx_ring.ring = ring; + + sof_mask = BIT(TBIP_PDF_FRAME_START); + eof_mask = BIT(TBIP_PDF_FRAME_END); + + ring = tb_ring_alloc_rx(xd->tb->nhi, -1, TBNET_RING_SIZE, + RING_FLAG_FRAME | RING_FLAG_E2E, sof_mask, + eof_mask, tbnet_start_poll, net); + if (!ring) { + netdev_err(dev, "failed to allocate Rx ring\n"); + tb_ring_free(net->tx_ring.ring); + net->tx_ring.ring = NULL; + return -ENOMEM; + } + net->rx_ring.ring = ring; + + napi_enable(&net->napi); + start_login(net); + + return 0; +} + +static int tbnet_stop(struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + + napi_disable(&net->napi); + + tbnet_tear_down(net, true); + + tb_ring_free(net->rx_ring.ring); + net->rx_ring.ring = NULL; + tb_ring_free(net->tx_ring.ring); + net->tx_ring.ring = NULL; + + return 0; +} + +static bool tbnet_xmit_map(struct device *dma_dev, struct tbnet_frame *tf) +{ + dma_addr_t dma_addr; + + dma_addr = dma_map_page(dma_dev, tf->page, 0, tbnet_frame_size(tf), + DMA_TO_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) + return false; + + tf->frame.buffer_phy = dma_addr; + return true; +} + +static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb, + struct tbnet_frame **frames, u32 frame_count) +{ + struct thunderbolt_ip_frame_header *hdr = page_address(frames[0]->page); + struct device *dma_dev = tb_ring_dma_device(net->tx_ring.ring); + __wsum wsum = htonl(skb->len - skb_transport_offset(skb)); + unsigned int i, len, offset = skb_transport_offset(skb); + __be16 protocol = skb->protocol; + void *data = skb->data; + void *dest = hdr + 1; + __sum16 *tucso; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* No need to calculate checksum so we just update the + * total frame count and map the frames for DMA. + */ + for (i = 0; i < frame_count; i++) { + hdr = page_address(frames[i]->page); + hdr->frame_count = cpu_to_le32(frame_count); + if (!tbnet_xmit_map(dma_dev, frames[i])) + goto err_unmap; + } + + return true; + } + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, vh; + + vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh); + if (!vhdr) + return false; + + protocol = vhdr->h_vlan_encapsulated_proto; + } + + /* Data points on the beginning of packet. + * Check is the checksum absolute place in the packet. + * ipcso will update IP checksum. + * tucso will update TCP/UPD checksum. + */ + if (protocol == htons(ETH_P_IP)) { + __sum16 *ipcso = dest + ((void *)&(ip_hdr(skb)->check) - data); + + *ipcso = 0; + *ipcso = ip_fast_csum(dest + skb_network_offset(skb), + ip_hdr(skb)->ihl); + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); + else if (ip_hdr(skb)->protocol == IPPROTO_UDP) + tucso = dest + ((void *)&(udp_hdr(skb)->check) - data); + else + return false; + + *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + ip_hdr(skb)->protocol, 0); + } else if (skb_is_gso_v6(skb)) { + tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data); + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + return false; + } else if (protocol == htons(ETH_P_IPV6)) { + tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset; + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + ipv6_hdr(skb)->nexthdr, 0); + } else { + return false; + } + + /* First frame was headers, rest of the frames contain data. + * Calculate checksum over each frame. + */ + for (i = 0; i < frame_count; i++) { + hdr = page_address(frames[i]->page); + dest = (void *)(hdr + 1) + offset; + len = le32_to_cpu(hdr->frame_size) - offset; + wsum = csum_partial(dest, len, wsum); + hdr->frame_count = cpu_to_le32(frame_count); + + offset = 0; + } + + *tucso = csum_fold(wsum); + + /* Checksum is finally calculated and we don't touch the memory + * anymore, so DMA map the frames now. + */ + for (i = 0; i < frame_count; i++) { + if (!tbnet_xmit_map(dma_dev, frames[i])) + goto err_unmap; + } + + return true; + +err_unmap: + while (i--) + dma_unmap_page(dma_dev, frames[i]->frame.buffer_phy, + tbnet_frame_size(frames[i]), DMA_TO_DEVICE); + + return false; +} + +static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num, + unsigned int *len) +{ + const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num]; + + *len = skb_frag_size(frag); + return kmap_atomic(skb_frag_page(frag)) + frag->page_offset; +} + +static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct tbnet *net = netdev_priv(dev); + struct tbnet_frame *frames[MAX_SKB_FRAGS]; + u16 frame_id = atomic_read(&net->frame_id); + struct thunderbolt_ip_frame_header *hdr; + unsigned int len = skb_headlen(skb); + unsigned int data_len = skb->len; + unsigned int nframes, i; + unsigned int frag = 0; + void *src = skb->data; + u32 frame_index = 0; + bool unmap = false; + void *dest; + + nframes = DIV_ROUND_UP(data_len, TBNET_MAX_PAYLOAD_SIZE); + if (tbnet_available_buffers(&net->tx_ring) < nframes) { + netif_stop_queue(net->dev); + return NETDEV_TX_BUSY; + } + + frames[frame_index] = tbnet_get_tx_buffer(net); + if (!frames[frame_index]) + goto err_drop; + + hdr = page_address(frames[frame_index]->page); + dest = hdr + 1; + + /* If overall packet is bigger than the frame data size */ + while (data_len > TBNET_MAX_PAYLOAD_SIZE) { + unsigned int size_left = TBNET_MAX_PAYLOAD_SIZE; + + hdr->frame_size = cpu_to_le32(TBNET_MAX_PAYLOAD_SIZE); + hdr->frame_index = cpu_to_le16(frame_index); + hdr->frame_id = cpu_to_le16(frame_id); + + do { + if (len > size_left) { + /* Copy data onto Tx buffer data with + * full frame size then break and go to + * next frame + */ + memcpy(dest, src, size_left); + len -= size_left; + dest += size_left; + src += size_left; + break; + } + + memcpy(dest, src, len); + size_left -= len; + dest += len; + + if (unmap) { + kunmap_atomic(src); + unmap = false; + } + + /* Ensure all fragments have been processed */ + if (frag < skb_shinfo(skb)->nr_frags) { + /* Map and then unmap quickly */ + src = tbnet_kmap_frag(skb, frag++, &len); + unmap = true; + } else if (unlikely(size_left > 0)) { + goto err_drop; + } + } while (size_left > 0); + + data_len -= TBNET_MAX_PAYLOAD_SIZE; + frame_index++; + + frames[frame_index] = tbnet_get_tx_buffer(net); + if (!frames[frame_index]) + goto err_drop; + + hdr = page_address(frames[frame_index]->page); + dest = hdr + 1; + } + + hdr->frame_size = cpu_to_le32(data_len); + hdr->frame_index = cpu_to_le16(frame_index); + hdr->frame_id = cpu_to_le16(frame_id); + + frames[frame_index]->frame.size = data_len + sizeof(*hdr); + + /* In case the remaining data_len is smaller than a frame */ + while (len < data_len) { + memcpy(dest, src, len); + data_len -= len; + dest += len; + + if (unmap) { + kunmap_atomic(src); + unmap = false; + } + + if (frag < skb_shinfo(skb)->nr_frags) { + src = tbnet_kmap_frag(skb, frag++, &len); + unmap = true; + } else if (unlikely(data_len > 0)) { + goto err_drop; + } + } + + memcpy(dest, src, data_len); + + if (unmap) + kunmap_atomic(src); + + if (!tbnet_xmit_csum_and_map(net, skb, frames, frame_index + 1)) + goto err_drop; + + for (i = 0; i < frame_index + 1; i++) + tb_ring_tx(net->tx_ring.ring, &frames[i]->frame); + + if (net->svc->prtcstns & TBNET_MATCH_FRAGS_ID) + atomic_inc(&net->frame_id); + + net->stats.tx_packets++; + net->stats.tx_bytes += skb->len; + + dev_consume_skb_any(skb); + + return NETDEV_TX_OK; + +err_drop: + /* We can re-use the buffers */ + net->tx_ring.cons -= frame_index; + + dev_kfree_skb_any(skb); + net->stats.tx_errors++; + + return NETDEV_TX_OK; +} + +static void tbnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct tbnet *net = netdev_priv(dev); + + stats->tx_packets = net->stats.tx_packets; + stats->rx_packets = net->stats.rx_packets; + stats->tx_bytes = net->stats.tx_bytes; + stats->rx_bytes = net->stats.rx_bytes; + stats->rx_errors = net->stats.rx_errors + net->stats.rx_length_errors + + net->stats.rx_over_errors + net->stats.rx_crc_errors + + net->stats.rx_missed_errors; + stats->tx_errors = net->stats.tx_errors; + stats->rx_length_errors = net->stats.rx_length_errors; + stats->rx_over_errors = net->stats.rx_over_errors; + stats->rx_crc_errors = net->stats.rx_crc_errors; + stats->rx_missed_errors = net->stats.rx_missed_errors; +} + +static const struct net_device_ops tbnet_netdev_ops = { + .ndo_open = tbnet_open, + .ndo_stop = tbnet_stop, + .ndo_start_xmit = tbnet_start_xmit, + .ndo_get_stats64 = tbnet_get_stats64, +}; + +static void tbnet_generate_mac(struct net_device *dev) +{ + const struct tbnet *net = netdev_priv(dev); + const struct tb_xdomain *xd = net->xd; + u8 phy_port; + u32 hash; + + phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route)); + + /* Unicast and locally administered MAC */ + dev->dev_addr[0] = phy_port << 4 | 0x02; + hash = jhash2((u32 *)xd->local_uuid, 4, 0); + memcpy(dev->dev_addr + 1, &hash, sizeof(hash)); + hash = jhash2((u32 *)xd->local_uuid, 4, hash); + dev->dev_addr[5] = hash & 0xff; +} + +static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id) +{ + struct tb_xdomain *xd = tb_service_parent(svc); + struct net_device *dev; + struct tbnet *net; + int ret; + + dev = alloc_etherdev(sizeof(*net)); + if (!dev) + return -ENOMEM; + + SET_NETDEV_DEV(dev, &svc->dev); + + net = netdev_priv(dev); + INIT_DELAYED_WORK(&net->login_work, tbnet_login_work); + INIT_WORK(&net->connected_work, tbnet_connected_work); + mutex_init(&net->connection_lock); + atomic_set(&net->command_id, 0); + atomic_set(&net->frame_id, 0); + net->svc = svc; + net->dev = dev; + net->xd = xd; + + tbnet_generate_mac(dev); + + strcpy(dev->name, "thunderbolt%d"); + dev->netdev_ops = &tbnet_netdev_ops; + + /* ThunderboltIP takes advantage of TSO packets but instead of + * segmenting them we just split the packet into Thunderbolt + * frames (maximum payload size of each frame is 4084 bytes) and + * calculate checksum over the whole packet here. + * + * The receiving side does the opposite if the host OS supports + * LRO, otherwise it needs to split the large packet into MTU + * sized smaller packets. + * + * In order to receive large packets from the networking stack, + * we need to announce support for most of the offloading + * features here. + */ + dev->hw_features = NETIF_F_SG | NETIF_F_ALL_TSO | NETIF_F_GRO | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->features = dev->hw_features | NETIF_F_HIGHDMA; + dev->hard_header_len += sizeof(struct thunderbolt_ip_frame_header); + + netif_napi_add(dev, &net->napi, tbnet_poll, NAPI_POLL_WEIGHT); + + /* MTU range: 68 - 65522 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = TBNET_MAX_MTU - ETH_HLEN; + + net->handler.uuid = &tbnet_svc_uuid; + net->handler.callback = tbnet_handle_packet, + net->handler.data = net; + tb_register_protocol_handler(&net->handler); + + tb_service_set_drvdata(svc, net); + + ret = register_netdev(dev); + if (ret) { + tb_unregister_protocol_handler(&net->handler); + free_netdev(dev); + return ret; + } + + return 0; +} + +static void tbnet_remove(struct tb_service *svc) +{ + struct tbnet *net = tb_service_get_drvdata(svc); + + unregister_netdev(net->dev); + tb_unregister_protocol_handler(&net->handler); + free_netdev(net->dev); +} + +static void tbnet_shutdown(struct tb_service *svc) +{ + tbnet_tear_down(tb_service_get_drvdata(svc), true); +} + +static int __maybe_unused tbnet_suspend(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tbnet *net = tb_service_get_drvdata(svc); + + stop_login(net); + if (netif_running(net->dev)) { + netif_device_detach(net->dev); + tb_ring_stop(net->rx_ring.ring); + tb_ring_stop(net->tx_ring.ring); + tbnet_free_buffers(&net->rx_ring); + tbnet_free_buffers(&net->tx_ring); + } + + return 0; +} + +static int __maybe_unused tbnet_resume(struct device *dev) +{ + struct tb_service *svc = tb_to_service(dev); + struct tbnet *net = tb_service_get_drvdata(svc); + + netif_carrier_off(net->dev); + if (netif_running(net->dev)) { + netif_device_attach(net->dev); + start_login(net); + } + + return 0; +} + +static const struct dev_pm_ops tbnet_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(tbnet_suspend, tbnet_resume) +}; + +static const struct tb_service_id tbnet_ids[] = { + { TB_SERVICE("network", 1) }, + { }, +}; +MODULE_DEVICE_TABLE(tbsvc, tbnet_ids); + +static struct tb_service_driver tbnet_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "thunderbolt-net", + .pm = &tbnet_pm_ops, + }, + .probe = tbnet_probe, + .remove = tbnet_remove, + .shutdown = tbnet_shutdown, + .id_table = tbnet_ids, +}; + +static int __init tbnet_init(void) +{ + int ret; + + tbnet_dir = tb_property_create_dir(&tbnet_dir_uuid); + if (!tbnet_dir) + return -ENOMEM; + + tb_property_add_immediate(tbnet_dir, "prtcid", 1); + tb_property_add_immediate(tbnet_dir, "prtcvers", 1); + tb_property_add_immediate(tbnet_dir, "prtcrevs", 1); + tb_property_add_immediate(tbnet_dir, "prtcstns", + TBNET_MATCH_FRAGS_ID); + + ret = tb_register_property_dir("network", tbnet_dir); + if (ret) { + tb_property_free_dir(tbnet_dir); + return ret; + } + + return tb_register_service_driver(&tbnet_driver); +} +module_init(tbnet_init); + +static void __exit tbnet_exit(void) +{ + tb_unregister_service_driver(&tbnet_driver); + tb_unregister_property_dir("network", tbnet_dir); + tb_property_free_dir(tbnet_dir); +} +module_exit(tbnet_exit); + +MODULE_AUTHOR("Amir Levy "); +MODULE_AUTHOR("Michael Jamet "); +MODULE_AUTHOR("Mika Westerberg "); +MODULE_DESCRIPTION("Thunderbolt network driver"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From c024297e7be34b6f0e578afc76c5b9f7708a5832 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:46 +0300 Subject: MAINTAINERS: Add entry for Thunderbolt network driver I will be maintaining the Thunderbolt network driver along with Michael and Yehezkel. Signed-off-by: Michael Jamet Signed-off-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: David S. Miller --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c1c90d962012..5231392cf4bd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13280,6 +13280,14 @@ S: Maintained F: drivers/thunderbolt/ F: include/linux/thunderbolt.h +THUNDERBOLT NETWORK DRIVER +M: Michael Jamet +M: Mika Westerberg +M: Yehezkel Bernat +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/thunderbolt.c + THUNDERX GPIO DRIVER M: David Daney S: Maintained -- cgit v1.2.3 From e50d5751c807853cd0fce0b5c46479cc6274014f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Mon, 24 Jul 2017 18:17:42 -0700 Subject: i40e: limit lan queue count in large CPU count machine When a machine has more CPUs than queue pairs, e.g. 512 cores, the counting gets a little funky and turns off Flow Director with the message: not enough queues for Flow Director. Flow Director feature is disabled This patch limits the number of lan queues initially allocated to be sure we have some left for FD and other features. Signed-off-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 47f71d7c3ae0..387f0863f794 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11093,6 +11093,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit) static void i40e_determine_queue_usage(struct i40e_pf *pf) { int queues_left; + int q_max; pf->num_lan_qps = 0; @@ -11139,10 +11140,12 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf) I40E_FLAG_DCB_ENABLED); dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n"); } - pf->num_lan_qps = max_t(int, pf->rss_size_max, - num_online_cpus()); - pf->num_lan_qps = min_t(int, pf->num_lan_qps, - pf->hw.func_caps.num_tx_qp); + + /* limit lan qps to the smaller of qps, cpus or msix */ + q_max = max_t(int, pf->rss_size_max, num_online_cpus()); + q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp); + q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors); + pf->num_lan_qps = q_max; queues_left -= pf->num_lan_qps; } -- cgit v1.2.3 From 5872866e166c38ad1c1028fb9cf7dd756c0ef43e Mon Sep 17 00:00:00 2001 From: Lihong Yang Date: Thu, 27 Jul 2017 03:17:09 -0700 Subject: i40e: remove logically dead code This patch removes the !vf condition check that cannot be true in i40e_ndo_set_vf_trust function Detected by CoverityScan, CID 1397531 Logically dead code Signed-off-by: Lihong Yang Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a75396c157d9..e6b95e1e1a33 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3354,8 +3354,6 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) vf = &pf->vf[vf_id]; - if (!vf) - return -EINVAL; if (setting == vf->trusted) goto out; -- cgit v1.2.3 From 54902349ee95045b67e2f0c39b75f5418540064b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 6 Aug 2017 23:37:01 +0200 Subject: i40e: Fix a potential NULL pointer dereference If 'kzalloc()' fails, a NULL pointer will be dereferenced. Return an error code (-ENOMEM) instead. Signed-off-by: Christophe JAILLET Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e6b95e1e1a33..9e3667fc7f6a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -423,6 +423,9 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, (sizeof(struct virtchnl_iwarp_qv_info) * (qvlist_info->num_vectors - 1)); vf->qvlist_info = kzalloc(size, GFP_KERNEL); + if (!vf->qvlist_info) + return -ENOMEM; + vf->qvlist_info->num_vectors = qvlist_info->num_vectors; msix_vf = pf->hw.func_caps.num_msix_vectors_vf; -- cgit v1.2.3 From d60bcc798000e015940fb47eb23b79dd2fda5c9e Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 22 Aug 2017 06:57:43 -0400 Subject: i40e: Fix reporting of supported link modes This patch fixes incorrect reporting of supported link modes on some NICs. Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 20 ++++++++++++++++++-- drivers/net/ethernet/intel/i40e/i40e_common.c | 11 ++++++++++- drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 20 ++++++++++++++++++-- 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index e2a9ec80a623..5d0291c1337e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1734,6 +1734,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1752,6 +1754,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, I40E_PHY_TYPE_MAX }; @@ -1942,19 +1946,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 7346d8850c8e..64c15f4c9d2b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1821,7 +1821,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; - hw_link_info->loopback = resp->loopback; + hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK; hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size); hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK; @@ -1852,6 +1852,15 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; + if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= 7) { + __le32 tmp; + + memcpy(&tmp, resp->link_type, sizeof(tmp)); + hw->phy.phy_types = le32_to_cpu(tmp); + hw->phy.phy_types |= ((u64)resp->link_type_ext << 32); + } + /* save link status information */ if (link) *link = *hw_link_info; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index f9f48d1900b0..709d114fc305 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -1730,6 +1730,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB, I40E_PHY_TYPE_10GBASE_AOC = 0xC, I40E_PHY_TYPE_40GBASE_AOC = 0xD, + I40E_PHY_TYPE_UNRECOGNIZED = 0xE, + I40E_PHY_TYPE_UNSUPPORTED = 0xF, I40E_PHY_TYPE_100BASE_TX = 0x11, I40E_PHY_TYPE_1000BASE_T = 0x12, I40E_PHY_TYPE_10GBASE_T = 0x13, @@ -1748,6 +1750,8 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_25GBASE_CR = 0x20, I40E_PHY_TYPE_25GBASE_SR = 0x21, I40E_PHY_TYPE_25GBASE_LR = 0x22, + I40E_PHY_TYPE_EMPTY = 0xFE, + I40E_PHY_TYPE_DEFAULT = 0xFF, I40E_PHY_TYPE_MAX }; @@ -1938,19 +1942,31 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 #define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ +/* Since firmware API 1.7 loopback field keeps power class info as well */ +#define I40E_AQ_LOOPBACK_MASK 0x07 +#define I40E_AQ_PWR_CLASS_SHIFT_LB 6 +#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB) __le16 max_frame_size; u8 config; #define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 #define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 power_desc; + union { + struct { + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 #define I40E_AQ_PWR_CLASS_MASK 0x03 - u8 reserved[4]; + u8 reserved[4]; + }; + struct { + u8 link_type[4]; + u8 link_type_ext; + }; + }; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status); -- cgit v1.2.3 From 9c0e5caf6398d6b892dc5046c421890e32ab5fa3 Mon Sep 17 00:00:00 2001 From: Filip Sadowski Date: Tue, 22 Aug 2017 06:57:44 -0400 Subject: i40e: Add support for 'ethtool -m' This patch adds support for 'ethtool -m' command which displays information about (Q)SFP+ module plugged into NIC's cage. Signed-off-by: Filip Sadowski Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 18 +++ drivers/net/ethernet/intel/i40e/i40e_common.c | 69 +++++++++ drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 154 +++++++++++++++++++++ drivers/net/ethernet/intel/i40e/i40e_prototype.h | 9 ++ drivers/net/ethernet/intel/i40e/i40e_type.h | 13 ++ .../net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 18 +++ drivers/net/ethernet/intel/i40evf/i40e_common.c | 69 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_prototype.h | 9 ++ drivers/net/ethernet/intel/i40evf/i40e_type.h | 12 ++ 9 files changed, 371 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 5d0291c1337e..ed7bbe14bc6e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -244,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -2053,6 +2055,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 64c15f4c9d2b..fada03799850 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -5062,6 +5062,75 @@ do_retry: wr32(hw, reg_addr, reg_val); } +/** + * i40e_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Write the external PHY register. + **/ +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40e_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Read the external PHY register. + **/ +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + /** * i40e_aq_write_ppp - Write pipeline personalization profile (ppp) * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 05e89864f781..1136d02e2e95 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4196,6 +4196,158 @@ flags_complete: return 0; } +/** + * i40e_get_module_info - get (Q)SFP+ module type info + * @netdev: network interface device structure + * @modinfo: module EEPROM size and layout information structure + **/ +static int i40e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u32 sff8472_comp = 0; + u32 sff8472_swap = 0; + u32 sff8636_rev = 0; + i40e_status status; + u32 type = 0; + + /* Check if firmware supports reading module EEPROM. */ + if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) { + netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n"); + return -EINVAL; + } + + status = i40e_update_link_info(hw); + if (status) + return -EIO; + + if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) { + netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n"); + return -EINVAL; + } + + type = hw->phy.link_info.module_type[0]; + + switch (type) { + case I40E_MODULE_TYPE_SFP: + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_COMP, + &sff8472_comp, NULL); + if (status) + return -EIO; + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + I40E_I2C_EEPROM_DEV_ADDR, + I40E_MODULE_SFF_8472_SWAP, + &sff8472_swap, NULL); + if (status) + return -EIO; + + /* Check if the module requires address swap to access + * the other EEPROM memory page. + */ + if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) { + netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n"); + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else if (sff8472_comp == 0x00) { + /* Module is not SFF-8472 compliant */ + modinfo->type = ETH_MODULE_SFF_8079; + modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP_PLUS: + /* Read from memory page 0. */ + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + 0, + I40E_MODULE_REVISION_ADDR, + &sff8636_rev, NULL); + if (status) + return -EIO; + /* Determine revision compliance byte */ + if (sff8636_rev > 0x02) { + /* Module is SFF-8636 compliant */ + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + } + break; + case I40E_MODULE_TYPE_QSFP28: + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN; + break; + default: + netdev_err(vsi->netdev, "Module type unrecognized\n"); + return -EINVAL; + } + return 0; +} + +/** + * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents + * @netdev: network interface device structure + * @ee: EEPROM dump request structure + * @data: buffer to be filled with EEPROM contents + **/ +static int i40e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + bool is_sfp = false; + i40e_status status; + u32 value = 0; + int i; + + if (!ee || !ee->len || !data) + return -EINVAL; + + if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP) + is_sfp = true; + + for (i = 0; i < ee->len; i++) { + u32 offset = i + ee->offset; + u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0; + + /* Check if we need to access the other memory page */ + if (is_sfp) { + if (offset >= ETH_MODULE_SFF_8079_LEN) { + offset -= ETH_MODULE_SFF_8079_LEN; + addr = I40E_I2C_EEPROM_DEV_ADDR2; + } + } else { + while (offset >= ETH_MODULE_SFF_8436_LEN) { + /* Compute memory page number and offset. */ + offset -= ETH_MODULE_SFF_8436_LEN / 2; + addr++; + } + } + + status = i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE, + addr, offset, &value, NULL); + if (status) + return -EIO; + data[i] = value; + } + return 0; +} + static const struct ethtool_ops i40e_ethtool_ops = { .get_drvinfo = i40e_get_drvinfo, .get_regs_len = i40e_get_regs_len, @@ -4228,6 +4380,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_rxfh = i40e_set_rxfh, .get_channels = i40e_get_channels, .set_channels = i40e_set_channels, + .get_module_info = i40e_get_module_info, + .get_module_eeprom = i40e_get_module_eeprom, .get_ts_info = i40e_get_ts_info, .get_priv_flags = i40e_get_priv_flags, .set_priv_flags = i40e_set_priv_flags, diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index a39b13197891..01502561035c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -360,6 +360,15 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index fd4bbdd88b57..8b0b9f826b7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -428,6 +428,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, @@ -598,6 +610,7 @@ struct i40e_hw { struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) u64 flags; /* debug mask */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 709d114fc305..eee7ece42b39 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -244,6 +244,8 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_set_phy_debug = 0x0622, i40e_aqc_opc_upload_ext_phy_fm = 0x0625, i40e_aqc_opc_run_phy_activity = 0x0626, + i40e_aqc_opc_set_phy_register = 0x0628, + i40e_aqc_opc_get_phy_register = 0x0629, /* NVM commands */ i40e_aqc_opc_nvm_read = 0x0701, @@ -2046,6 +2048,22 @@ struct i40e_aqc_run_phy_activity { I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity); +/* Set PHY Register command (0x0628) */ +/* Get PHY Register command (0x0629) */ +struct i40e_aqc_phy_register_access { + u8 phy_interface; +#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1 +#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2 + u8 dev_address; + u8 reserved1[2]; + __le32 reg_address; + __le32 reg_value; + u8 reserved2[4]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access); + /* NVM Read command (indirect 0x0701) * NVM Erase commands (direct 0x0702) * NVM Update commands (indirect 0x0703) diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 8d3a2bfe186a..7d70bf69b249 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -1041,6 +1041,75 @@ do_retry: wr32(hw, reg_addr, reg_val); } +/** + * i40evf_aq_set_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: new register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +i40e_status i40evf_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + cmd->reg_value = cpu_to_le32(reg_val); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** + * i40evf_aq_get_phy_register + * @hw: pointer to the hw struct + * @phy_select: select which phy should be accessed + * @dev_addr: PHY device address + * @reg_addr: PHY register address + * @reg_val: read register value + * @cmd_details: pointer to command details structure or NULL + * + * Reset the external PHY. + **/ +i40e_status i40evf_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_phy_register_access *cmd = + (struct i40e_aqc_phy_register_access *)&desc.params.raw; + i40e_status status; + + i40evf_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_get_phy_register); + + cmd->phy_interface = phy_select; + cmd->dev_address = dev_addr; + cmd->reg_address = cpu_to_le32(reg_addr); + + status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details); + if (!status) + *reg_val = le32_to_cpu(cmd->reg_value); + + return status; +} + /** * i40e_aq_send_msg_to_pf * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h index c9836bba487d..b624b5994075 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h @@ -111,6 +111,15 @@ i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); +i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 reg_val, + struct i40e_asq_cmd_details *cmd_details); +i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw, + u8 phy_select, u8 dev_addr, + u32 reg_addr, u32 *reg_val, + struct i40e_asq_cmd_details *cmd_details); + i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 *value); i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 2ea919d9cdcf..b53584e3d580 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -401,6 +401,18 @@ struct i40e_nvm_access { u8 data[1]; }; +/* (Q)SFP module access definitions */ +#define I40E_I2C_EEPROM_DEV_ADDR 0xA0 +#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2 +#define I40E_MODULE_TYPE_ADDR 0x00 +#define I40E_MODULE_REVISION_ADDR 0x01 +#define I40E_MODULE_SFF_8472_COMP 0x5E +#define I40E_MODULE_SFF_8472_SWAP 0x5C +#define I40E_MODULE_SFF_ADDR_MODE 0x04 +#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D +#define I40E_MODULE_TYPE_QSFP28 0x11 +#define I40E_MODULE_QSFP_MAX_LEN 640 + /* PCI bus types */ enum i40e_bus_type { i40e_bus_type_unknown = 0, -- cgit v1.2.3 From 00f6c2f5e20bbdb638e58c50c6e6b1d8b796d6f6 Mon Sep 17 00:00:00 2001 From: Mariusz Stachura Date: Tue, 22 Aug 2017 06:57:45 -0400 Subject: i40e: use admin queue for setting LEDs behavior Instead of accessing register directly, use newly added AQC in order to blink LEDs. Introduce and utilize a new flag to prevent excessive API version checking. Signed-off-by: Mariusz Stachura Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 6 ++ drivers/net/ethernet/intel/i40e/i40e_common.c | 115 ++++++++++++++++++++------ drivers/net/ethernet/intel/i40evf/i40e_type.h | 2 + 3 files changed, 99 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index ba04988e0598..08f63226105a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -607,6 +607,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) &oem_lo); hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo; + if (hw->mac.type == I40E_MAC_XL710 && + hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && + hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) { + hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; + } + if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index fada03799850..a4838779de5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -4836,6 +4836,74 @@ phy_blinking_end: return status; } +/** + * i40e_led_get_reg - read LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: read register value + **/ +static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr, + u32 *reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + *reg_val = 0; + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16 *)reg_val); + } + return status; +} + +/** + * i40e_led_set_reg - write LED register + * @hw: pointer to the HW structure + * @led_addr: LED register address + * @reg_val: register value to write + **/ +static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr, + u32 reg_val) +{ + enum i40e_status_code status; + u8 phy_addr = 0; + u8 port_num; + u32 i; + + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_set_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + reg_val, NULL); + } else { + i = rd32(hw, I40E_PFGEN_PORTNUM); + port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); + phy_addr = i40e_get_phy_address(hw, port_num); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + (u16)reg_val); + } + + return status; +} + /** * i40e_led_get_phy - return current on/off mode * @hw: pointer to the hw struct @@ -4853,7 +4921,19 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u16 temp_addr; u8 port_num; u32 i; - + u32 reg_val_aq; + + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { + status = + i40e_aq_get_phy_register(hw, + I40E_AQ_PHY_REG_ACCESS_EXTERNAL, + I40E_PHY_COM_REG_PAGE, + I40E_PHY_LED_PROV_REG_1, + ®_val_aq, NULL); + if (status == I40E_SUCCESS) + *val = (u16)reg_val_aq; + return status; + } temp_addr = I40E_PHY_LED_PROV_REG_1; i = rd32(hw, I40E_PFGEN_PORTNUM); port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); @@ -4888,51 +4968,38 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on, u16 led_addr, u32 mode) { i40e_status status = 0; - u16 led_ctl = 0; - u16 led_reg = 0; - u8 phy_addr = 0; - u8 port_num; - u32 i; + u32 led_ctl = 0; + u32 led_reg = 0; - i = rd32(hw, I40E_PFGEN_PORTNUM); - port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); - phy_addr = i40e_get_phy_address(hw, port_num); - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) return status; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) return status; } - status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_led_get_reg(hw, led_addr, &led_reg); if (status) goto restore_config; if (on) led_reg = I40E_PHY_LED_MANUAL_ON; else led_reg = 0; - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + + status = i40e_led_set_reg(hw, led_addr, led_reg); if (status) goto restore_config; if (mode & I40E_PHY_LED_MODE_ORIG) { led_ctl = (mode & I40E_PHY_LED_MODE_MASK); - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); } return status; + restore_config: - status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); + status = i40e_led_set_reg(hw, led_addr, led_ctl); return status; } diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index b53584e3d580..48eacf5e73e4 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -568,6 +568,8 @@ struct i40e_hw { /* LLDP/DCBX Status */ u16 dcbx_status; +#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) + /* DCBX info */ struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */ struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ -- cgit v1.2.3 From ba4e003d29c1d32776f156695fb00adf7df86ee2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:46 -0400 Subject: i40e: don't hold spinlock while resetting VF When we refactored handling of the PVID in commit 9af52f60b2d9 ("i40e: use (add|rm)_vlan_all_mac helper functions when changing PVID") we introduced a scheduling while atomic regression. This occurred because we now held the spinlock across a call to i40e_reset_vf(), which results in a usleep_range() call that triggers a scheduling while atomic bug. This was rare as it only occurred if the user configured a VLAN on a VF and also attempted to reconfigure the VF from the host system with a port VLAN. We do need to hold the lock while calling i40e_is_vsi_in_vlan(), but we should not be holding it while we reset the VF. We'll fix this by introducing a separate helper function i40e_vsi_has_vlans which checks whether we have a PVID and whether the VSI has configured VLANs. This helper function will manage its own need for the mac_filter_hash_lock. Then, we can move the acquiring of the spinlock until after we reset the VF, which ensures that we do not sleep while holding the lock. Using a separate function like this makes the code more clear and is easier to read than attempting to release and re-acquire the spinlock when we reset the VF. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 36 +++++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 9e3667fc7f6a..53ead127b293 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2925,6 +2925,34 @@ error_param: return ret; } +/** + * i40e_vsi_has_vlans - True if VSI has configured VLANs + * @vsi: pointer to the vsi + * + * Check if a VSI has configured any VLANs. False if we have a port VLAN or if + * we have no configured VLANs. Do not call while holding the + * mac_filter_hash_lock. + */ +static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) +{ + bool have_vlans; + + /* If we have a port VLAN, then the VSI cannot have any VLANs + * configured, as all MAC/VLAN filters will be assigned to the PVID. + */ + if (vsi->info.pvid) + return false; + + /* Since we don't have a PVID, we know that if the device is in VLAN + * mode it must be because of a VLAN filter configured on this VSI. + */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + have_vlans = i40e_is_vsi_in_vlan(vsi); + spin_unlock_bh(&vsi->mac_filter_hash_lock); + + return have_vlans; +} + /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure @@ -2977,10 +3005,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - /* Locked once because multiple functions below iterate list */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - - if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) { + if (i40e_vsi_has_vlans(vsi)) { dev_err(&pf->pdev->dev, "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", vf_id); @@ -2993,6 +3018,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, vsi = pf->vsi[vf->lan_vsi_idx]; } + /* Locked once because multiple functions below iterate list */ + spin_lock_bh(&vsi->mac_filter_hash_lock); + /* Check for condition where there was already a port VLAN ID * filter set and now it is being deleted by setting it to zero. * Additionally check for the condition where there was a port -- cgit v1.2.3 From eeeddbb80640ef63466a54bc118f66c81487bc42 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:47 -0400 Subject: i40e: drop i40e_pf *pf from i40e_vc_disable_vf() It's never used, and the vf structure could get back to the PF if necessary. Lets just drop the extra unneeded parameter. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 53ead127b293..70a79864177a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -154,12 +154,11 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) /** * i40e_vc_disable_vf - * @pf: pointer to the PF info * @vf: pointer to the VF info * * Disable the VF through a SW reset **/ -static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf) +static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { i40e_vc_notify_vf_reset(vf); i40e_reset_vf(vf, false); @@ -2918,7 +2917,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } /* Force the VF driver stop so it has to reload with new MAC address */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n"); error_param: @@ -3013,7 +3012,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * the right thing by reconfiguring his network correctly * and then reloading the VF driver. */ - i40e_vc_disable_vf(pf, vf); + i40e_vc_disable_vf(vf); /* During reset the VF got a new VSI, so refresh the pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; } -- cgit v1.2.3 From f18d20218a14d11d8fd6ed32e66ad199c8c93280 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:48 -0400 Subject: i40e: make use of i40e_vc_disable_vf Replace i40e_vc_notify_vf_reset and i40e_reset_vf with a call to i40e_vc_disable_vf which does this exact thing. This matches similar code patterns throughout the driver. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 70a79864177a..94ee243f110e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3388,8 +3388,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); out: -- cgit v1.2.3 From d43d60e5eb9504aa6f8f390aa0313cc8e3816b82 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 22 Aug 2017 06:57:49 -0400 Subject: i40e: ensure reset occurs when disabling VF It is possible although rare that we may not reset when i40e_vc_disable_vf() is called. This can lead to some weird circumstances with some values not being properly set. Modify i40e_reset_vf() to return a code indicating whether it reset or not. Now, i40e_vc_disable_vf() can wait until a reset actually occurs. If it fails to free up within a reasonable time frame we'll display a warning message. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 42 +++++++++++++++++----- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 4 +-- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 94ee243f110e..7742cf3d38d9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -156,12 +156,28 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) * i40e_vc_disable_vf * @vf: pointer to the VF info * - * Disable the VF through a SW reset + * Disable the VF through a SW reset. **/ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) { + int i; + i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + + /* We want to ensure that an actual reset occurs initiated after this + * function was called. However, we do not want to wait forever, so + * we'll give a reasonable time and print a message if we failed to + * ensure a reset. + */ + for (i = 0; i < 20; i++) { + if (i40e_reset_vf(vf, false)) + return; + usleep_range(10000, 20000); + } + + dev_warn(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); } /** @@ -1051,9 +1067,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * reset the VF + * Returns true if the VF is reset, false otherwise. **/ -void i40e_reset_vf(struct i40e_vf *vf, bool flr) +bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; @@ -1061,9 +1077,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) u32 reg; int i; - /* If VFs have been disabled, there is no need to reset */ + /* If the VFs have been disabled, this means something else is + * resetting the VF, so we shouldn't continue. + */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; i40e_trigger_vf_reset(vf, flr); @@ -1100,6 +1118,8 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** @@ -1111,8 +1131,10 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) * VF, then do all the waiting in one chunk, and finally finish restoring each * VF after the wait. This is useful during PF routines which need to reset * all VFs, as otherwise it must perform these resets in a serialized fashion. + * + * Returns true if any VFs were reset, and false otherwise. **/ -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) { struct i40e_hw *hw = &pf->hw; struct i40e_vf *vf; @@ -1121,11 +1143,11 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) /* If we don't have any VFs, then there is nothing to reset */ if (!pf->num_alloc_vfs) - return; + return false; /* If VFs have been disabled, there is no need to reset */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return; + return false; /* Begin reset on all VFs at once */ for (v = 0; v < pf->num_alloc_vfs; v++) @@ -1200,6 +1222,8 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, pf->state); + + return true; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5111d05d5f2f..5ea42ad094bc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -122,8 +122,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs); int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen); int i40e_vc_process_vflr_event(struct i40e_pf *pf); -void i40e_reset_vf(struct i40e_vf *vf, bool flr); -void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); +bool i40e_reset_vf(struct i40e_vf *vf, bool flr); +bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr); void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ -- cgit v1.2.3 From 5b36e8d04b4439c9ceb814bfdfe1284737f9c632 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:50 -0400 Subject: i40evf: Enable VF to request an alternate queue allocation Currently the VF gets a default number of allocated queues from HW on init and it could choose to enable or disable those allocated queues. This makes it such that the VF can request more or less underlying allocated queues from the PF. First the VF negotiates the number of queues it wants that can be supported by the PF and if successful asks for a reset. During reset the PF will reallocate the HW queues for the VF and will then remap the new queues. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf.h | 4 + drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c | 38 ++++++++- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 94 ++++++++++++++++++++-- .../net/ethernet/intel/i40evf/i40evf_virtchnl.c | 44 +++++++++- 4 files changed, 173 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 82f69031e5cd..5982362c5643 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -102,6 +102,7 @@ struct i40e_vsi { #define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) #define MAX_QUEUES 16 +#define I40EVF_MAX_REQ_QUEUES 4 #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4) @@ -200,6 +201,7 @@ struct i40evf_adapter { struct list_head vlan_filter_list; char misc_vector_name[IFNAMSIZ + 9]; int num_active_queues; + int num_req_queues; /* TX */ struct i40e_ring *tx_rings; @@ -235,6 +237,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_PROMISC_ON BIT(18) #define I40EVF_FLAG_ALLMULTI_ON BIT(19) #define I40EVF_FLAG_LEGACY_RX BIT(20) +#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(21) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -349,6 +352,7 @@ void i40evf_deconfigure_queues(struct i40evf_adapter *adapter); void i40evf_enable_queues(struct i40evf_adapter *adapter); void i40evf_disable_queues(struct i40evf_adapter *adapter); void i40evf_map_queues(struct i40evf_adapter *adapter); +int i40evf_request_queues(struct i40evf_adapter *adapter, int num); void i40evf_add_ether_addrs(struct i40evf_adapter *adapter); void i40evf_del_ether_addrs(struct i40evf_adapter *adapter); void i40evf_add_vlans(struct i40evf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index 65874d6b3ab9..da006fa3fec1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -669,7 +669,7 @@ static void i40evf_get_channels(struct net_device *netdev, struct i40evf_adapter *adapter = netdev_priv(netdev); /* Report maximum channels */ - ch->max_combined = adapter->num_active_queues; + ch->max_combined = I40EVF_MAX_REQ_QUEUES; ch->max_other = NONQ_VECS; ch->other_count = NONQ_VECS; @@ -677,6 +677,41 @@ static void i40evf_get_channels(struct net_device *netdev, ch->combined_count = adapter->num_active_queues; } +/** + * i40evf_set_channels: set the new channel count + * @netdev: network interface device structure + * @ch: channel information structure + * + * Negotiate a new number of channels with the PF then do a reset. During + * reset we'll realloc queues and fix the RSS table. Returns 0 on success, + * negative on failure. + **/ +static int i40evf_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + int num_req = ch->combined_count; + + if (num_req != adapter->num_active_queues && + !(adapter->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) { + dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n"); + return -EINVAL; + } + + /* All of these should have already been checked by ethtool before this + * even gets to us, but just to be sure. + */ + if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES) + return -EINVAL; + + if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS) + return -EINVAL; + + adapter->num_req_queues = num_req; + return i40evf_request_queues(adapter, num_req); +} + /** * i40evf_get_rxfh_key_size - get the RSS hash key size * @netdev: network interface device structure @@ -785,6 +820,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .get_rxfh = i40evf_get_rxfh, .set_rxfh = i40evf_set_rxfh, .get_channels = i40evf_get_channels, + .set_channels = i40evf_set_channels, .get_rxfh_key_size = i40evf_get_rxfh_key_size, .get_link_ksettings = i40evf_get_link_ksettings, }; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 69ef6c1d5364..8c513ce84345 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -1189,9 +1189,18 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) { int i, num_active_queues; - num_active_queues = min_t(int, - adapter->vsi_res->num_queue_pairs, - (int)(num_online_cpus())); + /* If we're in reset reallocating queues we don't actually know yet for + * certain the PF gave us the number of queues we asked for but we'll + * assume it did. Once basic reset is finished we'll confirm once we + * start negotiating config with PF. + */ + if (adapter->num_req_queues) + num_active_queues = adapter->num_req_queues; + else + num_active_queues = min_t(int, + adapter->vsi_res->num_queue_pairs, + (int)(num_online_cpus())); + adapter->tx_rings = kcalloc(num_active_queues, sizeof(struct i40e_ring), GFP_KERNEL); @@ -1539,6 +1548,48 @@ static void i40evf_free_rss(struct i40evf_adapter *adapter) adapter->rss_lut = NULL; } +/** + * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors + * @adapter: board private structure + * + * Returns 0 on success, negative on failure + **/ +static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int err; + + if (netif_running(netdev)) + i40evf_free_traffic_irqs(adapter); + i40evf_free_misc_irq(adapter); + i40evf_reset_interrupt_capability(adapter); + i40evf_free_q_vectors(adapter); + i40evf_free_queues(adapter); + + err = i40evf_init_interrupt_scheme(adapter); + if (err) + goto err; + + netif_tx_stop_all_queues(netdev); + + err = i40evf_request_misc_irq(adapter); + if (err) + goto err; + + set_bit(__I40E_VSI_DOWN, adapter->vsi.state); + + err = i40evf_map_rings_to_vectors(adapter); + if (err) + goto err; + + if (RSS_AQ(adapter)) + adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; + else + err = i40evf_init_rss(adapter); +err: + return err; +} + /** * i40evf_watchdog_timer - Periodic call-back timer * @data: pointer to adapter disguised as unsigned long @@ -1885,8 +1936,15 @@ continue_reset: if (err) dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n", err); + adapter->aq_required = 0; - adapter->aq_required = I40EVF_FLAG_AQ_GET_CONFIG; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_reinit_interrupt_scheme(adapter); + if (err) + goto reset_err; + } + + adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG; adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; /* re-add all MAC filters */ @@ -1916,6 +1974,15 @@ continue_reset: if (err) goto reset_err; + if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) { + err = i40evf_request_traffic_irqs(adapter, + netdev->name); + if (err) + goto reset_err; + + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + } + i40evf_configure(adapter); i40evf_up_complete(adapter); @@ -2431,9 +2498,9 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw) int i40evf_process_config(struct i40evf_adapter *adapter) { struct virtchnl_vf_resource *vfres = adapter->vf_res; + int i, num_req_queues = adapter->num_req_queues; struct net_device *netdev = adapter->netdev; struct i40e_vsi *vsi = &adapter->vsi; - int i; netdev_features_t hw_enc_features; netdev_features_t hw_features; @@ -2447,6 +2514,23 @@ int i40evf_process_config(struct i40evf_adapter *adapter) return -ENODEV; } + if (num_req_queues && + num_req_queues != adapter->vsi_res->num_queue_pairs) { + /* Problem. The PF gave us fewer queues than what we had + * negotiated in our request. Need a reset to see if we can't + * get back to a working state. + */ + dev_err(&adapter->pdev->dev, + "Requested %d queues, but PF only gave us %d.\n", + num_req_queues, + adapter->vsi_res->num_queue_pairs); + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; + i40evf_schedule_reset(adapter); + return -ENODEV; + } + adapter->num_req_queues = 0; + hw_enc_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 2bb0fe00361f..2bb81c39d85f 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -160,7 +160,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_WB_ON_ITR | VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM; + VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -384,6 +385,32 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) kfree(vimi); } +/** + * i40evf_request_queues + * @adapter: adapter structure + * @num: number of requested queues + * + * We get a default number of queues from the PF. This enables us to request a + * different number. Returns 0 on success, negative on failure + **/ +int i40evf_request_queues(struct i40evf_adapter *adapter, int num) +{ + struct virtchnl_vf_res_request vfres; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n", + adapter->current_op); + return -EBUSY; + } + + vfres.num_queue_pairs = num; + + adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; + return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES, + (u8 *)&vfres, sizeof(vfres)); +} + /** * i40evf_add_ether_addrs * @adapter: adapter structure @@ -1068,6 +1095,21 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, "Invalid message %d from PF\n", v_opcode); } break; + case VIRTCHNL_OP_REQUEST_QUEUES: { + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + if (vfres->num_queue_pairs == adapter->num_req_queues) { + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + i40evf_schedule_reset(adapter); + } else { + dev_info(&adapter->pdev->dev, + "Requested %d queues, PF can support %d\n", + adapter->num_req_queues, + vfres->num_queue_pairs); + adapter->num_req_queues = 0; + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", -- cgit v1.2.3 From 1b7b7596aeebc21913bad49eb6a2c364c4b2988a Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Tue, 22 Aug 2017 06:57:51 -0400 Subject: i40e: make i40evf_map_rings_to_vectors void This function cannot fail, so why is it returning a value? And why are we checking it? Why shouldn't we just make it void? Why is this commit message made up of only questions? Signed-off-by: Mitch Williams Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40evf/i40evf_main.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 8c513ce84345..f2f1e754c2ce 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -430,12 +430,11 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) * group the rings as "efficiently" as possible. You would add new * mapping configurations in here. **/ -static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) +static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) { int rings_remaining = adapter->num_active_queues; int ridx = 0, vidx = 0; int q_vectors; - int err = 0; q_vectors = adapter->num_msix_vectors - NONQ_VECS; @@ -451,8 +450,6 @@ static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter) } adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS; - - return err; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -1578,9 +1575,7 @@ static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter) set_bit(__I40E_VSI_DOWN, adapter->vsi.state); - err = i40evf_map_rings_to_vectors(adapter); - if (err) - goto err; + i40evf_map_rings_to_vectors(adapter); if (RSS_AQ(adapter)) adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS; -- cgit v1.2.3 From 41d0a4d0c8b144e44d92ea95e975d2434748d806 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:52 -0400 Subject: i40e: fix handling of vf_states variable Currently we inappropriately clear the vf_states variable with a null assignment. This is problematic because we should be using atomic bitops on this variable and we don't actually want to clear all the flags. We should just clear the ones we know we want to clear. Additionally remove the I40E_VF_STATE_FCOEENA bit because it is no longer being used. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 5 ++++- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 7742cf3d38d9..989a65d60ac9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -879,7 +879,8 @@ static void i40e_free_vf_res(struct i40e_vf *vf) } /* reset some of the state variables keeping track of the resources */ vf->num_queue_pairs = 0; - vf->vf_states = 0; + clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states); + clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states); } /** @@ -1586,6 +1587,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) { vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP; set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); + } else { + clear_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states); } if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5ea42ad094bc..5efc4f92bb37 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -56,7 +56,6 @@ enum i40e_vf_states { I40E_VF_STATE_INIT = 0, I40E_VF_STATE_ACTIVE, I40E_VF_STATE_IWARPENA, - I40E_VF_STATE_FCOEENA, I40E_VF_STATE_DISABLED, I40E_VF_STATE_MC_PROMISC, I40E_VF_STATE_UC_PROMISC, -- cgit v1.2.3 From c53d11f669c0e7d0daf46a717b6712ad0b09de99 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Tue, 22 Aug 2017 06:57:53 -0400 Subject: i40e: fix client notify of VF reset Currently there is a bug in which the PF driver fails to inform clients of a VF reset which then causes clients to leak resources. The bug exists because we were incorrectly checking the I40E_VF_STATE_PRE_ENABLE bit. When a VF is first init we go through a reset to initialize variables and allocate resources but we don't want to inform clients of this first reset since the client isn't fully enabled yet so we set a state bit signifying we're in a "pre-enabled" client state. During the first reset we should be clearing the bit, allowing all following resets to notify the client of the reset when the bit is not set. This patch fixes the issue by negating the 'test_and_clear_bit' check to accurately reflect the behavior we want. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 989a65d60ac9..04568137e029 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1050,8 +1050,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); /* Do not notify the client during VF init */ - if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, - &vf->vf_states)) + if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE, + &vf->vf_states)) i40e_notify_client_of_vf_reset(pf, abs_vf_id); vf->num_vlan = 0; } -- cgit v1.2.3 From ab243ec9401d164531cc9bc07fb32231d72d1280 Mon Sep 17 00:00:00 2001 From: Scott Peterson Date: Tue, 22 Aug 2017 06:57:54 -0400 Subject: i40e: Stop dropping 802.1ad tags - eth proto 0x88a8 Enable i40e to pass traffic with VLAN tags using the 802.1ad ethernet protocol ID (0x88a8). This requires NIC firmware providing version 1.7 of the API. With older NIC firmware 802.1ad tagged packets will continue to be dropped. No VLAN offloads nor RSS are supported for 802.1ad VLANs. Signed-off-by: Scott Peterson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 6 ++++++ drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h | 17 ++++++++++++++++- drivers/net/ethernet/intel/i40e/i40e_common.c | 6 +++++- drivers/net/ethernet/intel/i40e/i40e_main.c | 7 +++++++ drivers/net/ethernet/intel/i40e/i40e_type.h | 6 ++++++ drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h | 17 ++++++++++++++++- drivers/net/ethernet/intel/i40evf/i40e_type.h | 6 ++++++ 7 files changed, 62 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 08f63226105a..9dcb2a961197 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -613,6 +613,12 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE; } + /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */ + if (hw->aq.api_maj_ver > 1 || + (hw->aq.api_maj_ver == 1 && + hw->aq.api_min_ver >= 7)) + hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE; + if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) { ret_code = I40E_ERR_FIRMWARE_API_VERSION; goto init_adminq_free_arq; diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index ed7bbe14bc6e..4c85ea9cd89a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -775,7 +775,22 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. + */ + __le16 first_tag; + __le16 second_tag; + u8 reserved[6]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index a4838779de5d..60542beda7ad 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2402,7 +2402,11 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw, i40e_aqc_opc_set_switch_config); scfg->flags = cpu_to_le16(flags); scfg->valid_flags = cpu_to_le16(valid_flags); - + if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) { + scfg->switch_tag = cpu_to_le16(hw->switch_tag); + scfg->first_tag = cpu_to_le16(hw->first_tag); + scfg->second_tag = cpu_to_le16(hw->second_tag); + } status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); return status; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 387f0863f794..3f9e89b054ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11361,6 +11361,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->bus.bus_id = pdev->bus->number; pf->instance = pfs_found; + /* Select something other than the 802.1ad ethertype for the + * switch to use internally and drop on ingress. + */ + hw->switch_tag = 0xffff; + hw->first_tag = ETH_P_8021AD; + hw->second_tag = ETH_P_8021Q; + INIT_LIST_HEAD(&pf->l3_flex_pit_list); INIT_LIST_HEAD(&pf->l4_flex_pit_list); diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 8b0b9f826b7f..4b32b1d38a66 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -610,9 +610,15 @@ struct i40e_hw { struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) u64 flags; + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index eee7ece42b39..ed5602f4bbcd 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -771,7 +771,22 @@ struct i40e_aqc_set_switch_config { #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; - u8 reserved[12]; + /* The ethertype in switch_tag is dropped on ingress and used + * internally by the switch. Set this to zero for the default + * of 0x88a8 (802.1ad). Should be zero for firmware API + * versions lower than 1.7. + */ + __le16 switch_tag; + /* The ethertypes in first_tag and second_tag are used to + * match the outer and inner VLAN tags (respectively) when HW + * double VLAN tagging is enabled via the set port parameters + * AQ command. Otherwise these are both ignored. Set them to + * zero for their defaults of 0x8100 (802.1Q). Should be zero + * for firmware API versions lower than 1.7. + */ + __le16 first_tag; + __le16 second_tag; + u8 reserved[6]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 48eacf5e73e4..9364b67fff9c 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -568,6 +568,7 @@ struct i40e_hw { /* LLDP/DCBX Status */ u16 dcbx_status; +#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1) #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2) /* DCBX info */ @@ -575,6 +576,11 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ + /* Used in set switch config AQ command */ + u16 switch_tag; + u16 first_tag; + u16 second_tag; + /* debug mask */ u32 debug_mask; char err_str[16]; -- cgit v1.2.3 From 0b40f457488d966878eec413a91f27d9b21e6ce5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:14 -0700 Subject: fm10k: prepare_for_reset() when we lose PCIe Link If we lose PCIe link, such as when an unannounced PFLR event occurs, or when a device is surprise removed, we currently detach the device and close the netdev. This unfortunately leaves a lot of things still active, such as the msix_mbx_pf IRQ, and Tx/Rx resources. This can cause problems because the register reads will return potentially invalid values which may result in unknown driver behavior. Begin the process of resetting using fm10k_prepare_for_reset(), much in the same way as the suspend and resume cycle does. This will attempt to shutdown as much as possible, in order to prevent possible issues. A naive implementation for this has issues, because there are now multiple flows calling the reset logic and setting a reset bit. This would cause problems, because the "re-attach" routine might call fm10k_handle_reset() prior to the reset actually finishing. Instead, we'll add state bits to indicate which flow actually initiated the reset. For the general reset flow, we'll assume that if someone else is resetting that we do not need to handle it at all, so it does not need its own state bit. For the suspend case, we will simply issue a warning indicating that we are attempting to recover from this case when resuming. For the detached subtask, we'll simply refuse to re-attach until we've actually initiated a reset as part of that flow. Finally, we'll stop attempting to manage the mailbox subtask when we're detached, since there's nothing we can do if we don't have a PCIe address. Overall this produces a much cleaner shutdown and recovery cycle for a PCIe surprise remove event. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 2 + drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 103 ++++++++++++++++++++------- 2 files changed, 79 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 689c413b7782..ba70c58ca920 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -270,6 +270,8 @@ enum fm10k_flags_t { enum fm10k_state_t { __FM10K_RESETTING, + __FM10K_RESET_DETACHED, + __FM10K_RESET_SUSPENDED, __FM10K_DOWN, __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 9575f7c1862d..4e5e3e64beda 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -153,7 +153,15 @@ static void fm10k_service_timer(unsigned long data) fm10k_service_event_schedule(interface); } -static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) +/** + * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset + * @interface: fm10k private data structure + * + * This function prepares for a device reset by shutting as much down as we + * can. It does nothing and returns false if __FM10K_RESETTING was already set + * prior to calling this function. It returns true if it actually did work. + */ +static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) { struct net_device *netdev = interface->netdev; @@ -162,8 +170,9 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) /* put off any impending NetWatchDogTimeout */ netif_trans_update(netdev); - while (test_and_set_bit(__FM10K_RESETTING, interface->state)) - usleep_range(1000, 2000); + /* Nothing to do if a reset is already in progress */ + if (test_and_set_bit(__FM10K_RESETTING, interface->state)) + return false; rtnl_lock(); @@ -181,6 +190,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface) interface->last_reset = jiffies + (10 * HZ); rtnl_unlock(); + + return true; } static int fm10k_handle_reset(struct fm10k_intfc *interface) @@ -189,6 +200,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + WARN_ON(!test_bit(__FM10K_RESETTING, interface->state)); + rtnl_lock(); pci_set_master(interface->pdev); @@ -267,51 +280,75 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface) struct net_device *netdev = interface->netdev; u32 __iomem *hw_addr; u32 value; + int err; - /* do nothing if device is still present or hw_addr is set */ + /* do nothing if netdev is still present or hw_addr is set */ if (netif_device_present(netdev) || interface->hw.hw_addr) return; + /* We've lost the PCIe register space, and can no longer access the + * device. Shut everything except the detach subtask down and prepare + * to reset the device in case we recover. If we actually prepare for + * reset, indicate that we're detached. + */ + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_DETACHED, interface->state); + /* check the real address space to see if we've recovered */ hw_addr = READ_ONCE(interface->uc_addr); value = readl(hw_addr); if (~value) { + /* Make sure the reset was initiated because we detached, + * otherwise we might race with a different reset flow. + */ + if (!test_and_clear_bit(__FM10K_RESET_DETACHED, + interface->state)) + return; + + /* Restore the hardware address */ interface->hw.hw_addr = interface->uc_addr; + + /* PCIe link has been restored, and the device is active + * again. Restore everything and reset the device. + */ + err = fm10k_handle_reset(interface); + if (err) { + netdev_err(netdev, "Unable to reset device: %d\n", err); + interface->hw.hw_addr = NULL; + return; + } + + /* Re-attach the netdev */ netif_device_attach(netdev); - set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags); netdev_warn(netdev, "PCIe link restored, device now attached\n"); return; } - - rtnl_lock(); - - if (netif_running(netdev)) - dev_close(netdev); - - rtnl_unlock(); } -static void fm10k_reinit(struct fm10k_intfc *interface) +static void fm10k_reset_subtask(struct fm10k_intfc *interface) { int err; - fm10k_prepare_for_reset(interface); - - err = fm10k_handle_reset(interface); - if (err) - dev_err(&interface->pdev->dev, - "fm10k_handle_reset failed: %d\n", err); -} - -static void fm10k_reset_subtask(struct fm10k_intfc *interface) -{ if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags)) return; + /* If another thread has already prepared to reset the device, we + * should not attempt to handle a reset here, since we'd race with + * that thread. This may happen if we suspend the device or if the + * PCIe link is lost. In this case, we'll just ignore the RESET + * request, as it will (eventually) be taken care of when the thread + * which actually started the reset is finished. + */ + if (!fm10k_prepare_for_reset(interface)) + return; + netdev_err(interface->netdev, "Reset interface\n"); - fm10k_reinit(interface); + err = fm10k_handle_reset(interface); + if (err) + dev_err(&interface->pdev->dev, + "fm10k_handle_reset failed: %d\n", err); } /** @@ -381,6 +418,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface) **/ static void fm10k_mbx_subtask(struct fm10k_intfc *interface) { + /* If we're resetting, bail out */ + if (test_bit(__FM10K_RESETTING, interface->state)) + return; + /* process upstream mailbox and update device state */ fm10k_watchdog_update_host_state(interface); @@ -630,9 +671,11 @@ static void fm10k_service_task(struct work_struct *work) interface = container_of(work, struct fm10k_intfc, service_task); + /* Check whether we're detached first */ + fm10k_detach_subtask(interface); + /* tasks run even when interface is down */ fm10k_mbx_subtask(interface); - fm10k_detach_subtask(interface); fm10k_reset_subtask(interface); /* tasks only run when interface is up */ @@ -2177,7 +2220,8 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) */ fm10k_stop_service_event(interface); - fm10k_prepare_for_reset(interface); + if (fm10k_prepare_for_reset(interface)) + set_bit(__FM10K_RESET_SUSPENDED, interface->state); } static int fm10k_handle_resume(struct fm10k_intfc *interface) @@ -2185,6 +2229,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) struct fm10k_hw *hw = &interface->hw; int err; + /* Even if we didn't properly prepare for reset in + * fm10k_prepare_suspend, we'll attempt to resume anyways. + */ + if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state)) + dev_warn(&interface->pdev->dev, + "Device was shut down as part of suspend... Attempting to recover\n"); + /* reset statistics starting values */ hw->mac.ops.rebind_hw_stats(hw, &interface->stats); -- cgit v1.2.3 From b4fcd43661df0d84cc4e030ab7a26533114889b9 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:15 -0700 Subject: fm10k: use spinlock to implement mailbox lock Lets not re-invent the locking wheel. Remove our bitlock and use a proper spinlock instead. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 15 +++++---------- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 3 +++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index ba70c58ca920..74542e9d63c7 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -276,7 +276,6 @@ enum fm10k_state_t { __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, __FM10K_SERVICE_DISABLE, - __FM10K_MBX_LOCK, __FM10K_LINK_DOWN, __FM10K_UPDATING_STATS, /* This value must be last and determines the BITMAP size */ @@ -346,6 +345,8 @@ struct fm10k_intfc { struct fm10k_hw_stats stats; struct fm10k_hw hw; + /* Mailbox lock */ + spinlock_t mbx_lock; u32 __iomem *uc_addr; u32 __iomem *sw_addr; u16 msg_enable; @@ -386,23 +387,17 @@ struct fm10k_intfc { static inline void fm10k_mbx_lock(struct fm10k_intfc *interface) { - /* busy loop if we cannot obtain the lock as some calls - * such as ndo_set_rx_mode may be made in atomic context - */ - while (test_and_set_bit(__FM10K_MBX_LOCK, interface->state)) - udelay(20); + spin_lock(&interface->mbx_lock); } static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface) { - /* flush memory to make sure state is correct */ - smp_mb__before_atomic(); - clear_bit(__FM10K_MBX_LOCK, interface->state); + spin_unlock(&interface->mbx_lock); } static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface) { - return !test_and_set_bit(__FM10K_MBX_LOCK, interface->state); + return spin_trylock(&interface->mbx_lock); } /* fm10k_test_staterr - test bits in Rx descriptor status and error fields */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 4e5e3e64beda..240772ad5d69 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1921,6 +1921,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); + /* Initialize the mailbox lock */ + spin_lock_init(&interface->mbx_lock); + /* Start off interface as being down */ set_bit(__FM10K_DOWN, interface->state); set_bit(__FM10K_UPDATING_STATS, interface->state); -- cgit v1.2.3 From 8249c47c6ba48cd3eba7c3ca7f8e733ee815c39b Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:16 -0700 Subject: fm10k: use generic PM hooks instead of legacy PCIe power hooks Replace the PCI specific legacy power management hooks with the new generic power management hooks which work properly for both suspend and hibernate. The new generic system is better and properly handles the lower level PCIe power management rather than forcing the driver to handle it. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 67 +++++++++------------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 240772ad5d69..aef39909e4a2 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2264,36 +2264,19 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) #ifdef CONFIG_PM /** - * fm10k_resume - Restore device to pre-sleep state - * @pdev: PCI device information struct + * fm10k_resume - Generic PM resume hook + * @dev: generic device structure * - * fm10k_resume is called after the system has powered back up from a sleep - * state and is ready to resume operation. This function is meant to restore - * the device back to its pre-sleep state. + * Generic PM hook used when waking the device from a low power state after + * suspend or hibernation. This function does not need to handle lower PCIe + * device state as the stack takes care of that for us. **/ -static int fm10k_resume(struct pci_dev *pdev) +static int fm10k_resume(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; - u32 err; - - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - - /* pci_restore_state clears dev->state_saved so call - * pci_save_state to restore it. - */ - pci_save_state(pdev); - - err = pci_enable_device_mem(pdev); - if (err) { - dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); - return err; - } - pci_set_master(pdev); - - pci_wake_from_d3(pdev, false); + int err; /* refresh hw_addr in case it was dropped */ hw->hw_addr = interface->uc_addr; @@ -2308,36 +2291,27 @@ static int fm10k_resume(struct pci_dev *pdev) } /** - * fm10k_suspend - Prepare the device for a system sleep state - * @pdev: PCI device information struct + * fm10k_suspend - Generic PM suspend hook + * @dev: generic device structure * - * fm10k_suspend is meant to shutdown the device prior to the system entering - * a sleep state. The fm10k hardware does not support wake on lan so the - * driver simply needs to shut down the device so it is in a low power state. + * Generic PM hook used when setting the device into a low power state for + * system suspend or hibernation. This function does not need to handle lower + * PCIe device state as the stack takes care of that for us. **/ -static int fm10k_suspend(struct pci_dev *pdev, - pm_message_t __always_unused state) +static int fm10k_suspend(struct device *dev) { - struct fm10k_intfc *interface = pci_get_drvdata(pdev); + struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev)); struct net_device *netdev = interface->netdev; - int err = 0; netif_device_detach(netdev); fm10k_prepare_suspend(interface); - err = pci_save_state(pdev); - if (err) - return err; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, false); - pci_set_power_state(pdev, PCI_D3hot); - return 0; } #endif /* CONFIG_PM */ + /** * fm10k_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device @@ -2447,15 +2421,18 @@ static const struct pci_error_handlers fm10k_err_handler = { .reset_done = fm10k_io_reset_done, }; +static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume); + static struct pci_driver fm10k_driver = { .name = fm10k_driver_name, .id_table = fm10k_pci_tbl, .probe = fm10k_probe, .remove = fm10k_remove, #ifdef CONFIG_PM - .suspend = fm10k_suspend, - .resume = fm10k_resume, -#endif + .driver = { + .pm = &fm10k_pm_ops, + }, +#endif /* CONFIG_PM */ .sriov_configure = fm10k_iov_configure, .err_handler = &fm10k_err_handler }; -- cgit v1.2.3 From fc9173682dcf73cfe3324267424ef17e854bb444 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:17 -0700 Subject: fm10k: introduce a message queue for MAC/VLAN messages Under some circumstances, when dealing with a large number of MAC address or VLAN updates at once, the fm10k driver, particularly the VFs can overload the mailbox with too many messages at once. This results in a mailbox timeout, which causes the driver to initiate a reset. During the reset, we re-send all the same messages that originally caused the timeout. This results in a cycle of resets each triggering a future reset. To fix or avoid this, we introduce a workqueue item which monitors a queue of MAC and VLAN requests. These requests are queued to the end of the list, and we process as a FIFO periodically. Initially we only handle requests for the netdev, but we do handle unicast MAC addresses, multicast MAC addresses, and update VLAN requests. A future patch will add support to use this queue for handling MAC update requests from the VF<->PF mailbox. The MAC/VLAN work item will keep checking to make sure that each request does not overflow the mailbox and cause a timeout. If it might, then the work item will reschedule itself a short time later. This avoids any reset cycle, since we never send the message if the mailbox is not ready. As an alternative, we tried increasing the mailbox message FIFO, but this just delays the problem and results in needless memory waste on the system. Our new message queue is dynamically allocated so only uses as much memory as it needs. Additionally, it need not be contiguous like the Tx and Rx FIFOs. Note that this patch chose to only create a queue for MAC and VLAN messages, since these are the only messages sent in a large enough volume to cause the reset loop. Other messages are very unlikely to overflow the mailbox Tx FIFO so easily. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 39 +++++ drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 199 ++++++++++++++++++----- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 201 ++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 74542e9d63c7..40856bc0f3b9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -248,6 +248,29 @@ struct fm10k_udp_port { __be16 port; }; +enum fm10k_macvlan_request_type { + FM10K_UC_MAC_REQUEST, + FM10K_MC_MAC_REQUEST, + FM10K_VLAN_REQUEST +}; + +struct fm10k_macvlan_request { + enum fm10k_macvlan_request_type type; + struct list_head list; + union { + struct fm10k_mac_request { + u8 addr[ETH_ALEN]; + u16 glort; + u16 vid; + } mac; + struct fm10k_vlan_request { + u32 vid; + u8 vsi; + } vlan; + }; + bool set; +}; + /* one work queue for entire driver */ extern struct workqueue_struct *fm10k_workqueue; @@ -276,6 +299,9 @@ enum fm10k_state_t { __FM10K_SERVICE_SCHED, __FM10K_SERVICE_REQUEST, __FM10K_SERVICE_DISABLE, + __FM10K_MACVLAN_SCHED, + __FM10K_MACVLAN_REQUEST, + __FM10K_MACVLAN_DISABLE, __FM10K_LINK_DOWN, __FM10K_UPDATING_STATS, /* This value must be last and determines the BITMAP size */ @@ -368,6 +394,12 @@ struct fm10k_intfc { struct list_head vxlan_port; struct list_head geneve_port; + /* MAC/VLAN update queue */ + struct list_head macvlan_requests; + struct delayed_work macvlan_task; + /* MAC/VLAN update queue lock */ + spinlock_t macvlan_lock; + #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; #endif /* CONFIG_DEBUG_FS */ @@ -487,6 +519,7 @@ void fm10k_up(struct fm10k_intfc *interface); void fm10k_down(struct fm10k_intfc *interface); void fm10k_update_stats(struct fm10k_intfc *interface); void fm10k_service_event_schedule(struct fm10k_intfc *interface); +void fm10k_macvlan_schedule(struct fm10k_intfc *interface); void fm10k_update_rx_drop_en(struct fm10k_intfc *interface); #ifdef CONFIG_NET_POLL_CONTROLLER void fm10k_netpoll(struct net_device *netdev); @@ -507,6 +540,12 @@ void fm10k_reset_rx_state(struct fm10k_intfc *); int fm10k_setup_tc(struct net_device *dev, u8 tc); int fm10k_open(struct net_device *netdev); int fm10k_close(struct net_device *netdev); +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid, + u8 vsi, bool set); +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set); +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans); /* Ethtool */ void fm10k_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 77d495fedced..81e4425f0529 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -758,11 +758,132 @@ static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface) return (hw->mac.type == fm10k_mac_vf || interface->host_ready); } +/** + * fm10k_queue_vlan_request - Queue a VLAN update request + * @interface: the fm10k interface structure + * @vid: the VLAN vid + * @vsi: VSI index number + * @set: whether to set or clear + * + * This function queues up a VLAN update. For VFs, this must be sent to the + * managing PF over the mailbox. For PFs, we'll use the same handling so that + * it's similar to the VF. This avoids storming the PF<->VF mailbox with too + * many VLAN updates during reset. + */ +int fm10k_queue_vlan_request(struct fm10k_intfc *interface, + u32 vid, u8 vsi, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + request->type = FM10K_VLAN_REQUEST; + request->vlan.vid = vid; + request->vlan.vsi = vsi; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_queue_mac_request - Queue a MAC update request + * @interface: the fm10k interface structure + * @glort: the target glort for this update + * @addr: the address to update + * @vid: the vid to update + * @sync: whether to add or remove + * + * This function queues up a MAC request for sending to the switch manager. + * A separate thread monitors the queue and sends updates to the switch + * manager. Return 0 on success, and negative error code on failure. + **/ +int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort, + const unsigned char *addr, u16 vid, bool set) +{ + struct fm10k_macvlan_request *request; + unsigned long flags; + + /* This must be atomic since we may be called while the netdev + * addr_list_lock is held + */ + request = kzalloc(sizeof(*request), GFP_ATOMIC); + if (!request) + return -ENOMEM; + + if (is_multicast_ether_addr(addr)) + request->type = FM10K_MC_MAC_REQUEST; + else + request->type = FM10K_UC_MAC_REQUEST; + + ether_addr_copy(request->mac.addr, addr); + request->mac.glort = glort; + request->mac.vid = vid; + request->set = set; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add_tail(&request->list, &interface->macvlan_requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + fm10k_macvlan_schedule(interface); + + return 0; +} + +/** + * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort + * @interface: the fm10k interface structure + * @glort: the target glort to clear + * @vlans: true to clear VLAN messages, false to ignore them + * + * Cancel any outstanding MAC/VLAN requests for a given glort. This is + * expected to be called when a logical port goes down. + **/ +void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface, + u16 glort, bool vlans) + +{ + struct fm10k_macvlan_request *r, *tmp; + unsigned long flags; + + spin_lock_irqsave(&interface->macvlan_lock, flags); + + /* Free any outstanding MAC/VLAN requests for this interface */ + list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) { + switch (r->type) { + case FM10K_MC_MAC_REQUEST: + case FM10K_UC_MAC_REQUEST: + /* Don't free requests for other interfaces */ + if (r->mac.glort != glort) + break; + /* fall through */ + case FM10K_VLAN_REQUEST: + if (vlans) { + list_del(&r->list); + kfree(r); + } + break; + } + } + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); +} + static int fm10k_uc_vlan_unsync(struct net_device *netdev, const unsigned char *uc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -771,10 +892,7 @@ static int fm10k_uc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, glort, uc_addr, - vid, set, 0); - + err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set); if (err) return err; @@ -786,7 +904,6 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, const unsigned char *mc_addr) { struct fm10k_intfc *interface = netdev_priv(netdev); - struct fm10k_hw *hw = &interface->hw; u16 glort = interface->glort; u16 vid = interface->vid; bool set = !!(vid / VLAN_N_VID); @@ -795,9 +912,7 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev, /* drop any leading bits on the VLAN ID */ vid &= VLAN_N_VID - 1; - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_mc_addr(hw, glort, mc_addr, vid, set); - + err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set); if (err) return err; @@ -855,18 +970,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set) /* only need to update the VLAN if not in promiscuous mode */ if (!(netdev->flags & IFF_PROMISC)) { - err = hw->mac.ops.update_vlan(hw, vid, 0, set); + err = fm10k_queue_vlan_request(interface, vid, 0, set); if (err) goto err_out; } - /* update our base MAC address if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - err = hw->mac.ops.update_uc_addr(hw, interface->glort, - hw->mac.addr, vid, set, 0); - else - err = -EHOSTDOWN; - + /* Update our base MAC address */ + err = fm10k_queue_mac_request(interface, interface->glort, + hw->mac.addr, vid, set); if (err) goto err_out; @@ -910,7 +1021,6 @@ static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid) static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) { - struct fm10k_hw *hw = &interface->hw; u32 vid, prev_vid; /* loop through and find any gaps in the table */ @@ -922,7 +1032,7 @@ static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface) /* send request to clear multiple bits at a time */ prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT; - hw->mac.ops.update_vlan(hw, prev_vid, 0, false); + fm10k_queue_vlan_request(interface, prev_vid, 0, false); } } @@ -937,15 +1047,11 @@ static int __fm10k_uc_sync(struct net_device *dev, if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return -EHOSTDOWN; - for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - err = hw->mac.ops.update_uc_addr(hw, glort, addr, - vid, sync, 0); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); if (err) return err; } @@ -1002,15 +1108,18 @@ static int __fm10k_mc_sync(struct net_device *dev, struct fm10k_intfc *interface = netdev_priv(dev); struct fm10k_hw *hw = &interface->hw; u16 vid, glort = interface->glort; + s32 err; - /* update table with current entries if host's mailbox is ready */ - if (!fm10k_host_mbx_ready(interface)) - return 0; + if (!is_multicast_ether_addr(addr)) + return -EADDRNOTAVAIL; for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync); + err = fm10k_queue_mac_request(interface, glort, + addr, vid, sync); + if (err) + return err; } return 0; @@ -1050,7 +1159,8 @@ static void fm10k_set_rx_mode(struct net_device *dev) if (interface->xcast_mode != xcast_mode) { /* update VLAN table */ if (xcast_mode == FM10K_XCAST_MODE_PROMISC) - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, true); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, + 0, true); if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC) fm10k_clear_unused_vlans(interface); @@ -1098,22 +1208,20 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) interface->glort_count, true); /* update VLAN table */ - hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, - xcast_mode == FM10K_XCAST_MODE_PROMISC); + fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0, + xcast_mode == FM10K_XCAST_MODE_PROMISC); /* Add filter for VLAN 0 */ - hw->mac.ops.update_vlan(hw, 0, 0, true); + fm10k_queue_vlan_request(interface, 0, 0, true); /* update table with current entries */ for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1; vid < VLAN_N_VID; vid = fm10k_find_next_vlan(interface, vid)) { - hw->mac.ops.update_vlan(hw, vid, 0, true); + fm10k_queue_vlan_request(interface, vid, 0, true); - /* Update unicast entries if host's mailbox is ready */ - if (fm10k_host_mbx_ready(interface)) - hw->mac.ops.update_uc_addr(hw, glort, hw->mac.addr, - vid, true, 0); + fm10k_queue_mac_request(interface, glort, + hw->mac.addr, vid, true); } /* update xcast mode before synchronizing addresses if host's mailbox @@ -1140,6 +1248,13 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface) struct net_device *netdev = interface->netdev; struct fm10k_hw *hw = &interface->hw; + /* Wait for MAC/VLAN work to finish */ + while (test_bit(__FM10K_MACVLAN_SCHED, interface->state)) + usleep_range(1000, 2000); + + /* Cancel pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); + fm10k_mbx_lock(interface); /* clear the logical port state on lower device if host's mailbox is @@ -1374,8 +1489,8 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_MULTI); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, true, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, true); } fm10k_mbx_unlock(interface); @@ -1414,8 +1529,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) if (fm10k_host_mbx_ready(interface)) { hw->mac.ops.update_xcast_mode(hw, glort, FM10K_XCAST_MODE_NONE); - hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr, - 0, false, 0); + fm10k_queue_mac_request(interface, glort, sdev->dev_addr, + 0, false); } fm10k_mbx_unlock(interface); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index aef39909e4a2..58538ce997e1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -91,6 +91,76 @@ static int fm10k_hw_ready(struct fm10k_intfc *interface) return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0; } +/** + * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task + * @interface: fm10k private interface structure + * + * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be + * started immediately, request that it be restarted when possible. + */ +void fm10k_macvlan_schedule(struct fm10k_intfc *interface) +{ + /* Avoid processing the MAC/VLAN queue when the service task is + * disabled, or when we're resetting the device. + */ + if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) && + !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) { + clear_bit(__FM10K_MACVLAN_REQUEST, interface->state); + /* We delay the actual start of execution in order to allow + * multiple MAC/VLAN updates to accumulate before handling + * them, and to allow some time to let the mailbox drain + * between runs. + */ + queue_delayed_work(fm10k_workqueue, + &interface->macvlan_task, 10); + } else { + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + } +} + +/** + * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Wait until the MAC/VLAN queue task has stopped, and cancel any future + * requests. + */ +static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface) +{ + /* Disable the MAC/VLAN work item */ + set_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* Make sure we waited until any current invocations have stopped */ + cancel_delayed_work_sync(&interface->macvlan_task); + + /* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task. + * However, it may not be unset of the MAC/VLAN task never actually + * got a chance to run. Since we've canceled the task here, and it + * cannot be rescheuled right now, we need to ensure the scheduled bit + * gets unset. + */ + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); +} + +/** + * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor + * @interface: fm10k private interface structure + * + * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule + * the MAC/VLAN work monitor. + */ +static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface) +{ + /* Re-enable the MAC/VLAN work item */ + clear_bit(__FM10K_MACVLAN_DISABLE, interface->state); + + /* We might have received a MAC/VLAN request while disabled. If so, + * kick off the queue now. + */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + void fm10k_service_event_schedule(struct fm10k_intfc *interface) { if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) && @@ -174,6 +244,12 @@ static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface) if (test_and_set_bit(__FM10K_RESETTING, interface->state)) return false; + /* As the MAC/VLAN task will be accessing registers it must not be + * running while we reset. Although the task will not be scheduled + * once we start resetting it may already be running + */ + fm10k_stop_macvlan_task(interface); + rtnl_lock(); fm10k_iov_suspend(interface->pdev); @@ -258,6 +334,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) rtnl_unlock(); + fm10k_resume_macvlan_task(interface); + clear_bit(__FM10K_RESETTING, interface->state); return err; @@ -686,6 +764,112 @@ static void fm10k_service_task(struct work_struct *work) fm10k_service_event_complete(interface); } +/** + * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager + * @work: pointer to work_struct containing our data + * + * This work item handles sending MAC/VLAN updates to the switch manager. When + * the interface is up, it will attempt to queue mailbox messages to the + * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the + * mailbox is full, it will reschedule itself to try again in a short while. + * This ensures that the driver does not overload the switch mailbox with too + * many simultaneous requests, causing an unnecessary reset. + **/ +static void fm10k_macvlan_task(struct work_struct *work) +{ + struct fm10k_macvlan_request *item; + struct fm10k_intfc *interface; + struct delayed_work *dwork; + struct list_head *requests; + struct fm10k_hw *hw; + unsigned long flags; + + dwork = to_delayed_work(work); + interface = container_of(dwork, struct fm10k_intfc, macvlan_task); + hw = &interface->hw; + requests = &interface->macvlan_requests; + + do { + /* Pop the first item off the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + item = list_first_entry_or_null(requests, + struct fm10k_macvlan_request, + list); + if (item) + list_del_init(&item->list); + + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + + /* We have no more items to process */ + if (!item) + goto done; + + fm10k_mbx_lock(interface); + + /* Check that we have plenty of space to send the message. We + * want to ensure that the mailbox stays low enough to avoid a + * change in the host state, otherwise we may see spurious + * link up / link down notifications. + */ + if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) { + hw->mbx.ops.process(hw, &hw->mbx); + set_bit(__FM10K_MACVLAN_REQUEST, interface->state); + fm10k_mbx_unlock(interface); + + /* Put the request back on the list */ + spin_lock_irqsave(&interface->macvlan_lock, flags); + list_add(&item->list, requests); + spin_unlock_irqrestore(&interface->macvlan_lock, flags); + break; + } + + switch (item->type) { + case FM10K_MC_MAC_REQUEST: + hw->mac.ops.update_mc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set); + break; + case FM10K_UC_MAC_REQUEST: + hw->mac.ops.update_uc_addr(hw, + item->mac.glort, + item->mac.addr, + item->mac.vid, + item->set, + 0); + break; + case FM10K_VLAN_REQUEST: + hw->mac.ops.update_vlan(hw, + item->vlan.vid, + item->vlan.vsi, + item->set); + break; + default: + break; + } + + fm10k_mbx_unlock(interface); + + /* Free the item now that we've sent the update */ + kfree(item); + } while (true); + +done: + WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state)); + + /* flush memory to make sure state is correct */ + smp_mb__before_atomic(); + clear_bit(__FM10K_MACVLAN_SCHED, interface->state); + + /* If a MAC/VLAN request was scheduled since we started, we should + * re-schedule. However, there is no reason to re-schedule if there is + * no work to do. + */ + if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state)) + fm10k_macvlan_schedule(interface); +} + /** * fm10k_configure_tx_ring - Configure Tx ring after Reset * @interface: board private structure @@ -1918,11 +2102,15 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, INIT_LIST_HEAD(&interface->vxlan_port); INIT_LIST_HEAD(&interface->geneve_port); + /* Initialize the MAC/VLAN queue */ + INIT_LIST_HEAD(&interface->macvlan_requests); + netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); /* Initialize the mailbox lock */ spin_lock_init(&interface->mbx_lock); + spin_lock_init(&interface->macvlan_lock); /* Start off interface as being down */ set_bit(__FM10K_DOWN, interface->state); @@ -2131,6 +2319,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) (unsigned long)interface); INIT_WORK(&interface->service_task, fm10k_service_task); + /* Setup the MAC/VLAN queue */ + INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task); + /* kick off service timer now, even when interface is down */ mod_timer(&interface->service_timer, (HZ * 2) + jiffies); @@ -2184,6 +2375,10 @@ static void fm10k_remove(struct pci_dev *pdev) del_timer_sync(&interface->service_timer); fm10k_stop_service_event(interface); + fm10k_stop_macvlan_task(interface); + + /* Remove all pending MAC/VLAN requests */ + fm10k_clear_macvlan_queue(interface, interface->glort, true); /* free netdev, this may bounce the interrupts due to setup_tc */ if (netdev->reg_state == NETREG_REGISTERED) @@ -2220,6 +2415,9 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface) * a surprise remove if the PCIe device is disabled while we're * stopped. We stop the watchdog task until after we resume software * activity. + * + * Note that the MAC/VLAN task will be stopped as part of preparing + * for reset so we don't need to handle it here. */ fm10k_stop_service_event(interface); @@ -2259,6 +2457,9 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface) /* restart the service task */ fm10k_start_service_event(interface); + /* Restart the MAC/VLAN request queue in-case of outstanding events */ + fm10k_macvlan_schedule(interface); + return err; } -- cgit v1.2.3 From 1f5c27e52857c9ba8f1ee4ed5093bee1a341f330 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:18 -0700 Subject: fm10k: use the MAC/VLAN queue for VF<->PF MAC/VLAN requests Now that we have a working MAC/VLAN queue for handling MAC/VLAN messages from the netdev, replace the default handler for the VF<->PF messages. This new handler is very similar to the default code, but uses the MAC/VLAN queue instead of sending the message directly. Unfortunately we can't easily re-use the default code, so we'll just replace the entire function. This ensures that a VF requesting a large number of VLANs or MAC addresses does not start a reset cycle, as explained in the commit which introduced the message queue. Signed-off-by: Jacob Keller Reviewed-by: Ngai-mint Kwan Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 132 ++++++++++++++++++++++++++- drivers/net/ethernet/intel/fm10k/fm10k_pf.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_pf.h | 3 +- 3 files changed, 133 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 03897720bf0b..4a17cc903eed 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -35,10 +35,133 @@ static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results, return fm10k_tlv_msg_error(hw, results, mbx); } +/** + * fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF + * @hw: Pointer to hardware structure + * @results: Pointer array to message, results[0] is pointer to message + * @mbx: Pointer to mailbox information structure + * + * This function is a custom handler for MAC/VLAN requests from the VF. The + * assumption is that it is acceptable to directly hand off the message from + * the VF to the PF's switch manager. However, we use a MAC/VLAN message + * queue to avoid overloading the mailbox when a large number of requests + * come in. + **/ +static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results, + struct fm10k_mbx_info *mbx) +{ + struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx; + struct fm10k_intfc *interface = hw->back; + u8 mac[ETH_ALEN]; + u32 *result; + int err = 0; + bool set; + u16 vlan; + u32 vid; + + /* we shouldn't be updating rules on a disabled interface */ + if (!FM10K_VF_FLAG_ENABLED(vf_info)) + err = FM10K_ERR_PARAM; + + if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) { + result = results[FM10K_MAC_VLAN_MSG_VLAN]; + + /* record VLAN id requested */ + err = fm10k_tlv_attr_get_u32(result, &vid); + if (err) + return err; + + set = !(vid & FM10K_VLAN_CLEAR); + vid &= ~FM10K_VLAN_CLEAR; + + /* if the length field has been set, this is a multi-bit + * update request. For multi-bit requests, simply disallow + * them when the pf_vid has been set. In this case, the PF + * should have already cleared the VLAN_TABLE, and if we + * allowed them, it could allow a rogue VF to receive traffic + * on a VLAN it was not assigned. In the single-bit case, we + * need to modify requests for VLAN 0 to use the default PF or + * SW vid when assigned. + */ + + if (vid >> 16) { + /* prevent multi-bit requests when PF has + * administratively set the VLAN for this VF + */ + if (vf_info->pf_vid) + return FM10K_ERR_PARAM; + } else { + err = fm10k_iov_select_vid(vf_info, (u16)vid); + if (err < 0) + return err; + + vid = err; + } + + /* update VSI info for VF in regards to VLAN table */ + err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) { + result = results[FM10K_MAC_VLAN_MSG_MAC]; + + /* record unicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* block attempts to set MAC for a locked device */ + if (is_valid_ether_addr(vf_info->mac) && + !ether_addr_equal(mac, vf_info->mac)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) { + result = results[FM10K_MAC_VLAN_MSG_MULTICAST]; + + /* record multicast MAC address requested */ + err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan); + if (err) + return err; + + /* verify that the VF is allowed to request multicast */ + if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED)) + return FM10K_ERR_PARAM; + + set = !(vlan & FM10K_VLAN_CLEAR); + vlan &= ~FM10K_VLAN_CLEAR; + + err = fm10k_iov_select_vid(vf_info, vlan); + if (err < 0) + return err; + + vlan = (u16)err; + + /* Add this request to the MAC/VLAN queue */ + err = fm10k_queue_mac_request(interface, vf_info->glort, + mac, vlan, set); + } + + return err; +} + static const struct fm10k_msg_data iov_mbx_data[] = { FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test), FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf), - FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf), + FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan), FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf), FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error), }; @@ -126,8 +249,10 @@ process_mbx: hw->mbx.ops.process(hw, &hw->mbx); /* verify port mapping is valid, if not reset port */ - if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) + if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) { hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, glort, false); + } /* reset VFs that have mailbox timed out */ if (!mbx->timeout) { @@ -190,6 +315,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev) hw->iov.ops.reset_resources(hw, vf_info); hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); } } @@ -414,6 +540,8 @@ static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface, /* disable LPORT for this VF which clears switch rules */ hw->iov.ops.reset_lport(hw, vf_info); + fm10k_clear_macvlan_queue(interface, vf_info->glort, false); + /* assign new MAC+VLAN for this VF */ hw->iov.ops.assign_default_mac_vlan(hw, vf_info); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 9e4fb3a44376..425d814aed4d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1186,7 +1186,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, * Will report an error if the VLAN ID is out of range. For VID = 0, it will * return either the pf_vid or sw_vid depending on which one is set. */ -static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid) { if (!vid) return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h index 3336d3c10760..e04d41f1a532 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2017 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -114,6 +114,7 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[]; #define FM10K_PF_MSG_ERR_HANDLER(msg, func) \ FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func) +s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid); s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *); -- cgit v1.2.3 From ef57ab791c81b3a83c75a312b15b42f7440bb425 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 10 Jul 2017 13:23:19 -0700 Subject: fm10k: bump version number Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 189d52a8a605..5d56ed5ad7a6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,7 +28,7 @@ #include "fm10k.h" -#define DRV_VERSION "0.21.7-k" +#define DRV_VERSION "0.22.1-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; -- cgit v1.2.3 From c0ad8ef3df091ef179d78dccb810024612dcfa44 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 11 Aug 2017 09:17:15 -0700 Subject: fm10k: Fix misuse of net_ratelimit() Correct the backward logic using !net_ratelimit() Miscellanea: o Add a blank line before the error return label Signed-off-by: Joe Perches Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 5d56ed5ad7a6..dbd69310f263 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -806,9 +806,10 @@ static int fm10k_tso(struct fm10k_ring *tx_ring, tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); return 1; + err_vxlan: tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL; - if (!net_ratelimit()) + if (net_ratelimit()) netdev_err(tx_ring->netdev, "TSO requested for unsupported tunnel, disabling offload\n"); return -1; -- cgit v1.2.3 From 87be98927eb0bfa5484dfbe5ba2f6b7f91dd9187 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 11 Aug 2017 11:14:37 -0700 Subject: fm10k: prefer %s and __func__ for diagnostic prints Don't hard code the function names in the diagnostic output when these reset related routines fail. Instead, use %s and __func__ so that future refactors don't need to change the print outs. Additionally, while we are here, add missing function header comments for the new reset_prepare and reset_done function handlers. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_pci.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 58538ce997e1..1e9ae3197b17 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -2588,11 +2588,18 @@ static void fm10k_io_resume(struct pci_dev *pdev) if (err) dev_warn(&pdev->dev, - "fm10k_io_resume failed: %d\n", err); + "%s failed: %d\n", __func__, err); else netif_device_attach(netdev); } +/** + * fm10k_io_reset_prepare - called when PCI function is about to be reset + * @pdev: Pointer to PCI device + * + * This callback is called when the PCI function is about to be reset, + * allowing the device driver to prepare for it. + */ static void fm10k_io_reset_prepare(struct pci_dev *pdev) { /* warn incase we have any active VF devices */ @@ -2602,6 +2609,13 @@ static void fm10k_io_reset_prepare(struct pci_dev *pdev) fm10k_prepare_suspend(pci_get_drvdata(pdev)); } +/** + * fm10k_io_reset_done - called when PCI function has finished resetting + * @pdev: Pointer to PCI device + * + * This callback is called just after the PCI function is reset, such as via + * /sys/class/net//device/reset or similar. + */ static void fm10k_io_reset_done(struct pci_dev *pdev) { struct fm10k_intfc *interface = pci_get_drvdata(pdev); @@ -2609,7 +2623,7 @@ static void fm10k_io_reset_done(struct pci_dev *pdev) if (err) { dev_warn(&pdev->dev, - "fm10k_io_reset_notify failed: %d\n", err); + "%s failed: %d\n", __func__, err); netif_device_detach(interface->netdev); } } -- cgit v1.2.3 From 3e256ac5b1ec307e5dd5a4c99fbdbc651446c738 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 11 Aug 2017 11:14:58 -0700 Subject: fm10k: fix mis-ordered parameters in declaration for .ndo_set_vf_bw We've had support for setting both a minimum and maximum bandwidth via .ndo_set_vf_bw since commit 883a9ccbae56 ("fm10k: Add support for SR-IOV to driver", 2014-09-20). Likely because we do not support minimum rates, the declaration mis-ordered the "unused" parameter, which causes warnings when analyzed with cppcheck. Fix this warning by properly declaring the min_rate and max_rate variables in the declaration and definition (rather than using "unused"). Also rename "rate" to max_rate so as to clarify that we only support setting the maximum rate. Signed-off-by: Jacob Keller Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k.h | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index 40856bc0f3b9..46973fb234c5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -562,8 +562,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); -int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, - int unused); +int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, + int __always_unused min_rate, int max_rate); int fm10k_ndo_get_vf_config(struct net_device *netdev, int vf_idx, struct ifla_vf_info *ivi); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 4a17cc903eed..ea3ab24265ee 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -613,7 +613,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, } int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, - int __always_unused unused, int rate) + int __always_unused min_rate, int max_rate) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -624,14 +624,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, return -EINVAL; /* rate limit cannot be less than 10Mbs or greater than link speed */ - if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX)) + if (max_rate && + (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX)) return -EINVAL; /* store values */ - iov_data->vf_info[vf_idx].rate = rate; + iov_data->vf_info[vf_idx].rate = max_rate; /* update hardware configuration */ - hw->iov.ops.configure_tc(hw, vf_idx, rate); + hw->iov.ops.configure_tc(hw, vf_idx, max_rate); return 0; } -- cgit v1.2.3 From a047fbae23e1d94da28f81fb0f86fab4e473a094 Mon Sep 17 00:00:00 2001 From: Arjun Vynipadath Date: Tue, 3 Oct 2017 11:43:05 +0530 Subject: cxgb4: Update comment for min_mtu We have lost a comment for minimum mtu value set for netdevice with 'commit d894be57ca92 ("ethernet: use net core MTU range checking in more drivers"). Updating it accordingly. Signed-off-by: Arjun Vynipadath Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 13b636b0af5f..fe4cbe22d5d7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5024,7 +5024,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->priv_flags |= IFF_UNICAST_FLT; /* MTU range: 81 - 9600 */ - netdev->min_mtu = 81; + netdev->min_mtu = 81; /* accommodate SACK */ netdev->max_mtu = MAX_MTU; netdev->netdev_ops = &cxgb4_netdev_ops; -- cgit v1.2.3 From abf4bb6b63d0a54266f8e7eff3720c1974063971 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:06 +0200 Subject: skbuff: Add the offload_mr_fwd_mark field Similarly to the offload_fwd_mark field, the offload_mr_fwd_mark field is used to allow partial offloading of MFC multicast routes. Switchdev drivers can offload MFC multicast routes to the hardware by registering to the FIB notification chain. When one of the route output interfaces is not offload-able, i.e. has different parent ID, the route cannot be fully offloaded by the hardware. Examples to non-offload-able devices are a management NIC, dummy device, pimreg device, etc. Similar problem exists in the bridge module, as one bridge can hold interfaces with different parent IDs. At the bridge, the problem is solved by the offload_fwd_mark skb field. Currently, when a route cannot go through full offload, the only solution for a switchdev driver is not to offload it at all and let the packet go through slow path. Using the offload_mr_fwd_mark field, a driver can indicate that a packet was already forwarded by hardware to all the devices with the same parent ID as the input device. Further patches in this patch-set are going to enhance ipmr to skip multicast forwarding to devices with the same parent ID if a packets is marked with that field. The reason why the already existing "offload_fwd_mark" bit cannot be used is that a switchdev driver would want to make the distinction between a packet that has already gone through L2 forwarding but did not go through multicast forwarding, and a packet that has already gone through both L2 and multicast forwarding. For example: when a packet is ingressing from a switchport enslaved to a bridge, which is configured with multicast forwarding, the following scenarios are possible: - The packet can be trapped to the CPU due to exception while multicast forwarding (for example, MTU error). In that case, it had already gone through L2 forwarding in the hardware, thus A switchdev driver would want to set the skb->offload_fwd_mark and not the skb->offload_mr_fwd_mark. - The packet can also be trapped due to a pimreg/dummy device used as one of the output interfaces. In that case, it can go through both L2 and (partial) multicast forwarding inside the hardware, thus a switchdev driver would want to set both the skb->offload_fwd_mark and skb->offload_mr_fwd_mark. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 19e64bfb1a66..ada821466e88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -772,6 +772,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; + __u8 offload_mr_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; -- cgit v1.2.3 From 5d8b3e69fc5e5ccafc9db1251bb7c78a8622fddd Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:07 +0200 Subject: ipv4: ipmr: Add the parent ID field to VIF struct In order to allow the ipmr module to do partial multicast forwarding according to the device parent ID, add the device parent ID field to the VIF struct. This way, the forwarding path can use the parent ID field without invoking switchdev calls, which requires the RTNL lock. When a new VIF is added, set the device parent ID field in it by invoking the switchdev_port_attr_get call. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + net/ipv4/ipmr.c | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b072a84fbe1c..8242d05df35e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -57,6 +57,7 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) struct vif_device { struct net_device *dev; /* Device we are using */ + struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ unsigned long rate_limit; /* Traffic shaping (NI) */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a844738b38bd..1b161ada7ae6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,6 +67,7 @@ #include #include #include +#include struct ipmr_rule { struct fib_rule common; @@ -868,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -942,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* Fill in the VIF structures */ + attr.orig_dev = dev; + if (!switchdev_port_attr_get(dev, &attr)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else { + v->dev_parent_id.id_len = 0; + } v->rate_limit = vifc->vifc_rate_limit; v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; -- cgit v1.2.3 From a5bc9294d70fe85729bb343eef281ccbe78ff119 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:08 +0200 Subject: ipv4: ipmr: Don't forward packets already forwarded by hardware Change the ipmr module to not forward packets if: - The packet is marked with the offload_mr_fwd_mark, and - Both input interface and output interface share the same parent ID. This way, a packet can go through partial multicast forwarding in the hardware, where it will be forwarded only to the devices that share the same parent ID (AKA, reside inside the same hardware). The kernel will forward the packet to all other interfaces. To do this, add the ipmr_offload_forward helper, which per skb, ingress VIF and egress VIF, returns whether the forwarding was offloaded to hardware. The ipmr_queue_xmit frees the skb and does not forward it if the result is a true value. All the forwarding path code compiles out when the CONFIG_NET_SWITCHDEV is not set. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1b161ada7ae6..b3ee01b0551b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1859,10 +1859,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } +#ifdef CONFIG_NET_SWITCHDEV +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + struct vif_device *out_vif = &mrt->vif_table[out_vifi]; + struct vif_device *in_vif = &mrt->vif_table[in_vifi]; + + if (!skb->offload_mr_fwd_mark) + return false; + if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) + return false; + return netdev_phys_item_id_same(&out_vif->dev_parent_id, + &in_vif->dev_parent_id); +} +#else +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + return false; +} +#endif + /* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *c, int vifi) + int in_vifi, struct sk_buff *skb, + struct mfc_cache *c, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; @@ -1883,6 +1906,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } + if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) + goto out_free; + if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, @@ -2060,8 +2086,8 @@ forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, - psend); + ipmr_queue_xmit(net, mrt, true_vifi, + skb2, cache, psend); } psend = ct; } @@ -2072,9 +2098,10 @@ last_forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb2, + cache, psend); } else { - ipmr_queue_xmit(net, mrt, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); return; } } -- cgit v1.2.3 From 267872435515185e2e600a721fdddeea90f96ffa Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:09 +0200 Subject: mlxsw: acl: Introduce ACL trap and forward action Use trap/discard flex action to implement trap and forward. The action will later be used for multicast routing, as the multicast routing mechanism is done using ACL flexible actions in Spectrum hardware. Using that action, it will be possible to implement a trap-and-forward route. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c | 17 +++++++++++++++++ .../net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index bc55d0e76705..6a979a09ab72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -676,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action { MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4); enum mlxsw_afa_trapdisc_forward_action { + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1, MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3, }; @@ -729,6 +730,22 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id) } EXPORT_SYMBOL(mlxsw_afa_block_append_trap); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_TRAPDISC_CODE, + MLXSW_AFA_TRAPDISC_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP, + MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD, + trap_id); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward); + /* Forwarding Action * ----------------- * Forwarding Action can be used to implement Policy Based Switching (PBS) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 06b0be432b8f..a8d3314c3a24 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -61,6 +61,8 @@ int mlxsw_afa_block_continue(struct mlxsw_afa_block *block); int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); +int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, + u16 trap_id); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, -- cgit v1.2.3 From a0040c8c935548e1efb1a28f07f15d7ec7918055 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:10 +0200 Subject: mlxsw: spectrum: Add trap for multicast trap-and-forward routes When a multicast route is configured with trap-and-forward action, the packets should be marked with skb->offload_mr_fwd_mark, in order to prevent the packets from being forwarded again by the kernel ipmr module. Due to this, it is not possible to use the already existing multicast trap (MLXSW_TRAP_ID_ACL1) as the packet should be marked differently. Add the MLXSW_TRAP_ID_ACL2 which is for trap-and-forward multicast routes, and set the offload_mr_fwd_mark skb field in its handler. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 2 ++ 2 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e9b94430afed..3adf237c951a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3312,6 +3312,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); } +static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) +{ + skb->offload_mr_fwd_mark = 1; + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv); +} + static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port, void *priv) { @@ -3355,6 +3363,10 @@ out: MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) +#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ + MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \ + _is_ctrl, SP_##_trap_group, DISCARD) + #define MLXSW_SP_EVENTL(_func, _trap_id) \ MLXSW_EVENTL(_func, _trap_id, SP_EVENT) @@ -3425,6 +3437,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false), MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), + MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index a98103539f6b..ec6cef8267ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -93,6 +93,8 @@ enum { MLXSW_TRAP_ID_ACL0 = 0x1C0, /* Multicast trap used for routes with trap action */ MLXSW_TRAP_ID_ACL1 = 0x1C1, + /* Multicast trap used for routes with trap-and-forward action */ + MLXSW_TRAP_ID_ACL2 = 0x1C2, MLXSW_TRAP_ID_MAX = 0x1FF }; -- cgit v1.2.3 From 607feadef89ac806df5a0be983afef77247e1541 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:11 +0200 Subject: mlxsw: spectrum: mr_tcam: Add trap-and-forward multicast route In addition to the current multicast route actions, which include trap route action and a forward route action, add the trap-and-forward multicast route action, and implement it in the multicast routing hardware logic. To implement that, add a trap-and-forward ACL action as the last action in the route flexible action set. The used trap is the ACL2 trap, which marks the packets with offload_mr_forward_mark, to prevent the packet from being forwarded again by the kernel. Note: At that stage the offloading logic does not support trap-and-forward multicast routes. This patch adds the support only in the hardware logic. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h index c851b237d253..5d26a122af49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h @@ -42,6 +42,7 @@ enum mlxsw_sp_mr_route_action { MLXSW_SP_MR_ROUTE_ACTION_FORWARD, MLXSW_SP_MR_ROUTE_ACTION_TRAP, + MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD, }; enum mlxsw_sp_mr_route_prio { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index cda9e9ad10e3..3ffb28dd4057 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -253,6 +253,7 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err; break; + case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD: case MLXSW_SP_MR_ROUTE_ACTION_FORWARD: /* If we are about to append a multicast router action, commit * the erif_list. @@ -266,6 +267,13 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, erif_list->kvdl_index); if (err) goto err; + + if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) { + err = mlxsw_afa_block_append_trap_and_forward(afa_block, + MLXSW_TRAP_ID_ACL2); + if (err) + goto err; + } break; default: err = -EINVAL; -- cgit v1.2.3 From f60c254998de80feaec8e4122960ab64e8045214 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:12 +0200 Subject: mlxsw: spectrum: mr: Support trap-and-forward routes Add the support of trap-and-forward route action in the multicast routing offloading logic. A route will be set to trap-and-forward action if one (or more) of its output interfaces is not offload-able, i.e. does not have a valid Spectrum RIF. This way, a route with mixed output VIFs list, which contains both offload-able and un-offload-able devices can go through partial offloading in hardware, and the rest will be done in the kernel ipmr module. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 4aaf6ca1be7c..1f84bb8e9135 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -114,9 +114,9 @@ static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif) return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif; } -static bool mlxsw_sp_mr_vif_rif_invalid(const struct mlxsw_sp_mr_vif *vif) +static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif) { - return mlxsw_sp_mr_vif_regular(vif) && vif->dev && !vif->rif; + return vif->dev; } static bool @@ -182,14 +182,13 @@ mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route) if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route)) return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - /* If either one of the eVIFs is not regular (VIF of type pimreg or - * tunnel) or one of the VIFs has no matching RIF, trap the packet. + /* If one of the eVIFs has no RIF, trap-and-forward the route as there + * is some more routing to do in software too. */ - list_for_each_entry(rve, &mr_route->evif_list, route_node) { - if (!mlxsw_sp_mr_vif_regular(rve->mr_vif) || - mlxsw_sp_mr_vif_rif_invalid(rve->mr_vif)) - return MLXSW_SP_MR_ROUTE_ACTION_TRAP; - } + list_for_each_entry(rve, &mr_route->evif_list, route_node) + if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif) + return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD; + return MLXSW_SP_MR_ROUTE_ACTION_FORWARD; } -- cgit v1.2.3 From 161ae6b04d5da0de518ea600d8fffe61208e243c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 11:39:18 +0100 Subject: net: dsa: lan9303: make functions lan9303_mdio_phy_{read|write} static The functions lan9303_mdio_phy_write and lan9303_mdio_phy_read are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 'lan9303_mdio_phy_write' was not declared. Should it be static? symbol 'lan9303_mdio_phy_read' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303_mdio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c index fc16668a487f..0bc56b9900f9 100644 --- a/drivers/net/dsa/lan9303_mdio.c +++ b/drivers/net/dsa/lan9303_mdio.c @@ -67,14 +67,15 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val) return 0; } -int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val) +static int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, + u16 val) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val); } -int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) +static int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg) { struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev); -- cgit v1.2.3 From 360cc342c9037ed487adf07212000f69c0ffa14d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 3 Oct 2017 11:46:33 +0100 Subject: net: dsa: mt7530: make functions mt7530_phy_write static The function mt7530_phy_write is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warnings: symbol 'mt7530_phy_write' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index faa3b88d2206..034241696ce2 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -564,7 +564,8 @@ static int mt7530_phy_read(struct dsa_switch *ds, int port, int regnum) return mdiobus_read_nested(priv->bus, port, regnum); } -int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) +static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, + u16 val) { struct mt7530_priv *priv = ds->priv; -- cgit v1.2.3 From b508e0b6e47c85a095ef056f3de6ba9d396c490c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Oct 2017 13:53:03 +0300 Subject: mlxsw: spectrum: Fix check for IS_ERR() instead of NULL mlxsw_afa_block_create() doesn't return error pointers, it returns NULL on error. Fixes: 0e14c7777acb ("mlxsw: spectrum: Add the multicast routing hardware logic") Signed-off-by: Dan Carpenter Acked-by: Yotam Gigi Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 3ffb28dd4057..3a61896ae4d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -239,8 +239,8 @@ mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp, int err; afa_block = mlxsw_afa_block_create(mlxsw_sp->afa); - if (IS_ERR(afa_block)) - return afa_block; + if (!afa_block) + return ERR_PTR(-ENOMEM); err = mlxsw_afa_block_append_counter(afa_block, counter_index); if (err) -- cgit v1.2.3 From b5c7d4e54c9ab830e5c03f92377fe15cbae64d0d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Oct 2017 13:53:41 +0300 Subject: mlxsw: spectrum: Add missing error code on allocation failure We accidentally return success if the kmalloc_array() call fails. Fixes: 0e14c7777acb ("mlxsw: spectrum: Add the multicast routing hardware logic") Signed-off-by: Dan Carpenter Acked-by: Yotam Gigi Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c index 3a61896ae4d8..39c21c70ac32 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c @@ -771,8 +771,10 @@ mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp, parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1, sizeof(*parman_prios), GFP_KERNEL); - if (!parman_prios) + if (!parman_prios) { + err = -ENOMEM; goto err_parman_prios_alloc; + } mr_tcam_region->parman_prios = parman_prios; for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++) -- cgit v1.2.3 From 4d2c0cda07448ea6980f00102dc3964eb25e241c Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 27 Sep 2017 18:03:49 -0700 Subject: bonding: speed/duplex update at NETDEV_UP event Some NIC drivers don't have correct speed/duplex settings at the time they send NETDEV_UP notification and that messes up the bonding state. Especially 802.3ad mode which is very sensitive to these settings. In the current implementation we invoke bond_update_speed_duplex() when we receive NETDEV_UP, however, ignore the return value. If the values we get are invalid (UNKNOWN), then slave gets removed from the aggregator with speed and duplex set to UNKNOWN while link is still marked as UP. This patch fixes this scenario. Also 802.3ad mode is sensitive to these conditions while other modes are not, so making sure that it doesn't change the behavior for other modes. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index d2e94b8559f0..b19dc033fb36 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3073,7 +3073,16 @@ static int bond_slave_netdev_event(unsigned long event, break; case NETDEV_UP: case NETDEV_CHANGE: - bond_update_speed_duplex(slave); + /* For 802.3ad mode only: + * Getting invalid Speed/Duplex values here will put slave + * in weird state. So mark it as link-down for the time + * being and let link-monitoring (miimon) set it right when + * correct speeds/duplex are available. + */ + if (bond_update_speed_duplex(slave) && + BOND_MODE(bond) == BOND_MODE_8023AD) + slave->link = BOND_LINK_DOWN; + if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_adapter_speed_duplex_changed(slave); /* Fallthrough */ -- cgit v1.2.3 From 6c5570016b972d9b1f0f6c2dca9cc0422b1f92bf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 2 Oct 2017 23:50:05 +0200 Subject: net: core: decouple ifalias get/set from rtnl lock Device alias can be set by either rtnetlink (rtnl is held) or sysfs. rtnetlink hold the rtnl mutex, sysfs acquires it for this purpose. Add an extra mutex for it and use rcu to protect concurrent accesses. This allows the sysfs path to not take rtnl and would later allow to not hold it when dumping ifalias. Based on suggestion from Eric Dumazet. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++- net/core/dev.c | 52 +++++++++++++++++++++++++++++++++++------------ net/core/net-sysfs.c | 17 ++++++++-------- net/core/rtnetlink.c | 13 ++++++++++-- 4 files changed, 65 insertions(+), 25 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e1d6ef130611..d04424cfffba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -826,6 +826,11 @@ struct xfrmdev_ops { }; #endif +struct dev_ifalias { + struct rcu_head rcuhead; + char ifalias[]; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1632,7 +1637,7 @@ enum netdev_priv_flags { struct net_device { char name[IFNAMSIZ]; struct hlist_node name_hlist; - char *ifalias; + struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one @@ -3275,6 +3280,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); +int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); diff --git a/net/core/dev.c b/net/core/dev.c index e350c768d4b5..1770097cfd86 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id); DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static DEFINE_MUTEX(ifalias_mutex); + /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); @@ -1265,29 +1267,53 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { - char *new_ifalias; - - ASSERT_RTNL(); + struct dev_ifalias *new_alias = NULL; if (len >= IFALIASZ) return -EINVAL; - if (!len) { - kfree(dev->ifalias); - dev->ifalias = NULL; - return 0; + if (len) { + new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL); + if (!new_alias) + return -ENOMEM; + + memcpy(new_alias->ifalias, alias, len); + new_alias->ifalias[len] = 0; } - new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!new_ifalias) - return -ENOMEM; - dev->ifalias = new_ifalias; - memcpy(dev->ifalias, alias, len); - dev->ifalias[len] = 0; + mutex_lock(&ifalias_mutex); + rcu_swap_protected(dev->ifalias, new_alias, + mutex_is_locked(&ifalias_mutex)); + mutex_unlock(&ifalias_mutex); + + if (new_alias) + kfree_rcu(new_alias, rcuhead); return len; } +/** + * dev_get_alias - get ifalias of a device + * @dev: device + * @alias: buffer to store name of ifalias + * @len: size of buffer + * + * get ifalias for a device. Caller must make sure dev cannot go + * away, e.g. rcu read lock or own a reference count to device. + */ +int dev_get_alias(const struct net_device *dev, char *name, size_t len) +{ + const struct dev_ifalias *alias; + int ret = 0; + + rcu_read_lock(); + alias = rcu_dereference(dev->ifalias); + if (alias) + ret = snprintf(name, len, "%s", alias->ifalias); + rcu_read_unlock(); + + return ret; +} /** * netdev_features_change - device changes features diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 927a6dcbad96..51d5836d8fb9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -391,10 +391,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); ret = dev_set_alias(netdev, buf, count); - rtnl_unlock(); return ret < 0 ? ret : len; } @@ -403,13 +400,12 @@ static ssize_t ifalias_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); + char tmp[IFALIASZ]; ssize_t ret = 0; - if (!rtnl_trylock()) - return restart_syscall(); - if (netdev->ifalias) - ret = sprintf(buf, "%s\n", netdev->ifalias); - rtnl_unlock(); + ret = dev_get_alias(netdev, tmp, sizeof(tmp)); + if (ret > 0) + ret = sprintf(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); @@ -1488,7 +1484,10 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); - kfree(dev->ifalias); + /* no need to wait for rcu grace period: + * device is dead and about to be freed. + */ + kfree(rcu_access_pointer(dev->ifalias)); netdev_freemem(dev); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e6955da0d58d..3961f87cdc76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1366,6 +1366,16 @@ static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) return nla_put_u32(skb, IFLA_LINK, ifindex); } +static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, + struct net_device *dev) +{ + char buf[IFALIASZ]; + int ret; + + ret = dev_get_alias(dev, buf, sizeof(buf)); + return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; +} + static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev) { @@ -1425,8 +1435,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || - (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_changes)) || nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) -- cgit v1.2.3 From 1ae2eaaa229bc350b6f38fbf4ab9c873532aecfb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:08 -0300 Subject: sctp: silence warns on sctp_stream_init allocations As SCTP supports up to 65535 streams, that can lead to very large allocations in sctp_stream_init(). As Xin Long noticed, systems with small amounts of memory are more prone to not have enough memory and dump warnings on dmesg initiated by user actions. Thus, silence them. Also, if the reallocation of stream->out is not necessary, skip it and keep the memory we already have. Reported-by: Xin Long Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 63ea15503714..1afa95558083 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -40,9 +40,14 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, { int i; + gfp |= __GFP_NOWARN; + /* Initial stream->out size may be very big, so free it and alloc - * a new one with new outcnt to save memory. + * a new one with new outcnt to save memory if needed. */ + if (outcnt == stream->outcnt) + goto in; + kfree(stream->out); stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); @@ -53,6 +58,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; +in: if (!incnt) return 0; -- cgit v1.2.3 From e090abd0d81c7bdbf0c0ba2224624be2b15ded0d Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:09 -0300 Subject: sctp: factor out stream->out allocation There is 1 place allocating it and 2 other reallocating. Move such procedures to a common function. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 1afa95558083..6d0e997d301f 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -35,6 +35,30 @@ #include #include +static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, + gfp_t gfp) +{ + struct sctp_stream_out *out; + + out = kmalloc_array(outcnt, sizeof(*out), gfp); + if (!out) + return -ENOMEM; + + if (stream->out) { + memcpy(out, stream->out, min(outcnt, stream->outcnt) * + sizeof(*out)); + kfree(stream->out); + } + + if (outcnt > stream->outcnt) + memset(out + stream->outcnt, 0, + (outcnt - stream->outcnt) * sizeof(*out)); + + stream->out = out; + + return 0; +} + int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { @@ -48,11 +72,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, if (outcnt == stream->outcnt) goto in; - kfree(stream->out); - - stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); - if (!stream->out) - return -ENOMEM; + i = sctp_stream_alloc_out(stream, outcnt, gfp); + if (i) + return i; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) @@ -276,15 +298,9 @@ int sctp_send_add_streams(struct sctp_association *asoc, } if (out) { - struct sctp_stream_out *streamout; - - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_KERNEL); - if (!streamout) + retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL); + if (retval) goto out; - - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; } chunk = sctp_make_strreset_addstrm(asoc, out, in); @@ -682,10 +698,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_out *streamout; struct sctp_chunk *chunk = NULL; __u32 request_seq, outcnt; __u16 out, i; + int ret; request_seq = ntohl(addstrm->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || @@ -714,14 +730,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( if (!out || outcnt > SCTP_MAX_STREAM) goto out; - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_ATOMIC); - if (!streamout) + ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC); + if (ret) goto out; - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; - chunk = sctp_make_strreset_addstrm(asoc, out, 0); if (!chunk) goto out; -- cgit v1.2.3 From 1fdb8d8fefe2e7320ea15a65051758a4c4332f05 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:10 -0300 Subject: sctp: factor out stream->in allocation There is 1 place allocating it and another reallocating. Move such procedures to a common function. v2: updated changelog Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 6d0e997d301f..952437d656cc 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -59,6 +59,31 @@ static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, return 0; } +static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, + gfp_t gfp) +{ + struct sctp_stream_in *in; + + in = kmalloc_array(incnt, sizeof(*stream->in), gfp); + + if (!in) + return -ENOMEM; + + if (stream->in) { + memcpy(in, stream->in, min(incnt, stream->incnt) * + sizeof(*in)); + kfree(stream->in); + } + + if (incnt > stream->incnt) + memset(in + stream->incnt, 0, + (incnt - stream->incnt) * sizeof(*in)); + + stream->in = in; + + return 0; +} + int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { @@ -84,8 +109,8 @@ in: if (!incnt) return 0; - stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); - if (!stream->in) { + i = sctp_stream_alloc_in(stream, incnt, gfp); + if (i) { kfree(stream->out); stream->out = NULL; return -ENOMEM; @@ -623,7 +648,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_in *streamin; __u32 request_seq, incnt; __u16 in, i; @@ -670,13 +694,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( if (!in || incnt > SCTP_MAX_STREAM) goto out; - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_ATOMIC); - if (!streamin) + if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) goto out; - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); - stream->in = streamin; stream->incnt = incnt; result = SCTP_STRRESET_PERFORMED; -- cgit v1.2.3 From f952be79cebd49d04154781d99408867a069d375 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:11 -0300 Subject: sctp: introduce struct sctp_stream_out_ext With the stream schedulers, sctp_stream_out will become too big to be allocated by kmalloc and as we need to allocate with BH disabled, we cannot use __vmalloc in sctp_stream_init(). This patch moves out the stats from sctp_stream_out to sctp_stream_out_ext, which will be allocated only when the application tries to sendmsg something on it. Just the introduction of sctp_stream_out_ext would already fix the issue described above by splitting the allocation in two. Moving the stats to it also reduces the pressure on the allocator as we will ask for less memory atomically when creating the socket and we will use GFP_KERNEL later. Then, for stream schedulers, we will just use sctp_stream_out_ext. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++-- net/sctp/chunk.c | 6 +++--- net/sctp/outqueue.c | 4 ++-- net/sctp/socket.c | 27 +++++++++++++++++++++------ net/sctp/stream.c | 16 ++++++++++++++++ 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0477945de1a3..9b2b30b3ba4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,6 +84,7 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; +struct sctp_stream_out; #include @@ -380,6 +381,7 @@ struct sctp_sender_hb_info { int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp); +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid); void sctp_stream_free(struct sctp_stream *stream); void sctp_stream_clear(struct sctp_stream *stream); void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new); @@ -1315,11 +1317,15 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_out_ext { + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; +}; + struct sctp_stream_out { __u16 ssn; __u8 state; - __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; - __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct sctp_stream_out_ext *ext; }; struct sctp_stream_in { diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 3afac275ee82..7b261afc47b9 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++; } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && @@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 2966ff400755..746b07b7937d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -366,7 +366,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -404,7 +404,7 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_stream_out *streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; - streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } msg_len -= SCTP_DATA_SNDSIZE(chk) + diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d4730ada7f32..d207734326b0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1927,6 +1927,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_free; } + /* Allocate sctp_stream_out_ext if not already done */ + if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) { + err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); + if (err) + goto out_free; + } + if (sctp_wspace(asoc) < msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); @@ -6645,7 +6652,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_stream_out *streamout; + struct sctp_stream_out_ext *streamoute; struct sctp_association *asoc; struct sctp_prstatus params; int retval = -EINVAL; @@ -6668,21 +6675,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamout = &asoc->stream.out[params.sprstat_sid]; + streamoute = asoc->stream.out[params.sprstat_sid].ext; + if (!streamoute) { + /* Not allocated yet, means all stats are 0 */ + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + retval = 0; + goto out; + } + if (policy == SCTP_PR_SCTP_NONE) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { params.sprstat_abandoned_unsent += - streamout->abandoned_unsent[policy]; + streamoute->abandoned_unsent[policy]; params.sprstat_abandoned_sent += - streamout->abandoned_sent[policy]; + streamoute->abandoned_sent[policy]; } } else { params.sprstat_abandoned_unsent = - streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)]; params.sprstat_abandoned_sent = - streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)]; } if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 952437d656cc..055ca25bbc91 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -121,8 +121,24 @@ in: return 0; } +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_out_ext *soute; + + soute = kzalloc(sizeof(*soute), GFP_KERNEL); + if (!soute) + return -ENOMEM; + stream->out[sid].ext = soute; + + return 0; +} + void sctp_stream_free(struct sctp_stream *stream) { + int i; + + for (i = 0; i < stream->outcnt; i++) + kfree(stream->out[i].ext); kfree(stream->out); kfree(stream->in); } -- cgit v1.2.3 From 2fc019f790312e703efa1a44204c586112a430dc Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:12 -0300 Subject: sctp: introduce sctp_chunk_stream_no Add a helper to fetch the stream number from a given chunk. Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 9b2b30b3ba4d..c48f7999fe9b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -642,6 +642,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, union sctp_addr *); const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); +static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch) +{ + return ntohs(ch->subh.data_hdr->stream); +} + enum { SCTP_ADDR_NEW, /* new address added to assoc/ep */ SCTP_ADDR_SRC, /* address can be used as source */ -- cgit v1.2.3 From 5bbbbe32a43199c2b9ea5ea66fab6241c64beb51 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:13 -0300 Subject: sctp: introduce stream scheduler foundations This patch introduces the hooks necessary to do stream scheduling, as per RFC Draft ndata. It also introduces the first scheduler, which is what we do today but now factored out: first come first served (FCFS). With stream scheduling now we have to track which chunk was enqueued on which stream and be able to select another other than the in front of the main outqueue. So we introduce a list on sctp_stream_out_ext structure for this purpose. We reuse sctp_chunk->transmitted_list space for the list above, as the chunk cannot belong to the two lists at the same time. By using the union in there, we can have distinct names for these moments. sctp_sched_ops are the operations expected to be implemented by each scheduler. The dequeueing is a bit particular to this implementation but it is to match how we dequeue packets today. We first dequeue and then check if it fits the packet and if not, we requeue it at head. Thus why we don't have a peek operation but have dequeue_done instead, which is called once the chunk can be safely considered as transmitted. The check removed from sctp_outq_flush is now performed by sctp_stream_outq_migrate, which is only called during assoc setup. (sctp_sendmsg() also checks for it) The only operation that is foreseen but not yet added here is a way to signalize that a new packet is starting or that the packet is done, for round robin scheduler per packet, but is intentionally left to the patch that actually implements it. Support for I-DATA chunks, also described in this RFC, with user message interleaving is straightforward as it just requires the schedulers to probe for the feature and ignore datamsg boundaries when dequeueing. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/stream_sched.h | 72 +++++++++++ include/net/sctp/structs.h | 15 ++- include/uapi/linux/sctp.h | 6 + net/sctp/Makefile | 2 +- net/sctp/outqueue.c | 59 +++++---- net/sctp/sm_sideeffect.c | 3 + net/sctp/stream.c | 88 +++++++++++-- net/sctp/stream_sched.c | 270 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 477 insertions(+), 38 deletions(-) create mode 100644 include/net/sctp/stream_sched.h create mode 100644 net/sctp/stream_sched.c diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h new file mode 100644 index 000000000000..c676550a4c7d --- /dev/null +++ b/include/net/sctp/stream_sched.h @@ -0,0 +1,72 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * These are definitions used by the stream schedulers, defined in RFC + * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11) + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresses: + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#ifndef __sctp_stream_sched_h__ +#define __sctp_stream_sched_h__ + +struct sctp_sched_ops { + /* Property handling for a given stream */ + int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value, + gfp_t gfp); + int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value); + + /* Init the specific scheduler */ + int (*init)(struct sctp_stream *stream); + /* Init a stream */ + int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); + /* Frees the entire thing */ + void (*free)(struct sctp_stream *stream); + + /* Enqueue a chunk */ + void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); + /* Dequeue a chunk */ + struct sctp_chunk *(*dequeue)(struct sctp_outq *q); + /* Called only if the chunk fit the packet */ + void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); + /* Sched all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *steam); + /* Unched all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *steam); +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched); +int sctp_sched_get_sched(struct sctp_association *asoc); +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp); +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value); +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch); + +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch); +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp); +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream); + +#endif /* __sctp_stream_sched_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c48f7999fe9b..3c22a30fd71b 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -84,7 +84,6 @@ struct sctp_ulpq; struct sctp_ep_common; struct crypto_shash; struct sctp_stream; -struct sctp_stream_out; #include @@ -531,8 +530,12 @@ struct sctp_chunk { /* How many times this chunk have been sent, for prsctp RTX policy */ int sent_count; - /* This is our link to the per-transport transmitted list. */ - struct list_head transmitted_list; + union { + /* This is our link to the per-transport transmitted list. */ + struct list_head transmitted_list; + /* List in specific stream outq */ + struct list_head stream_list; + }; /* This field is used by chunks that hold fragmented data. * For the first fragment this is the list that holds the rest of @@ -1019,6 +1022,9 @@ struct sctp_outq { /* Data pending that has never been transmitted. */ struct list_head out_chunk_list; + /* Stream scheduler being used */ + struct sctp_sched_ops *sched; + unsigned int out_qlen; /* Total length of queued data chunks. */ /* Error of send failed, may used in SCTP_SEND_FAILED event. */ @@ -1325,6 +1331,7 @@ struct sctp_inithdr_host { struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; + struct list_head outq; /* chunks enqueued by this stream */ }; struct sctp_stream_out { @@ -1342,6 +1349,8 @@ struct sctp_stream { struct sctp_stream_in *in; __u16 outcnt; __u16 incnt; + /* Current stream being sent, if any */ + struct sctp_stream_out *out_curr; }; #define SCTP_STREAM_CLOSED 0x00 diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 6217ff8500a1..4487e7625ddb 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1088,4 +1088,10 @@ struct sctp_add_streams { uint16_t sas_outstrms; }; +/* SCTP Stream schedulers */ +enum sctp_sched_type { + SCTP_SS_FCFS, + SCTP_SS_MAX = SCTP_SS_FCFS +}; + #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 70f1b570bab9..0f6e6d1d69fd 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o + offload.o stream_sched.o sctp_probe-y := probe.o diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 746b07b7937d..4db012aa25f7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -50,6 +50,7 @@ #include #include +#include /* Declare internal functions here. */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); @@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, - struct sctp_chunk *ch) + struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add(&ch->stream_list, &oute->outq); } /* Take data from the front of the queue. */ static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) { - struct sctp_chunk *ch = NULL; - - if (!list_empty(&q->out_chunk_list)) { - struct list_head *entry = q->out_chunk_list.next; - - ch = list_entry(entry, struct sctp_chunk, list); - list_del_init(entry); - q->out_qlen -= ch->skb->len; - } - return ch; + return q->sched->dequeue(q); } + /* Add data chunk to the end of the queue. */ static inline void sctp_outq_tail_data(struct sctp_outq *q, struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add_tail(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add_tail(&ch->stream_list, &oute->outq); } /* @@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + sctp_sched_set_sched(asoc, SCTP_SS_FCFS); } /* Free the outqueue structure and any related pending chunks. @@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q) /* Throw away any leftover data chunks. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { + sctp_sched_dequeue_done(q, chunk); /* Mark as send failure. */ sctp_chunk_fail(chunk, q->error); @@ -391,13 +400,14 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; + q->sched->unsched_all(&asoc->stream); + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; - list_del_init(&chk->list); - q->out_qlen -= chk->skb->len; + sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { @@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, break; } + q->sched->sched_all(&asoc->stream); + return msg_len; } @@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { __u32 sid = ntohs(chunk->subh.data_hdr->stream); - /* RFC 2960 6.5 Every DATA chunk MUST carry a valid - * stream identifier. - */ - if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) { - - /* Mark as failed send. */ - sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->peer.prsctp_capable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - asoc->sent_cnt_removable--; - sctp_chunk_free(chunk); - continue; - } - /* Has this chunk expired? */ if (sctp_chunk_abandoned(chunk)) { + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) { WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) else asoc->stats.oodchunks++; + /* Only now it's safe to consider this + * chunk as sent, sched-wise. + */ + sctp_sched_dequeue_done(q, chunk); + break; default: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e6a2974e020e..402bfbb888cd 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -50,6 +50,7 @@ #include #include #include +#include static int sctp_cmd_interpreter(enum sctp_event event_type, union sctp_subtype subtype, @@ -1089,6 +1090,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, list_for_each_entry(chunk, &msg->chunks, frag_list) sctp_outq_tail(&asoc->outqueue, chunk, gfp); + + asoc->outqueue.sched->enqueue(&asoc->outqueue, msg); } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 055ca25bbc91..5ea33a2c453b 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -32,8 +32,61 @@ * Xin Long */ +#include #include #include +#include + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. + */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch, *temp; + struct sctp_outq *outq; + int i; + + asoc = container_of(stream, struct sctp_association, stream); + outq = &asoc->outqueue; + + list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (sid < outcnt) + continue; + + sctp_sched_dequeue_common(outq, ch); + /* No need to call dequeue_done here because + * the chunks are not scheduled by now. + */ + + /* Mark as failed send. */ + sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + if (asoc->peer.prsctp_capable && + SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) + asoc->sent_cnt_removable--; + + sctp_chunk_free(ch); + } + + if (new) { + /* Here we actually move the old ext stuff into the new + * buffer, because we want to keep it. Then + * sctp_stream_update will swap ->out pointers. + */ + for (i = 0; i < outcnt; i++) { + kfree(new->out[i].ext); + new->out[i].ext = stream->out[i].ext; + stream->out[i].ext = NULL; + } + } + + for (i = outcnt; i < stream->outcnt; i++) + kfree(stream->out[i].ext); +} static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, gfp_t gfp) @@ -87,7 +140,8 @@ static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { - int i; + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + int i, ret = 0; gfp |= __GFP_NOWARN; @@ -97,6 +151,11 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, if (outcnt == stream->outcnt) goto in; + /* Filter out chunks queued on streams that won't exist anymore */ + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, NULL, outcnt); + sched->sched_all(stream); + i = sctp_stream_alloc_out(stream, outcnt, gfp); if (i) return i; @@ -105,20 +164,27 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; + sched->init(stream); + in: if (!incnt) - return 0; + goto out; i = sctp_stream_alloc_in(stream, incnt, gfp); if (i) { - kfree(stream->out); - stream->out = NULL; - return -ENOMEM; + ret = -ENOMEM; + goto free; } stream->incnt = incnt; + goto out; - return 0; +free: + sched->free(stream); + kfree(stream->out); + stream->out = NULL; +out: + return ret; } int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) @@ -130,13 +196,15 @@ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) return -ENOMEM; stream->out[sid].ext = soute; - return 0; + return sctp_sched_init_sid(stream, sid, GFP_KERNEL); } void sctp_stream_free(struct sctp_stream *stream) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); int i; + sched->free(stream); for (i = 0; i < stream->outcnt; i++) kfree(stream->out[i].ext); kfree(stream->out); @@ -156,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream) void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, new, new->outcnt); sctp_stream_free(stream); stream->out = new->out; @@ -163,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) stream->outcnt = new->outcnt; stream->incnt = new->incnt; + sched->sched_all(stream); + new->out = NULL; new->in = NULL; } diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c new file mode 100644 index 000000000000..40a9a9de2b98 --- /dev/null +++ b/net/sctp/stream_sched.c @@ -0,0 +1,270 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* First Come First Serve (a.k.a. FIFO) + * RFC DRAFT ndata Section 3.1 + */ +static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid, + __u16 value, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = 0; + return 0; +} + +static int sctp_sched_fcfs_init(struct sctp_stream *stream) +{ + return 0; +} + +static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + return 0; +} + +static void sctp_sched_fcfs_free(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ +} + +static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_chunk *ch = NULL; + struct list_head *entry; + + if (list_empty(&q->out_chunk_list)) + goto out; + + if (stream->out_curr) { + ch = list_entry(stream->out_curr->ext->outq.next, + struct sctp_chunk, stream_list); + } else { + entry = q->out_chunk_list.next; + ch = list_entry(entry, struct sctp_chunk, list); + } + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *chunk) +{ +} + +static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) +{ +} + +static struct sctp_sched_ops sctp_sched_fcfs = { + .set = sctp_sched_fcfs_set, + .get = sctp_sched_fcfs_get, + .init = sctp_sched_fcfs_init, + .init_sid = sctp_sched_fcfs_init_sid, + .free = sctp_sched_fcfs_free, + .enqueue = sctp_sched_fcfs_enqueue, + .dequeue = sctp_sched_fcfs_dequeue, + .dequeue_done = sctp_sched_fcfs_dequeue_done, + .sched_all = sctp_sched_fcfs_sched_all, + .unsched_all = sctp_sched_fcfs_unsched_all, +}; + +/* API to other parts of the stack */ + +struct sctp_sched_ops *sctp_sched_ops[] = { + &sctp_sched_fcfs, +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched) +{ + struct sctp_sched_ops *n = sctp_sched_ops[sched]; + struct sctp_sched_ops *old = asoc->outqueue.sched; + struct sctp_datamsg *msg = NULL; + struct sctp_chunk *ch; + int i, ret = 0; + + if (old == n) + return ret; + + if (sched > SCTP_SS_MAX) + return -EINVAL; + + if (old) { + old->free(&asoc->stream); + + /* Give the next scheduler a clean slate. */ + for (i = 0; i < asoc->stream.outcnt; i++) { + void *p = asoc->stream.out[i].ext; + + if (!p) + continue; + + p += offsetofend(struct sctp_stream_out_ext, outq); + memset(p, 0, sizeof(struct sctp_stream_out_ext) - + offsetofend(struct sctp_stream_out_ext, outq)); + } + } + + asoc->outqueue.sched = n; + n->init(&asoc->stream); + for (i = 0; i < asoc->stream.outcnt; i++) { + if (!asoc->stream.out[i].ext) + continue; + + ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + if (ret) + goto err; + } + + /* We have to requeue all chunks already queued. */ + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + if (ch->msg == msg) + continue; + msg = ch->msg; + n->enqueue(&asoc->outqueue, msg); + } + + return ret; + +err: + n->free(&asoc->stream); + asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ + + return ret; +} + +int sctp_sched_get_sched(struct sctp_association *asoc) +{ + int i; + + for (i = 0; i <= SCTP_SS_MAX; i++) + if (asoc->outqueue.sched == sctp_sched_ops[i]) + return i; + + return 0; +} + +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) { + int ret; + + ret = sctp_stream_init_ext(&asoc->stream, sid); + if (ret) + return ret; + } + + return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp); +} + +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) + return 0; + + return asoc->outqueue.sched->get(&asoc->stream, sid, value); +} + +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) +{ + if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) { + struct sctp_stream_out *sout; + __u16 sid; + + /* datamsg is not finish, so save it as current one, + * in case application switch scheduler or a higher + * priority stream comes in. + */ + sid = sctp_chunk_stream_no(ch); + sout = &q->asoc->stream.out[sid]; + q->asoc->stream.out_curr = sout; + return; + } + + q->asoc->stream.out_curr = NULL; + q->sched->dequeue_done(q, ch); +} + +/* Auxiliary functions for the schedulers */ +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) +{ + list_del_init(&ch->list); + list_del_init(&ch->stream_list); + q->out_qlen -= ch->skb->len; +} + +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) +{ + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + INIT_LIST_HEAD(&stream->out[sid].ext->outq); + return sched->init_sid(stream, sid, gfp); +} + +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + + asoc = container_of(stream, struct sctp_association, stream); + + return asoc->outqueue.sched; +} -- cgit v1.2.3 From 13aa8770fe42d246c6f3a8eb814b85bccb428011 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:14 -0300 Subject: sctp: add sockopt to get/set stream scheduler As defined per RFC Draft ndata Section 4.3.2, named as SCTP_STREAM_SCHEDULER. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 1 + net/sctp/socket.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 4487e7625ddb..0050f10087d2 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -122,6 +122,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_RESET_ASSOC 120 #define SCTP_ADD_STREAMS 121 #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 +#define SCTP_STREAM_SCHEDULER 123 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d207734326b0..ae35dbf2810f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -79,6 +79,7 @@ #include #include #include +#include /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); @@ -3914,6 +3915,36 @@ out: return retval; } +static int sctp_setsockopt_scheduler(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_value > SCTP_SS_MAX) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_sched(asoc, params.assoc_value); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4095,6 +4126,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ADD_STREAMS: retval = sctp_setsockopt_add_streams(sk, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_setsockopt_scheduler(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6793,6 +6827,43 @@ out: return retval; } +static int sctp_getsockopt_scheduler(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = sctp_sched_get_sched(asoc); + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6975,6 +7046,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_getsockopt_scheduler(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From 0ccdf3c7fdeda511b10def19505178a9d2d3fccd Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:15 -0300 Subject: sctp: add sockopt to get/set stream scheduler parameters As defined per RFC Draft ndata Section 4.3.3, named as SCTP_STREAM_SCHEDULER_VALUE. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/uapi/linux/sctp.h | 7 +++++ net/sctp/socket.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 0050f10087d2..00ac417d2c4f 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -123,6 +123,7 @@ typedef __s32 sctp_assoc_t; #define SCTP_ADD_STREAMS 121 #define SCTP_SOCKOPT_PEELOFF_FLAGS 122 #define SCTP_STREAM_SCHEDULER 123 +#define SCTP_STREAM_SCHEDULER_VALUE 124 /* PR-SCTP policies */ #define SCTP_PR_SCTP_NONE 0x0000 @@ -815,6 +816,12 @@ struct sctp_assoc_value { uint32_t assoc_value; }; +struct sctp_stream_value { + sctp_assoc_t assoc_id; + uint16_t stream_id; + uint16_t stream_value; +}; + /* * 7.2.2 Peer Address Information * diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ae35dbf2810f..88c28421ec15 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3945,6 +3945,34 @@ out: return retval; } +static int sctp_setsockopt_scheduler_value(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_stream_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4129,6 +4157,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_STREAM_SCHEDULER: retval = sctp_setsockopt_scheduler(sk, optval, optlen); break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6864,6 +6895,48 @@ out: return retval; } +static int sctp_getsockopt_scheduler_value(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + retval = sctp_sched_get_value(asoc, params.stream_id, + ¶ms.stream_value); + if (retval) + goto out; + + if (put_user(len, optlen)) { + retval = -EFAULT; + goto out; + } + + if (copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7050,6 +7123,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_scheduler(sk, len, optval, optlen); break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_getsockopt_scheduler_value(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; -- cgit v1.2.3 From 637784ade221a3c8a7ecd0f583eddd95d6276b9a Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:16 -0300 Subject: sctp: introduce priority based stream scheduler This patch introduces RFC Draft ndata section 3.4 Priority Based Scheduler (SCTP_SS_PRIO). It works by having a struct sctp_stream_priority for each priority configured. This struct is then enlisted on a queue ordered per priority if, and only if, there is a stream with data queued, so that dequeueing is very straightforward: either finish current datamsg or simply dequeue from the highest priority queued, which is the next stream pointed, and that's it. If there are multiple streams assigned with the same priority and with data queued, it will do round robin amongst them while respecting datamsgs boundaries (when not using idata chunks), to be reasonably fair. We intentionally don't maintain a list of priorities nor a list of all streams with the same priority to save memory. The first would mean at least 2 other pointers per priority (which, for 1000 priorities, that can mean 16kB) and the second would also mean 2 other pointers but per stream. As SCTP supports up to 65535 streams on a given asoc, that's 1MB. This impacts when giving a priority to some stream, as we have to find out if the new priority is already being used and if we can free the old one, and also when tearing down. The new fields in struct sctp_stream_out_ext and sctp_stream are added under a union because that memory is to be shared with other schedulers. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 24 +++ include/uapi/linux/sctp.h | 3 +- net/sctp/Makefile | 2 +- net/sctp/stream_sched.c | 3 + net/sctp/stream_sched_prio.c | 347 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 377 insertions(+), 2 deletions(-) create mode 100644 net/sctp/stream_sched_prio.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 3c22a30fd71b..40eb8d66a37c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1328,10 +1328,27 @@ struct sctp_inithdr_host { __u32 initial_tsn; }; +struct sctp_stream_priorities { + /* List of priorities scheduled */ + struct list_head prio_sched; + /* List of streams scheduled */ + struct list_head active; + /* The next stream stream in line */ + struct sctp_stream_out_ext *next; + __u16 prio; +}; + struct sctp_stream_out_ext { __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; struct list_head outq; /* chunks enqueued by this stream */ + union { + struct { + /* Scheduled streams list */ + struct list_head prio_list; + struct sctp_stream_priorities *prio_head; + }; + }; }; struct sctp_stream_out { @@ -1351,6 +1368,13 @@ struct sctp_stream { __u16 incnt; /* Current stream being sent, if any */ struct sctp_stream_out *out_curr; + union { + /* Fields used by priority scheduler */ + struct { + /* List of priorities scheduled */ + struct list_head prio_list; + }; + }; }; #define SCTP_STREAM_CLOSED 0x00 diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 00ac417d2c4f..850fa8b29d7e 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1099,7 +1099,8 @@ struct sctp_add_streams { /* SCTP Stream schedulers */ enum sctp_sched_type { SCTP_SS_FCFS, - SCTP_SS_MAX = SCTP_SS_FCFS + SCTP_SS_PRIO, + SCTP_SS_MAX = SCTP_SS_PRIO }; #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 0f6e6d1d69fd..647c9cfd4e95 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,7 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o stream_sched.o + offload.o stream_sched.o stream_sched_prio.o sctp_probe-y := probe.o diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 40a9a9de2b98..115ddb765169 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -121,8 +121,11 @@ static struct sctp_sched_ops sctp_sched_fcfs = { /* API to other parts of the stack */ +extern struct sctp_sched_ops sctp_sched_prio; + struct sctp_sched_ops *sctp_sched_ops[] = { &sctp_sched_fcfs, + &sctp_sched_prio, }; int sctp_sched_set_sched(struct sctp_association *asoc, diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c new file mode 100644 index 000000000000..384dbf3c8760 --- /dev/null +++ b/net/sctp/stream_sched_prio.c @@ -0,0 +1,347 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* Priority handling + * RFC DRAFT ndata section 3.4 + */ + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream); + +static struct sctp_stream_priorities *sctp_sched_prio_new_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + + p = kmalloc(sizeof(*p), gfp); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->prio_sched); + INIT_LIST_HEAD(&p->active); + p->next = NULL; + p->prio = prio; + + return p; +} + +static struct sctp_stream_priorities *sctp_sched_prio_get_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + int i; + + /* Look into scheduled priorities first, as they are sorted and + * we can find it fast IF it's scheduled. + */ + list_for_each_entry(p, &stream->prio_list, prio_sched) { + if (p->prio == prio) + return p; + if (p->prio > prio) + break; + } + + /* No luck. So we search on all streams now. */ + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + + p = stream->out[i].ext->prio_head; + if (!p) + /* Means all other streams won't be initialized + * as well. + */ + break; + if (p->prio == prio) + return p; + } + + /* If not even there, allocate a new one. */ + return sctp_sched_prio_new_head(stream, prio, gfp); +} + +static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p) +{ + struct list_head *pos; + + pos = p->next->prio_list.next; + if (pos == &p->active) + pos = pos->next; + p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list); +} + +static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute) +{ + bool scheduled = false; + + if (!list_empty(&soute->prio_list)) { + struct sctp_stream_priorities *prio_head = soute->prio_head; + + /* Scheduled */ + scheduled = true; + + if (prio_head->next == soute) + /* Try to move to the next stream */ + sctp_sched_prio_next_stream(prio_head); + + list_del_init(&soute->prio_list); + + /* Also unsched the priority if this was the last stream */ + if (list_empty(&prio_head->active)) { + list_del_init(&prio_head->prio_sched); + /* If there is no stream left, clear next */ + prio_head->next = NULL; + } + } + + return scheduled; +} + +static void sctp_sched_prio_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_priorities *prio, *prio_head; + + prio_head = soute->prio_head; + + /* Nothing to do if already scheduled */ + if (!list_empty(&soute->prio_list)) + return; + + /* Schedule the stream. If there is a next, we schedule the new + * one before it, so it's the last in round robin order. + * If there isn't, we also have to schedule the priority. + */ + if (prio_head->next) { + list_add(&soute->prio_list, prio_head->next->prio_list.prev); + return; + } + + list_add(&soute->prio_list, &prio_head->active); + prio_head->next = soute; + + list_for_each_entry(prio, &stream->prio_list, prio_sched) { + if (prio->prio > prio_head->prio) { + list_add(&prio_head->prio_sched, prio->prio_sched.prev); + return; + } + } + + list_add_tail(&prio_head->prio_sched, &stream->prio_list); +} + +static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + struct sctp_stream_out *sout = &stream->out[sid]; + struct sctp_stream_out_ext *soute = sout->ext; + struct sctp_stream_priorities *prio_head, *old; + bool reschedule = false; + int i; + + prio_head = sctp_sched_prio_get_head(stream, prio, gfp); + if (!prio_head) + return -ENOMEM; + + reschedule = sctp_sched_prio_unsched(soute); + old = soute->prio_head; + soute->prio_head = prio_head; + if (reschedule) + sctp_sched_prio_sched(stream, soute); + + if (!old) + /* Happens when we set the priority for the first time */ + return 0; + + for (i = 0; i < stream->outcnt; i++) { + soute = stream->out[i].ext; + if (soute && soute->prio_head == old) + /* It's still in use, nothing else to do here. */ + return 0; + } + + /* No hits, we are good to free it. */ + kfree(old); + + return 0; +} + +static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = stream->out[sid].ext->prio_head->prio; + return 0; +} + +static int sctp_sched_prio_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->prio_list); + + return 0; +} + +static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->prio_list); + return sctp_sched_prio_set(stream, sid, 0, gfp); +} + +static void sctp_sched_prio_free(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *prio, *n; + LIST_HEAD(list); + int i; + + /* As we don't keep a list of priorities, to avoid multiple + * frees we have to do it in 3 steps: + * 1. unsched everyone, so the lists are free to use in 2. + * 2. build the list of the priorities + * 3. free the list + */ + sctp_sched_prio_unsched_all(stream); + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + prio = stream->out[i].ext->prio_head; + if (prio && list_empty(&prio->prio_sched)) + list_add(&prio->prio_sched, &list); + } + list_for_each_entry_safe(prio, n, &list, prio_sched) { + list_del_init(&prio->prio_sched); + kfree(prio); + } +} + +static void sctp_sched_prio_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_prio_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next. It's easy, it's either the current + * one or the first chunk on the next active stream. + */ + if (stream->out_curr) { + soute = stream->out_curr->ext; + } else { + prio = list_entry(stream->prio_list.next, + struct sctp_stream_priorities, prio_sched); + soute = prio->next; + } + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_prio_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream on + * this priority. + */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + prio = soute->prio_head; + + sctp_sched_prio_next_stream(prio); + + if (list_empty(&soute->outq)) + sctp_sched_prio_unsched(soute); +} + +static void sctp_sched_prio_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out *sout; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + sout = &stream->out[sid]; + if (sout->ext) + sctp_sched_prio_sched(stream, sout->ext); + } +} + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *p, *tmp; + struct sctp_stream_out_ext *soute, *souttmp; + + list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched) + list_for_each_entry_safe(soute, souttmp, &p->active, prio_list) + sctp_sched_prio_unsched(soute); +} + +struct sctp_sched_ops sctp_sched_prio = { + .set = sctp_sched_prio_set, + .get = sctp_sched_prio_get, + .init = sctp_sched_prio_init, + .init_sid = sctp_sched_prio_init_sid, + .free = sctp_sched_prio_free, + .enqueue = sctp_sched_prio_enqueue, + .dequeue = sctp_sched_prio_dequeue, + .dequeue_done = sctp_sched_prio_dequeue_done, + .sched_all = sctp_sched_prio_sched_all, + .unsched_all = sctp_sched_prio_unsched_all, +}; -- cgit v1.2.3 From ac1ed8b82cd60ba8e7d84103ac1414b8c577c485 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 3 Oct 2017 19:20:17 -0300 Subject: sctp: introduce round robin stream scheduler This patch introduces RFC Draft ndata section 3.2 Priority Based Scheduler (SCTP_SS_RR). Works by maintaining a list of enqueued streams and tracking the last one used to send data. When the datamsg is done, it switches to the next stream. See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13 Tested-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 11 +++ include/uapi/linux/sctp.h | 3 +- net/sctp/Makefile | 3 +- net/sctp/stream_sched.c | 2 + net/sctp/stream_sched_rr.c | 201 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 218 insertions(+), 2 deletions(-) create mode 100644 net/sctp/stream_sched_rr.c diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 40eb8d66a37c..16f949eef52f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1348,6 +1348,10 @@ struct sctp_stream_out_ext { struct list_head prio_list; struct sctp_stream_priorities *prio_head; }; + /* Fields used by RR scheduler */ + struct { + struct list_head rr_list; + }; }; }; @@ -1374,6 +1378,13 @@ struct sctp_stream { /* List of priorities scheduled */ struct list_head prio_list; }; + /* Fields used by RR scheduler */ + struct { + /* List of streams scheduled */ + struct list_head rr_list; + /* The next stream stream in line */ + struct sctp_stream_out_ext *rr_next; + }; }; }; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index 850fa8b29d7e..6cd7d416ca40 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -1100,7 +1100,8 @@ struct sctp_add_streams { enum sctp_sched_type { SCTP_SS_FCFS, SCTP_SS_PRIO, - SCTP_SS_MAX = SCTP_SS_PRIO + SCTP_SS_RR, + SCTP_SS_MAX = SCTP_SS_RR }; #endif /* _UAPI_SCTP_H */ diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 647c9cfd4e95..bf90c5397719 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -12,7 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o stream_sched.o stream_sched_prio.o + offload.o stream_sched.o stream_sched_prio.o \ + stream_sched_rr.o sctp_probe-y := probe.o diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c index 115ddb765169..03513a9fa110 100644 --- a/net/sctp/stream_sched.c +++ b/net/sctp/stream_sched.c @@ -122,10 +122,12 @@ static struct sctp_sched_ops sctp_sched_fcfs = { /* API to other parts of the stack */ extern struct sctp_sched_ops sctp_sched_prio; +extern struct sctp_sched_ops sctp_sched_rr; struct sctp_sched_ops *sctp_sched_ops[] = { &sctp_sched_fcfs, &sctp_sched_prio, + &sctp_sched_rr, }; int sctp_sched_set_sched(struct sctp_association *asoc, diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c new file mode 100644 index 000000000000..7612a438c5b9 --- /dev/null +++ b/net/sctp/stream_sched_rr.c @@ -0,0 +1,201 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * . + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers + * + * Written or modified by: + * Marcelo Ricardo Leitner + */ + +#include +#include +#include +#include + +/* Priority handling + * RFC DRAFT ndata section 3.2 + */ +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream); + +static void sctp_sched_rr_next_stream(struct sctp_stream *stream) +{ + struct list_head *pos; + + pos = stream->rr_next->rr_list.next; + if (pos == &stream->rr_list) + pos = pos->next; + stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list); +} + +static void sctp_sched_rr_unsched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (stream->rr_next == soute) + /* Try to move to the next stream */ + sctp_sched_rr_next_stream(stream); + + list_del_init(&soute->rr_list); + + /* If we have no other stream queued, clear next */ + if (list_empty(&stream->rr_list)) + stream->rr_next = NULL; +} + +static void sctp_sched_rr_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (!list_empty(&soute->rr_list)) + /* Already scheduled. */ + return; + + /* Schedule the stream */ + list_add_tail(&soute->rr_list, &stream->rr_list); + + if (!stream->rr_next) + stream->rr_next = soute; +} + +static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_rr_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->rr_list); + stream->rr_next = NULL; + + return 0; +} + +static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->rr_list); + + return 0; +} + +static void sctp_sched_rr_free(struct sctp_stream *stream) +{ + sctp_sched_rr_unsched_all(stream); +} + +static void sctp_sched_rr_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_rr_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = stream->rr_next; + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + + sctp_sched_rr_next_stream(&q->asoc->stream); + + if (list_empty(&soute->outq)) + sctp_sched_rr_unsched(&q->asoc->stream, soute); +} + +static void sctp_sched_rr_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + soute = stream->out[sid].ext; + if (soute) + sctp_sched_rr_sched(stream, soute); + } +} + +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list) + sctp_sched_rr_unsched(stream, soute); +} + +struct sctp_sched_ops sctp_sched_rr = { + .set = sctp_sched_rr_set, + .get = sctp_sched_rr_get, + .init = sctp_sched_rr_init, + .init_sid = sctp_sched_rr_init_sid, + .free = sctp_sched_rr_free, + .enqueue = sctp_sched_rr_enqueue, + .dequeue = sctp_sched_rr_dequeue, + .dequeue_done = sctp_sched_rr_dequeue_done, + .sched_all = sctp_sched_rr_sched_all, + .unsched_all = sctp_sched_rr_unsched_all, +}; -- cgit v1.2.3 From acd669a8f67ed47f5edd385741486cc7a259a446 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 3 Oct 2017 11:10:53 +0530 Subject: cxgb4: add new T6 pci device id's Add 0x6085 T6 device id. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 37d90d63e4a3..633e9751a25e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -202,6 +202,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6082), /* Custom T6225-CR SFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */ CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */ + CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ -- cgit v1.2.3 From eba0f28473b2c20d4212ff19c118723a5a547e20 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 21:40:19 +0100 Subject: ath9k: make const array reg_hole_list static, reduces object code size Don't populate the read-only array reg_hole_list on the stack, instead make it static. Makes the object code smaller by over 200 bytes: Before: text data bss dec hex filename 57518 15248 0 72766 11c3e debug.o After: text data bss dec hex filename 57218 15344 0 72562 11b72 debug.o Signed-off-by: Colin Ian King Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 01fa30117288..5a0a05abd51a 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -916,7 +916,7 @@ static int open_file_regdump(struct inode *inode, struct file *file) u8 *buf; int i, j = 0; unsigned long num_regs, regdump_len, max_reg_offset; - const struct reg_hole { + static const struct reg_hole { u32 start; u32 end; } reg_hole_list[] = { -- cgit v1.2.3 From 522e89d60b62d16075a7bc96dd5eb2f3eb813635 Mon Sep 17 00:00:00 2001 From: simran singhal Date: Mon, 11 Sep 2017 21:52:37 +0200 Subject: netfilter: ipset: Compress return logic Simplify function returns by merging assignment and return into one command line. Signed-off-by: simran singhal Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_list_set.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b..2fff6b5dc6f0 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -453,7 +453,6 @@ static size_t list_set_memsize(const struct list_set *map, size_t dsize) { struct set_elem *e; - size_t memsize; u32 n = 0; rcu_read_lock(); @@ -461,9 +460,7 @@ list_set_memsize(const struct list_set *map, size_t dsize) n++; rcu_read_unlock(); - memsize = sizeof(*map) + n * dsize; - - return memsize; + return (sizeof(*map) + n * dsize); } static int -- cgit v1.2.3 From 8851e799ffde0d85fab6716075975200c9b5094c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 11 Sep 2017 21:52:38 +0200 Subject: netfilter: ipset: Fix sparse warnings Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_ipportnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99a53c2..24bf55860108 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; -- cgit v1.2.3 From 63c2af90e502fca6cda2855541d43443666e337f Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Mon, 11 Sep 2017 21:52:39 +0200 Subject: netfilter: ipset: deduplicate prefixlen maps The prefixlen maps used here are identical, and have been since introduction. It seems to make sense to use a single large map, that the preprocessor will fill appropriately. Signed-off-by: Aaron Conole Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/pfxlen.c | 395 +++++++++++++++---------------------------- 1 file changed, 137 insertions(+), 258 deletions(-) diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 1c8a42c1056c..d5be9c25fad6 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -3,6 +3,141 @@ /* Prefixlen maps for fast conversions, by Jan Engelhardt. */ +#ifdef E +#undef E +#endif + +#define PREFIXES_MAP \ + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + #define E(a, b, c, d) \ {.ip6 = { \ htonl(a), htonl(b), \ @@ -13,135 +148,7 @@ * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_netmask_map); @@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map); * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); -- cgit v1.2.3 From 4d86d38186271438ef002c5ae6e04836f01bf8bf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 4 Oct 2017 09:54:27 +0200 Subject: ravb: RX checksum offload Add support for RX checksum offload. This is enabled by default and may be disabled and re-enabled using ethtool: # ethtool -K eth0 rx off # ethtool -K eth0 rx on The RAVB provides a simple checksumming scheme which appears to be completely compatible with CHECKSUM_COMPLETE: sum of all packet data after the L2 header is appended to packet data; this may be trivially read by the driver and used to update the skb accordingly. In terms of performance throughput is close to gigabit line-rate both with and without RX checksum offload enabled. Perf output, however, appears to indicate that significantly less time is spent in do_csum(). This is as expected. Test results with RX checksum offload enabled: # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo enable_enobufs failed: getprotobyname Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 937.54 Summary of output of perf report: 18.28% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 10.34% ksoftirqd/0 [kernel.kallsyms] [k] __pi_memcpy 9.83% ksoftirqd/0 [kernel.kallsyms] [k] ravb_poll 7.89% ksoftirqd/0 [kernel.kallsyms] [k] skb_put 4.01% ksoftirqd/0 [kernel.kallsyms] [k] dev_gro_receive 3.37% netperf [kernel.kallsyms] [k] __arch_copy_to_user 3.17% swapper [kernel.kallsyms] [k] arch_cpu_idle 2.55% swapper [kernel.kallsyms] [k] tick_nohz_idle_enter 2.04% ksoftirqd/0 [kernel.kallsyms] [k] __pi___inval_dcache_area 2.03% swapper [kernel.kallsyms] [k] _raw_spin_unlock_irq 1.96% ksoftirqd/0 [kernel.kallsyms] [k] __netdev_alloc_skb 1.59% ksoftirqd/0 [kernel.kallsyms] [k] __slab_alloc.isra.83 Test results without RX checksum offload enabled: # /usr/bin/perf_3.16 record -o /run/perf.data -a netperf -t TCP_MAERTS -H 10.4.3.162 MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.4.3.162 () port 0 AF_INET : demo enable_enobufs failed: getprotobyname Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 10.00 940.20 Summary of output of perf report: 17.10% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 10.99% ksoftirqd/0 [kernel.kallsyms] [k] __pi_memcpy 8.87% ksoftirqd/0 [kernel.kallsyms] [k] ravb_poll 8.16% ksoftirqd/0 [kernel.kallsyms] [k] skb_put 7.42% ksoftirqd/0 [kernel.kallsyms] [k] do_csum 3.91% ksoftirqd/0 [kernel.kallsyms] [k] dev_gro_receive 2.31% swapper [kernel.kallsyms] [k] arch_cpu_idle 2.16% ksoftirqd/0 [kernel.kallsyms] [k] __pi___inval_dcache_area 2.14% ksoftirqd/0 [kernel.kallsyms] [k] __netdev_alloc_skb 1.93% netperf [kernel.kallsyms] [k] __arch_copy_to_user 1.79% swapper [kernel.kallsyms] [k] tick_nohz_idle_enter 1.63% ksoftirqd/0 [kernel.kallsyms] [k] __slab_alloc.isra.83 Above results collected on an R-Car Gen 3 Salvator-X/r8a7796 ES1.0. Also tested on a R-Car Gen 3 Salvator-X/r8a7795 ES1.0. By inspection this also appears to be compatible with the ravb found on R-Car Gen 2 SoCs, however, this patch is currently untested on such hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 55 +++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index fdf30bfa403b..a8822a756e08 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev) /* Receive frame limit set register */ ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR); - /* PAUSE prohibition */ + /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */ ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) | + (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) | ECMR_TE | ECMR_RE, ECMR); ravb_set_rate(ndev); @@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev) } } +static void ravb_rx_csum(struct sk_buff *skb) +{ + u8 *hw_csum; + + /* The hardware checksum is 2 bytes appended to packet data */ + if (unlikely(skb->len < 2)) + return; + hw_csum = skb_tail_pointer(skb) - 2; + skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum)); + skb->ip_summed = CHECKSUM_COMPLETE; + skb_trim(skb, skb->len - 2); +} + /* Packet receive function for Ethernet AVB */ static bool ravb_rx(struct net_device *ndev, int *quota, int q) { @@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) ts.tv_nsec = le32_to_cpu(desc->ts_n); shhwtstamps->hwtstamp = timespec64_to_ktime(ts); } + skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); + if (ndev->features & NETIF_F_RXCSUM) + ravb_rx_csum(skb); napi_gro_receive(&priv->napi[q], skb); stats->rx_packets++; stats->rx_bytes += pkt_len; @@ -1842,6 +1859,38 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) return phy_mii_ioctl(phydev, req, cmd); } +static void ravb_set_rx_csum(struct net_device *ndev, bool enable) +{ + struct ravb_private *priv = netdev_priv(ndev); + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + /* Disable TX and RX */ + ravb_rcv_snd_disable(ndev); + + /* Modify RX Checksum setting */ + ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0); + + /* Enable TX and RX */ + ravb_rcv_snd_enable(ndev); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static int ravb_set_features(struct net_device *ndev, + netdev_features_t features) +{ + netdev_features_t changed = ndev->features ^ features; + + if (changed & NETIF_F_RXCSUM) + ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM); + + ndev->features = features; + + return 0; +} + static const struct net_device_ops ravb_netdev_ops = { .ndo_open = ravb_open, .ndo_stop = ravb_close, @@ -1853,6 +1902,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_do_ioctl = ravb_do_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, + .ndo_set_features = ravb_set_features, }; /* MDIO bus init function */ @@ -2004,6 +2054,9 @@ static int ravb_probe(struct platform_device *pdev) if (!ndev) return -ENOMEM; + ndev->features = NETIF_F_RXCSUM; + ndev->hw_features = NETIF_F_RXCSUM; + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); -- cgit v1.2.3 From 20e883204f0268b423799781e5efed786f8a11ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 13:56:50 +0200 Subject: net: core: fix kerneldoc comment net/core/dev.c:1306: warning: No description found for parameter 'name' net/core/dev.c:1306: warning: Excess function parameter 'alias' description in 'dev_get_alias' Fixes: 6c5570016b97 ("net: core: decouple ifalias get/set from rtnl lock") Reported-by: kbuild test robot Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1770097cfd86..454f05441546 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1295,7 +1295,7 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) /** * dev_get_alias - get ifalias of a device * @dev: device - * @alias: buffer to store name of ifalias + * @name: buffer to store name of ifalias * @len: size of buffer * * get ifalias for a device. Caller must make sure dev cannot go -- cgit v1.2.3 From ebf6b13142f947be576b40edce214788dfe1d3e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 4 Oct 2017 14:20:37 +0100 Subject: cxgb4vf: make a couple of functions static The functions t4vf_link_down_rc_str and t4vf_handle_get_port_info are local to the source and do not need to be in global scope, so make them static. Cleans up sparse warnings: symbol 't4vf_link_down_rc_str' was not declared. Should it be static? symbol 't4vf_handle_get_port_info' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index a8d94963b4d0..67aec59a14e6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -1812,7 +1812,7 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid) * * Returns a string representation of the Link Down Reason Code. */ -const char *t4vf_link_down_rc_str(unsigned char link_down_rc) +static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) { static const char * const reason[] = { "Link Down", @@ -1838,8 +1838,8 @@ const char *t4vf_link_down_rc_str(unsigned char link_down_rc) * * Processes a GET_PORT_INFO FW reply message. */ -void t4vf_handle_get_port_info(struct port_info *pi, - const struct fw_port_cmd *cmd) +static void t4vf_handle_get_port_info(struct port_info *pi, + const struct fw_port_cmd *cmd) { int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); struct adapter *adapter = pi->adapter; -- cgit v1.2.3 From e774d96b7d2c3489bfb5bbdc2b65ed41cd68d3d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:55:29 +0200 Subject: rtnetlink: remove slave_validate callback no users in the tree. Signed-off-by: Florian Westphal Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 3 --- net/core/rtnetlink.c | 6 ------ 2 files changed, 9 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 21837ca68ecc..6520993ff449 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -93,9 +93,6 @@ struct rtnl_link_ops { int slave_maxtype; const struct nla_policy *slave_policy; - int (*slave_validate)(struct nlattr *tb[], - struct nlattr *data[], - struct netlink_ext_ack *extack); int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, struct nlattr *tb[], diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3961f87cdc76..b63c5759641f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2631,12 +2631,6 @@ replay: return err; slave_data = slave_attr; } - if (m_ops->slave_validate) { - err = m_ops->slave_validate(tb, slave_data, - extack); - if (err < 0) - return err; - } } if (dev) { -- cgit v1.2.3 From 5c45121dc39026ab2139910e57cf933fd57d30f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 15:58:49 +0200 Subject: rtnetlink: remove __rtnl_af_unregister switch the only caller to rtnl_af_unregister. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 -- net/core/rtnetlink.c | 14 +------------- net/ipv6/addrconf.c | 4 ++-- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 6520993ff449..e3ca8e2e3103 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -151,8 +151,6 @@ struct rtnl_af_ops { size_t (*get_stats_af_size)(const struct net_device *dev); }; -void __rtnl_af_unregister(struct rtnl_af_ops *ops); - void rtnl_af_register(struct rtnl_af_ops *ops); void rtnl_af_unregister(struct rtnl_af_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b63c5759641f..3fb1ca33cba4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -475,18 +475,6 @@ void rtnl_af_register(struct rtnl_af_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_af_register); -/** - * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. - * @ops: struct rtnl_af_ops * to unregister - * - * The caller must hold the rtnl_mutex. - */ -void __rtnl_af_unregister(struct rtnl_af_ops *ops) -{ - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_af_unregister); - /** * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. * @ops: struct rtnl_af_ops * to unregister @@ -494,7 +482,7 @@ EXPORT_SYMBOL_GPL(__rtnl_af_unregister); void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); - __rtnl_af_unregister(ops); + list_del(&ops->list); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f553f72d0bee..837418ff2d4b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6618,9 +6618,9 @@ void addrconf_cleanup(void) unregister_pernet_subsys(&addrconf_ops); ipv6_addr_label_cleanup(); - rtnl_lock(); + rtnl_af_unregister(&inet6_ops); - __rtnl_af_unregister(&inet6_ops); + rtnl_lock(); /* clean dev list */ for_each_netdev(&init_net, dev) { -- cgit v1.2.3 From e9b871ee098dfb91780e13bfc6e91e82c7b4ad73 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2017 16:22:59 +0200 Subject: selftests: rtnetlink: try concurrent change of ifalias to make sure this is serialized correctly. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 62c87da92770..e8c86c416ed0 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -278,6 +278,12 @@ kci_test_ifalias() ip link show "$devdummy" | grep -q "alias $namewant" check_fail $? + for i in $(seq 1 100); do + uuidgen > "$syspathname" & + done + + wait + # re-add the alias -- kernel should free mem when dummy dev is removed ip link set dev "$devdummy" alias "$namewant" check_err $? -- cgit v1.2.3 From c818fa9e288be5be7e360c33cf4f5e30f9fa206e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Oct 2017 10:48:35 -0700 Subject: net: cache skb_shinfo() in skb_try_coalesce() Compiler does not really know that skb_shinfo(to|from) are constants in skb_try_coalesce(), lets cache their values to shrink code. We might even take care of skb_zcopy() calls later. $ size net/core/skbuff.o.before net/core/skbuff.o text data bss dec hex filename 40727 1298 0 42025 a429 net/core/skbuff.o.before 40631 1298 0 41929 a3c9 net/core/skbuff.o Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/skbuff.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d98c2e3ce2bf..822a90e56aea 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4767,6 +4767,7 @@ EXPORT_SYMBOL(kfree_skb_partial); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { + struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; @@ -4781,7 +4782,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } - if (skb_has_frag_list(to) || skb_has_frag_list(from)) + to_shinfo = skb_shinfo(to); + from_shinfo = skb_shinfo(from); + if (to_shinfo->frag_list || from_shinfo->frag_list) return false; if (skb_zcopy(to) || skb_zcopy(from)) return false; @@ -4790,8 +4793,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, struct page *page; unsigned int offset; - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) @@ -4802,12 +4805,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); @@ -4815,19 +4818,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, WARN_ON_ONCE(delta < len); - memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, - skb_shinfo(from)->frags, - skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); - skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + memcpy(to_shinfo->frags + to_shinfo->nr_frags, + from_shinfo->frags, + from_shinfo->nr_frags * sizeof(skb_frag_t)); + to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) - skb_shinfo(from)->nr_frags = 0; + from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) - skb_frag_ref(from, i); + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); to->truesize += delta; to->len += len; -- cgit v1.2.3 From 324bda9e6c5add86ba2e1066476481c48132aca0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 46 +++-- include/linux/bpf.h | 32 ++++ include/linux/filter.h | 2 +- include/uapi/linux/bpf.h | 42 +++- kernel/bpf/cgroup.c | 467 ++++++++++++++++++++++++++++++++------------- kernel/bpf/core.c | 31 +++ kernel/bpf/syscall.c | 37 ++-- kernel/cgroup/cgroup.c | 28 ++- 8 files changed, 516 insertions(+), 169 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d41d40ac3efd..102e56fbb6de 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -14,27 +14,42 @@ struct bpf_sock_ops_kern; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + struct cgroup_bpf { - /* - * Store two sets of bpf_prog pointers, one for programs that are - * pinned directly to this cgroup, and one for those that are effective - * when this cgroup is accessed. + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; - bool disallow_override[MAX_BPF_ATTACH_TYPE]; + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; }; void cgroup_bpf_put(struct cgroup *cgrp); -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); +int cgroup_bpf_inherit(struct cgroup *cgrp); -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool overridable); +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); -/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable); +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -96,8 +111,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} -static inline void cgroup_bpf_inherit(struct cgroup *cgrp, - struct cgroup *parent) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 252f4bc9eb25..a6964b75f070 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -241,6 +241,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_prog **_prog; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _prog = rcu_dereference(array)->progs; \ + for (; *_prog; _prog++) \ + _ret &= func(*_prog, ctx); \ + rcu_read_unlock(); \ + _ret; \ + }) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/filter.h b/include/linux/filter.h index 911d454af107..2d2db394b0ca 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -481,7 +481,7 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6d2137b4cf38..762f74bc6c47 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -143,11 +143,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 546113430049..6b7500bbdb53 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -27,129 +27,361 @@ void cgroup_bpf_put(struct cgroup *cgrp) { unsigned int type; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { - struct bpf_prog *prog = cgrp->bpf.prog[type]; - - if (prog) { - bpf_prog_put(prog); + for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog_list *pl, *tmp; + + list_for_each_entry_safe(pl, tmp, progs, node) { + list_del(&pl->node); + bpf_prog_put(pl->prog); + kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_prog_array_free(cgrp->bpf.effective[type]); + } +} + +/* count number of elements in the list. + * it's slow but the list cannot be long + */ +static u32 prog_list_length(struct list_head *head) +{ + struct bpf_prog_list *pl; + u32 cnt = 0; + + list_for_each_entry(pl, head, node) { + if (!pl->prog) + continue; + cnt++; } + return cnt; +} + +/* if parent has non-overridable prog attached, + * disallow attaching new programs to the descendent cgroup. + * if parent has overridable or multi-prog, allow attaching + */ +static bool hierarchy_allows_attach(struct cgroup *cgrp, + enum bpf_attach_type type, + u32 new_flags) +{ + struct cgroup *p; + + p = cgroup_parent(cgrp); + if (!p) + return true; + do { + u32 flags = p->bpf.flags[type]; + u32 cnt; + + if (flags & BPF_F_ALLOW_MULTI) + return true; + cnt = prog_list_length(&p->bpf.progs[type]); + WARN_ON_ONCE(cnt > 1); + if (cnt == 1) + return !!(flags & BPF_F_ALLOW_OVERRIDE); + p = cgroup_parent(p); + } while (p); + return true; +} + +/* compute a chain of effective programs for a given cgroup: + * start from the list of programs in this cgroup and add + * all parent programs. + * Note that parent's F_ALLOW_OVERRIDE-type program is yielding + * to programs in this cgroup + */ +static int compute_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu **array) +{ + struct bpf_prog_array __rcu *progs; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + int cnt = 0; + + /* count number of effective programs by walking parents */ + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[type]); + p = cgroup_parent(p); + } while (p); + + progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); + if (!progs) + return -ENOMEM; + + /* populate the array with effective progs */ + cnt = 0; + p = cgrp; + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + list_for_each_entry(pl, + &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + rcu_dereference_protected(progs, 1)-> + progs[cnt++] = pl->prog; + } + p = cgroup_parent(p); + } while (p); + + *array = progs; + return 0; +} + +static void activate_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu *array) +{ + struct bpf_prog_array __rcu *old_array; + + old_array = xchg(&cgrp->bpf.effective[type], array); + /* free prog array after grace period, since __cgroup_bpf_run_*() + * might be still walking the array + */ + bpf_prog_array_free(old_array); } /** * cgroup_bpf_inherit() - inherit effective programs from parent * @cgrp: the cgroup to modify - * @parent: the parent to inherit from */ -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) +int cgroup_bpf_inherit(struct cgroup *cgrp) { - unsigned int type; +/* has to use marco instead of const int, since compiler thinks + * that array below is variable length + */ +#define NR ARRAY_SIZE(cgrp->bpf.effective) + struct bpf_prog_array __rcu *arrays[NR] = {}; + int i; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { - struct bpf_prog *e; + for (i = 0; i < NR; i++) + INIT_LIST_HEAD(&cgrp->bpf.progs[i]); - e = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - rcu_assign_pointer(cgrp->bpf.effective[type], e); - cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; - } + for (i = 0; i < NR; i++) + if (compute_effective_progs(cgrp, i, &arrays[i])) + goto cleanup; + + for (i = 0; i < NR; i++) + activate_effective_progs(cgrp, i, arrays[i]); + + return 0; +cleanup: + for (i = 0; i < NR; i++) + bpf_prog_array_free(arrays[i]); + return -ENOMEM; } +#define BPF_CGROUP_MAX_PROGS 64 + /** - * __cgroup_bpf_update() - Update the pinned program of a cgroup, and + * __cgroup_bpf_attach() - Attach the program to a cgroup, and * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse - * @parent: The parent of @cgrp, or %NULL if @cgrp is the root - * @prog: A new program to pin - * @type: Type of pinning operation (ingress/egress) - * - * Each cgroup has a set of two pointers for bpf programs; one for eBPF - * programs it owns, and which is effective for execution. - * - * If @prog is not %NULL, this function attaches a new program to the cgroup - * and releases the one that is currently attached, if any. @prog is then made - * the effective program of type @type in that cgroup. - * - * If @prog is %NULL, the currently attached program of type @type is released, - * and the effective program of the parent cgroup (if any) is inherited to - * @cgrp. - * - * Then, the descendants of @cgrp are walked and the effective program for - * each of them is set to the effective program of @cgrp unless the - * descendant has its own program attached, in which case the subbranch is - * skipped. This ensures that delegated subcgroups with own programs are left - * untouched. + * @prog: A program to attach + * @type: Type of attach operation * * Must be called with cgroup_mutex held. */ -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool new_overridable) +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct bpf_prog *old_prog, *effective = NULL; - struct cgroup_subsys_state *pos; - bool overridable = true; - - if (parent) { - overridable = !parent->bpf.disallow_override[type]; - effective = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - } - - if (prog && effective && !overridable) - /* if parent has non-overridable prog attached, disallow - * attaching new programs to descendent cgroup - */ + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + bool pl_was_allocated; + u32 old_flags; + int err; + + if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) + /* invalid combination */ + return -EINVAL; + + if (!hierarchy_allows_attach(cgrp, type, flags)) return -EPERM; - if (prog && effective && overridable != new_overridable) - /* if parent has overridable prog attached, only - * allow overridable programs in descendent cgroup + if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) + /* Disallow attaching non-overridable on top + * of existing overridable in this cgroup. + * Disallow attaching multi-prog if overridable or none */ return -EPERM; - old_prog = cgrp->bpf.prog[type]; - - if (prog) { - overridable = new_overridable; - effective = prog; - if (old_prog && - cgrp->bpf.disallow_override[type] == new_overridable) - /* disallow attaching non-overridable on top - * of existing overridable in this cgroup - * and vice versa - */ - return -EPERM; + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) + return -E2BIG; + + if (flags & BPF_F_ALLOW_MULTI) { + list_for_each_entry(pl, progs, node) + if (pl->prog == prog) + /* disallow attaching the same prog twice */ + return -EINVAL; + + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + pl->prog = prog; + list_add_tail(&pl->node, progs); + } else { + if (list_empty(progs)) { + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + list_add_tail(&pl->node, progs); + } else { + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl_was_allocated = false; + } + pl->prog = prog; } - if (!prog && !old_prog) - /* report error when trying to detach and nothing is attached */ - return -ENOENT; + old_flags = cgrp->bpf.flags[type]; + cgrp->bpf.flags[type] = flags; - cgrp->bpf.prog[type] = prog; + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); - css_for_each_descendant_pre(pos, &cgrp->self) { - struct cgroup *desc = container_of(pos, struct cgroup, self); - - /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) { - pos = css_rightmost_descendant(pos); - } else { - rcu_assign_pointer(desc->bpf.effective[type], - effective); - desc->bpf.disallow_override[type] = !overridable; - } + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; } - if (prog) - static_branch_inc(&cgroup_bpf_enabled_key); + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + static_branch_inc(&cgroup_bpf_enabled_key); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and cleanup the prog list */ + pl->prog = old_prog; + if (pl_was_allocated) { + list_del(&pl->node); + kfree(pl); + } + return err; +} + +/** + * __cgroup_bpf_detach() - Detach the program from a cgroup, and + * propagate the change to descendants + * @cgrp: The cgroup which descendants to traverse + * @prog: A program to detach or NULL + * @type: Type of detach operation + * + * Must be called with cgroup_mutex held. + */ +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 unused_flags) +{ + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + int err; + + if (flags & BPF_F_ALLOW_MULTI) { + if (!prog) + /* to detach MULTI prog the user has to specify valid FD + * of the program to be detached + */ + return -EINVAL; + } else { + if (list_empty(progs)) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + } + + if (flags & BPF_F_ALLOW_MULTI) { + /* find the prog and detach it */ + list_for_each_entry(pl, progs, node) { + if (pl->prog != prog) + continue; + old_prog = prog; + /* mark it deleted, so it's ignored while + * recomputing effective + */ + pl->prog = NULL; + break; + } + if (!old_prog) + return -ENOENT; + } else { + /* to maintain backward compatibility NONE and OVERRIDE cgroups + * allow detaching with invalid FD (prog==NULL) + */ + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl->prog = NULL; + } + + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* now can actually delete it from this cgroup list */ + list_del(&pl->node); + kfree(pl); + if (list_empty(progs)) + /* last program was detached, reset flags to zero */ + cgrp->bpf.flags[type] = 0; + + bpf_prog_put(old_prog); + static_branch_dec(&cgroup_bpf_enabled_key); + return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and restore back old_prog */ + pl->prog = old_prog; + return err; } /** @@ -171,36 +403,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum bpf_attach_type type) { - struct bpf_prog *prog; + unsigned int offset = skb->data - skb_network_header(skb); + struct sock *save_sk; struct cgroup *cgrp; - int ret = 0; + int ret; if (!sk || !sk_fullsock(sk)) return 0; - if (sk->sk_family != AF_INET && - sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) { - unsigned int offset = skb->data - skb_network_header(skb); - struct sock *save_sk = skb->sk; - - skb->sk = sk; - __skb_push(skb, offset); - ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; - __skb_pull(skb, offset); - skb->sk = save_sk; - } - - rcu_read_unlock(); - - return ret; + save_sk = skb->sk; + skb->sk = sk; + __skb_push(skb, offset); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, + bpf_prog_run_save_cb); + __skb_pull(skb, offset); + skb->sk = save_sk; + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); @@ -221,19 +443,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; - - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; + int ret; - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -258,18 +471,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; - - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); + int ret; - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, + BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..6b49e1991ae7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1381,6 +1381,37 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +static struct { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +} empty_prog_array = { + .null_prog = NULL, +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +{ + if (prog_cnt) + return kzalloc(sizeof(struct bpf_prog_array) + + sizeof(struct bpf_prog *) * (prog_cnt + 1), + flags); + + return &empty_prog_array.hdr; +} + +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +{ + if (!progs || + progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + return; + kfree_rcu(progs, rcu); +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b927da66f653..51bee695d32c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1168,6 +1168,9 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) return 0; } +#define BPF_F_ATTACH_MASK \ + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; @@ -1181,7 +1184,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; - if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + if (attr->attach_flags & ~BPF_F_ATTACH_MASK) return -EINVAL; switch (attr->attach_type) { @@ -1212,8 +1215,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, - attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + attr->attach_flags); if (ret) bpf_prog_put(prog); cgroup_put(cgrp); @@ -1225,6 +1228,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; + struct bpf_prog *prog; struct cgroup *cgrp; int ret; @@ -1237,23 +1242,33 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; + break; case BPF_CGROUP_SOCK_OPS: - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - - ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); - cgroup_put(cgrp); + ptype = BPF_PROG_TYPE_SOCK_OPS; break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - ret = sockmap_get_from_fd(attr, false); - break; + return sockmap_get_from_fd(attr, false); default: return -EINVAL; } + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + prog = NULL; + + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); + if (prog) + bpf_prog_put(prog); + cgroup_put(cgrp); return ret; } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d6551cd45238..57eb866ae78d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1896,6 +1896,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) if (ret) goto destroy_root; + ret = cgroup_bpf_inherit(root_cgrp); + WARN_ON_ONCE(ret); + trace_cgroup_setup_root(root); /* @@ -4713,6 +4716,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgrp->self.parent = &parent->self; cgrp->root = root; cgrp->level = level; + ret = cgroup_bpf_inherit(cgrp); + if (ret) + goto out_idr_free; for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { cgrp->ancestor_ids[tcgrp->level] = tcgrp->id; @@ -4747,13 +4753,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent) if (!cgroup_on_dfl(cgrp)) cgrp->subtree_control = cgroup_control(cgrp); - if (parent) - cgroup_bpf_inherit(cgrp, parent); - cgroup_propagate_control(cgrp); return cgrp; +out_idr_free: + cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: @@ -5736,14 +5741,23 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ #ifdef CONFIG_CGROUP_BPF -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable) +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + mutex_unlock(&cgroup_mutex); + return ret; +} +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct cgroup *parent = cgroup_parent(cgrp); int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); + ret = __cgroup_bpf_detach(cgrp, prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } -- cgit v1.2.3 From 468e2f64d220fe2dc11caa2bcb9b3a1e50fc7321 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:22 -0700 Subject: bpf: introduce BPF_PROG_QUERY command introduce BPF_PROG_QUERY command to retrieve a set of either attached programs to given cgroup or a set of effective programs that will execute for events within a cgroup Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 4 ++++ include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 13 +++++++++++++ kernel/bpf/cgroup.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/core.c | 38 ++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 34 ++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup.c | 10 ++++++++++ 7 files changed, 148 insertions(+) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 102e56fbb6de..359b6f5d3d90 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -44,12 +44,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a6964b75f070..a67daea731ab 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,9 @@ struct bpf_prog_array { struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt); #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ ({ \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 762f74bc6c47..cb2b9f95160a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -211,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -289,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 6b7500bbdb53..e88abc0865d5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -384,6 +384,52 @@ cleanup: return err; } +/* Must be called with cgroup_mutex held to avoid races. */ +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); + enum bpf_attach_type type = attr->query.attach_type; + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + int cnt, ret = 0, i; + + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) + cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); + else + cnt = prog_list_length(progs); + + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) + return -EFAULT; + if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) + return -EFAULT; + if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) + /* return early if user requested only program count + flags */ + return 0; + if (attr->query.prog_cnt < cnt) { + cnt = attr->query.prog_cnt; + ret = -ENOSPC; + } + + if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { + return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], + prog_ids, cnt); + } else { + struct bpf_prog_list *pl; + u32 id; + + i = 0; + list_for_each_entry(pl, progs, node) { + id = pl->prog->aux->id; + if (copy_to_user(prog_ids + i, &id, sizeof(id))) + return -EFAULT; + if (++i == cnt) + break; + } + } + return ret; +} + /** * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socket sending or receiving traffic diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6b49e1991ae7..eba966c09053 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1412,6 +1412,44 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) +{ + struct bpf_prog **prog; + u32 cnt = 0; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) + cnt++; + rcu_read_unlock(); + return cnt; +} + +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt) +{ + struct bpf_prog **prog; + u32 i = 0, id; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) { + id = (*prog)->aux->id; + if (copy_to_user(prog_ids + i, &id, sizeof(id))) { + rcu_read_unlock(); + return -EFAULT; + } + if (++i == cnt) { + prog++; + break; + } + } + rcu_read_unlock(); + if (*prog) + return -ENOSPC; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 51bee695d32c..0048cb24ba7b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1272,6 +1272,37 @@ static int bpf_prog_detach(const union bpf_attr *attr) return ret; } +#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt + +static int bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct cgroup *cgrp; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (CHECK_ATTR(BPF_PROG_QUERY)) + return -EINVAL; + if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE) + return -EINVAL; + + switch (attr->query.attach_type) { + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_SOCK_OPS: + break; + default: + return -EINVAL; + } + cgrp = cgroup_get_from_fd(attr->query.target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + ret = cgroup_bpf_query(cgrp, attr, uattr); + cgroup_put(cgrp); + return ret; +} #endif /* CONFIG_CGROUP_BPF */ #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration @@ -1568,6 +1599,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_DETACH: err = bpf_prog_detach(&attr); break; + case BPF_PROG_QUERY: + err = bpf_prog_query(&attr, uattr); + break; #endif case BPF_PROG_TEST_RUN: err = bpf_prog_test_run(&attr, uattr); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 57eb866ae78d..269512b94a94 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5761,4 +5761,14 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, mutex_unlock(&cgroup_mutex); return ret; } +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_query(cgrp, attr, uattr); + mutex_unlock(&cgroup_mutex); + return ret; +} #endif /* CONFIG_CGROUP_BPF */ -- cgit v1.2.3 From 390ee7e29fc8e6e90d3065b00afb047c4ee552f9 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:23 -0700 Subject: bpf: enforce return code for cgroup-bpf programs with addition of tnum logic the verifier got smart enough and we can enforce return codes at program load time. For now do so for cgroup-bpf program types. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 40 ++++++++++++++++ tools/testing/selftests/bpf/test_verifier.c | 72 +++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4cf9b72c59a0..52b022310f6a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3073,6 +3073,43 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } +static int check_return_code(struct bpf_verifier_env *env) +{ + struct bpf_reg_state *reg; + struct tnum range = tnum_range(0, 1); + + switch (env->prog->type) { + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_SOCK_OPS: + break; + default: + return 0; + } + + reg = &env->cur_state.regs[BPF_REG_0]; + if (reg->type != SCALAR_VALUE) { + verbose("At program exit the register R0 is not a known value (%s)\n", + reg_type_str[reg->type]); + return -EINVAL; + } + + if (!tnum_in(range, reg->var_off)) { + verbose("At program exit the register R0 "); + if (!tnum_is_unknown(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("has value %s", tn_buf); + } else { + verbose("has unknown scalar value"); + } + verbose(" should have been 0 or 1\n"); + return -EINVAL; + } + return 0; +} + /* non-recursive DFS pseudo code * 1 procedure DFS-iterative(G,v): * 2 label v as discovered @@ -3863,6 +3900,9 @@ static int do_check(struct bpf_verifier_env *env) return -EACCES; } + err = check_return_code(env); + if (err) + return err; process_bpf_exit: insn_idx = pop_stack(env, &prev_insn_idx); if (insn_idx < 0) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 290d5056c165..cc91d0159f43 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6892,6 +6892,78 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, + { + "bpf_exit with invalid return code. test1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0xffffffff)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x0; 0x3)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test4", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test5", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has value (0x2; 0x0)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test6", + .insns = { + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr = "R0 is not a known value (ctx)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, + { + "bpf_exit with invalid return code. test7", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr = "R0 has unknown scalar value", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 244d20efdb68c5c1a26c667baeb232ea163e2f69 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:24 -0700 Subject: libbpf: introduce bpf_prog_detach2() introduce bpf_prog_detach2() that takes one more argument prog_fd vs bpf_prog_detach() that takes only attach_fd and type. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/lib/bpf/bpf.c | 12 ++++++++++++ tools/lib/bpf/bpf.h | 1 + 2 files changed, 13 insertions(+) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index daf624e4c720..d4b6ba8292ee 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -291,6 +291,18 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr; + + bzero(&attr, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 118d00535a0d..afd64727c9cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -66,6 +66,7 @@ int bpf_obj_get(const char *pathname); int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); +int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); -- cgit v1.2.3 From 39323e788cb672adba8709ca407bd6763aae577d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:25 -0700 Subject: samples/bpf: add multi-prog cgroup test case create 5 cgroups, attach 6 progs and check that progs are executed as: cgrp1 (MULTI progs A, B) -> cgrp2 (OVERRIDE prog C) -> cgrp3 (MULTI prog D) -> cgrp4 (OVERRIDE prog E) -> cgrp5 (NONE prog F) the event in cgrp5 triggers execution of F,D,A,B in that order. if prog F is detached, the execution is E,D,A,B if prog F and D are detached, the execution is E,A,B if prog F, E and D are detached, the execution is C,A,B Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/cgroup_helpers.c | 4 +- samples/bpf/test_cgrp2_attach2.c | 188 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 185 insertions(+), 7 deletions(-) diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c index 9d1be9426401..88bdcf4b1670 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/samples/bpf/cgroup_helpers.c @@ -56,7 +56,7 @@ int setup_cgroup_environment(void) return 1; } - if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { + if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) { log_err("mount cgroup2"); return 1; } @@ -163,7 +163,7 @@ int create_and_get_cgroup(char *path) format_cgroup_path(cgroup_path, path); if (mkdir(cgroup_path, 0777) && errno != EEXIST) { - log_err("mkdiring cgroup"); + log_err("mkdiring cgroup %s .. %s", path, cgroup_path); return 0; } diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 3049b1f26267..9a9f6836e5e9 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -30,7 +30,7 @@ #define FOO "/foo" #define BAR "/foo/bar/" -#define PING_CMD "ping -c1 -w1 127.0.0.1" +#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null" char bpf_log_buf[BPF_LOG_BUF_SIZE]; @@ -55,8 +55,7 @@ static int prog_load(int verdict) return ret; } - -int main(int argc, char **argv) +static int test_foo_bar(void) { int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0; @@ -189,8 +188,187 @@ out: close(bar); cleanup_cgroup_environment(); if (!rc) - printf("PASS\n"); + printf("### override:PASS\n"); + else + printf("### override:FAIL\n"); + return rc; +} + +static int map_fd = -1; + +static int prog_load_cnt(int verdict, int val) +{ + if (map_fd < 0) + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + struct bpf_insn prog[] = { + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + if (ret < 0) { + log_err("Loading program"); + printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); + return 0; + } + return ret; +} + + +static int test_multiprog(void) +{ + int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; + int drop_prog, allow_prog[6] = {}, rc = 0; + unsigned long long value; + int i = 0; + + for (i = 0; i < 6; i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if (!allow_prog[i]) + goto err; + } + drop_prog = prog_load_cnt(0, 1); + if (!drop_prog) + goto err; + + if (setup_cgroup_environment()) + goto err; + + cg1 = create_and_get_cgroup("/cg1"); + if (!cg1) + goto err; + cg2 = create_and_get_cgroup("/cg1/cg2"); + if (!cg2) + goto err; + cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); + if (!cg3) + goto err; + cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); + if (!cg4) + goto err; + cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); + if (!cg5) + goto err; + + if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) + goto err; + + if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog to cg1"); + goto err; + } + if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Unexpected success attaching the same prog to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog2 to cg1"); + goto err; + } + if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to cg2"); + goto err; + } + if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) { + log_err("Attaching prog to cg3"); + goto err; + } + if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 1)) { + log_err("Attaching prog to cg4"); + goto err; + } + if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) { + log_err("Attaching prog to cg5"); + goto err; + } + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 32); + + /* detach bottom program and ping again */ + if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg5"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 8 + 16); + + /* detach 3rd from bottom program and ping again */ + errno = 0; + if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Unexpected success on detach from cg3"); + goto err; + } + if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching from cg3"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 16); + + /* detach 2nd from bottom program and ping again */ + if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) { + log_err("Detaching prog from cg4"); + goto err; + } + value = 0; + assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); + assert(system(PING_CMD) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); + assert(value == 1 + 2 + 4); + goto out; +err: + rc = 1; + +out: + for (i = 0; i < 6; i++) + if (allow_prog[i] > 0) + close(allow_prog[i]); + close(cg1); + close(cg2); + close(cg3); + close(cg4); + close(cg5); + cleanup_cgroup_environment(); + if (!rc) + printf("### multi:PASS\n"); else - printf("FAIL\n"); + printf("### multi:FAIL\n"); return rc; } + +int main(int argc, char **argv) +{ + int rc = 0; + + rc = test_foo_bar(); + if (rc) + return rc; + + return test_multiprog(); +} -- cgit v1.2.3 From defd9c476fa6b01b4eb5450452bfd202138decb7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:26 -0700 Subject: libbpf: sync bpf.h tools/include/uapi/linux/bpf.h got out of sync with actual kernel header. Update it. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf.h | 55 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6d2137b4cf38..cb2b9f95160a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -92,6 +92,7 @@ enum bpf_cmd { BPF_PROG_GET_FD_BY_ID, BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, }; enum bpf_map_type { @@ -143,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -175,6 +212,9 @@ enum bpf_attach_type { /* Specify numa node during map creation */ #define BPF_F_NUMA_NODE (1U << 2) +/* flags for BPF_PROG_QUERY */ +#define BPF_F_QUERY_EFFECTIVE (1U << 0) + #define BPF_OBJ_NAME_LEN 16U union bpf_attr { @@ -253,6 +293,15 @@ union bpf_attr { __u32 info_len; __aligned_u64 info; } info; + + struct { /* anonymous struct used by BPF_PROG_QUERY command */ + __u32 target_fd; /* container object to query */ + __u32 attach_type; + __u32 query_flags; + __u32 attach_flags; + __aligned_u64 prog_ids; + __u32 prog_cnt; + } query; } __attribute__((aligned(8))); /* BPF helper function descriptions: -- cgit v1.2.3 From 5d0cbf9b6c11964bf2f4041a5b0b6f81cb169736 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:27 -0700 Subject: libbpf: add support for BPF_PROG_QUERY add support for BPF_PROG_QUERY command to libbpf Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/lib/bpf/bpf.c | 20 ++++++++++++++++++++ tools/lib/bpf/bpf.h | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index d4b6ba8292ee..5128677e4117 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -303,6 +303,26 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); } +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) +{ + union bpf_attr attr; + int ret; + + bzero(&attr, sizeof(attr)); + attr.query.target_fd = target_fd; + attr.query.attach_type = type; + attr.query.query_flags = query_flags; + attr.query.prog_cnt = *prog_cnt; + attr.query.prog_ids = ptr_to_u64(prog_ids); + + ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) + *attach_flags = attr.query.attach_flags; + *prog_cnt = attr.query.prog_cnt; + return ret; +} + int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index afd64727c9cf..6534889e2b2f 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -75,5 +75,6 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id); int bpf_prog_get_fd_by_id(__u32 id); int bpf_map_get_fd_by_id(__u32 id); int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len); - +int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, + __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); #endif -- cgit v1.2.3 From dfc069998ebb010f910dfec379dab4f44d331980 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:28 -0700 Subject: samples/bpf: use bpf_prog_query() interface use BPF_PROG_QUERY command to strengthen test coverage Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- samples/bpf/test_cgrp2_attach2.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 9a9f6836e5e9..3e8232cc04a8 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -236,6 +236,7 @@ static int prog_load_cnt(int verdict, int val) static int test_multiprog(void) { + __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; int drop_prog, allow_prog[6] = {}, rc = 0; unsigned long long value; @@ -304,6 +305,32 @@ static int test_multiprog(void) assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); assert(value == 1 + 2 + 8 + 32); + /* query the number of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + NULL, NULL, &prog_cnt) == 0); + assert(prog_cnt == 4); + /* retrieve prog_ids of effective progs in cg5 */ + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 4); + assert(attach_flags == 0); + saved_prog_id = prog_ids[0]; + /* check enospc handling */ + prog_ids[0] = 0; + prog_cnt = 2; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == -1 && + errno == ENOSPC); + assert(prog_cnt == 4); + /* check that prog_ids are returned even when buffer is too small */ + assert(prog_ids[0] == saved_prog_id); + /* retrieve prog_id of single attached prog in cg5 */ + prog_ids[0] = 0; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 1); + assert(prog_ids[0] == saved_prog_id); + /* detach bottom program and ping again */ if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { log_err("Detaching prog from cg5"); @@ -341,6 +368,15 @@ static int test_multiprog(void) assert(system(PING_CMD) == 0); assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); assert(value == 1 + 2 + 4); + + prog_cnt = 4; + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, + &attach_flags, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 3); + assert(attach_flags == 0); + assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, + NULL, prog_ids, &prog_cnt) == 0); + assert(prog_cnt == 0); goto out; err: rc = 1; -- cgit v1.2.3 From 6621dd29eb9b5e6774ec7a9a75161352fdea47fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 3 Oct 2017 13:53:23 +0200 Subject: dev: advertise the new nsid when the netns iface changes x-netns interfaces are bound to two netns: the link netns and the upper netns. Usually, this kind of interfaces is created in the link netns and then moved to the upper netns. At the end, the interface is visible only in the upper netns. The link nsid is advertised via netlink in the upper netns, thus the user always knows where is the link part. There is no such mechanism in the link netns. When the interface is moved to another netns, the user cannot "follow" it. This patch adds a new netlink attribute which helps to follow an interface which moves to another netns. When the interface is unregistered, the new nsid is advertised. If the interface is a x-netns interface (ie rtnl_link_ops->get_link_net is defined), the nsid is allocated if needed. CC: Jason A. Donenfeld Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 +++- include/uapi/linux/if_link.h | 1 + net/core/dev.c | 11 ++++++++--- net/core/rtnetlink.c | 31 ++++++++++++++++++++++--------- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index dea59c8eec54..1251638e60d3 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -17,9 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, - gfp_t flags); + gfp_t flags, int *new_nsid); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index ea87bd708ee9..cd580fc0e58f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -158,6 +158,7 @@ enum { IFLA_PAD, IFLA_XDP, IFLA_EVENT, + IFLA_NEW_NETNSID, __IFLA_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 454f05441546..bffc75429184 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -7204,7 +7205,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL); + GFP_KERNEL, NULL); /* * Flush the unicast and multicast chains @@ -8291,7 +8292,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int err; + int err, new_nsid; ASSERT_RTNL(); @@ -8347,7 +8348,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) + new_nsid = peernet2id_alloc(dev_net(dev), net); + else + new_nsid = peernet2id(dev_net(dev), net); + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid); /* * Flush the unicast and multicast chains diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3fb1ca33cba4..1ee98b1369d5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -915,6 +915,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ + + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1384,7 +1385,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb, static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event) + u32 event, int *new_nsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; @@ -1472,6 +1473,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_link_netnsid(skb, dev)) goto nla_put_failure; + if (new_nsid && + nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) + goto nla_put_failure; + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; @@ -1701,7 +1706,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0); + ext_filter_mask, 0, NULL); if (err < 0) { if (likely(skb->len)) @@ -2808,7 +2813,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOBUFS; err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); @@ -2893,7 +2898,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, - u32 event, gfp_t flags) + u32 event, gfp_t flags, int *new_nsid) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2904,7 +2909,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); + err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, + new_nsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2927,14 +2933,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags) + gfp_t flags, int *new_nsid) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } @@ -2942,10 +2948,17 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags) { - rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); } EXPORT_SYMBOL(rtmsg_ifinfo); +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, + new_nsid); +} + static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, @@ -4321,7 +4334,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL); + GFP_KERNEL, NULL); break; default: break; -- cgit v1.2.3 From 51d0c04795a4b5d9a188336884887a9d394a94b0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:45 -0700 Subject: net: Add extack to netdev_notifier_info Add netlink_ext_ack to netdev_notifier_info to allow notifier handlers to return errors to userspace. Clean up the initialization in dev.c such that extack is easily added in subsequent patches where relevant. Specifically, remove the init call in call_netdevice_notifiers_info and have callers initalize on stack when info is declared. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++- net/core/dev.c | 79 ++++++++++++++++++++++++++++------------------- 2 files changed, 56 insertions(+), 33 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d04424cfffba..05fcaba4b0d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2309,7 +2309,8 @@ int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); struct netdev_notifier_info { - struct net_device *dev; + struct net_device *dev; + struct netlink_ext_ack *extack; }; struct netdev_notifier_change_info { @@ -2334,6 +2335,7 @@ static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { info->dev = dev; + info->extack = NULL; } static inline struct net_device * @@ -2342,6 +2344,12 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) return info->dev; } +static inline struct netlink_ext_ack * +netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) +{ + return info->extack; +} + int call_netdevice_notifiers(unsigned long val, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index bffc75429184..e27a6bc0ac4d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -163,7 +163,6 @@ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info); static struct napi_struct *napi_by_id(unsigned int napi_id); @@ -1339,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info.dev = dev, + }; - change_info.flags_changed = 0; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } @@ -1563,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro); static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - netdev_notifier_info_init(&info, dev); return nb->notifier_call(nb, val, &info); } @@ -1690,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); */ static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info) { ASSERT_RTNL(); - netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -1709,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val, int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - return call_netdevice_notifiers_info(val, dev, &info); + return call_netdevice_notifiers_info(val, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -6278,7 +6279,15 @@ static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .master = master, + .linking = true, + .upper_info = upper_info, + }; int ret = 0; ASSERT_RTNL(); @@ -6296,12 +6305,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - changeupper_info.upper_dev = upper_dev; - changeupper_info.master = master; - changeupper_info.linking = true; - changeupper_info.upper_info = upper_info; - - ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6312,7 +6316,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6376,20 +6380,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .linking = false, + }; ASSERT_RTNL(); - changeupper_info.upper_dev = upper_dev; changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; - changeupper_info.linking = false; - call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -6405,11 +6413,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink); void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info info; + struct netdev_notifier_bonding_info info = { + .info.dev = dev, + }; memcpy(&info.bonding_info, bonding_info, sizeof(struct netdev_bonding_info)); - call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, &info.info); } EXPORT_SYMBOL(netdev_bonding_info_change); @@ -6535,11 +6545,13 @@ EXPORT_SYMBOL(dev_get_nest_level); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info) { - struct netdev_notifier_changelowerstate_info changelowerstate_info; + struct netdev_notifier_changelowerstate_info changelowerstate_info = { + .info.dev = lower_dev, + }; ASSERT_RTNL(); changelowerstate_info.lower_state_info = lower_state_info; - call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, &changelowerstate_info.info); } EXPORT_SYMBOL(netdev_lower_state_changed); @@ -6830,11 +6842,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, if (dev->flags & IFF_UP && (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { - struct netdev_notifier_change_info change_info; - - change_info.flags_changed = changes; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, - &change_info.info); + struct netdev_notifier_change_info change_info = { + .info = { + .dev = dev, + }, + .flags_changed = changes, + }; + + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); } } -- cgit v1.2.3 From 33eaf2a6eb48ebf00374aaaf4b1b43f9950dcbe4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:46 -0700 Subject: net: Add extack to ndo_add_slave Pass extack to do_set_master and down to ndo_add_slave Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 5 +++-- drivers/net/bonding/bond_options.c | 2 +- drivers/net/team/team.c | 3 ++- drivers/net/vrf.c | 3 ++- include/linux/netdevice.h | 3 ++- include/net/bonding.h | 3 ++- net/batman-adv/soft-interface.c | 3 ++- net/bridge/br_device.c | 3 ++- net/core/rtnetlink.c | 10 ++++++---- 9 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b19dc033fb36..78feb94a36db 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1328,7 +1328,8 @@ void bond_lower_state_changed(struct slave *slave) } /* enslave device to bond device */ -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); const struct net_device_ops *slave_ops = slave_dev->netdev_ops; @@ -3492,7 +3493,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd switch (cmd) { case BOND_ENSLAVE_OLD: case SIOCBONDENSLAVE: - res = bond_enslave(bond_dev, slave_dev); + res = bond_enslave(bond_dev, slave_dev, NULL); break; case BOND_RELEASE_OLD: case SIOCBONDRELEASE: diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 5931aa2fe997..8a9b085c2a98 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1383,7 +1383,7 @@ static int bond_option_slaves_set(struct bonding *bond, switch (command[0]) { case '+': netdev_dbg(bond->dev, "Adding slave %s\n", dev->name); - ret = bond_enslave(bond->dev, dev); + ret = bond_enslave(bond->dev, dev, NULL); break; case '-': diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ae53e899259f..4359d45aa131 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1914,7 +1914,8 @@ static int team_netpoll_setup(struct net_device *dev, } #endif -static int team_add_slave(struct net_device *dev, struct net_device *port_dev) +static int team_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { struct team *team = netdev_priv(dev); int err; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index cc18b7b11612..4a082ef53533 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -788,7 +788,8 @@ err: return ret; } -static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev) +static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05fcaba4b0d9..368a5064a487 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1246,7 +1246,8 @@ struct net_device_ops { u32 flow_id); #endif int (*ndo_add_slave)(struct net_device *dev, - struct net_device *slave_dev); + struct net_device *slave_dev, + struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); netdev_features_t (*ndo_fix_features)(struct net_device *dev, diff --git a/include/net/bonding.h b/include/net/bonding.h index b2e68657a216..2860cc66c2bb 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -596,7 +596,8 @@ void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); -int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); +int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c2c986746d0b..e7d5fbb6ad53 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -867,7 +867,8 @@ free_bat_counters: * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_add(struct net_device *dev, - struct net_device *slave_dev) + struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; struct net *net = dev_net(dev); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f6b6a92f1c48..cb0131d70ab1 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -320,7 +320,8 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif -static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee98b1369d5..c5ee429bcce9 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1957,7 +1957,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } -static int do_set_master(struct net_device *dev, int ifindex) +static int do_set_master(struct net_device *dev, int ifindex, + struct netlink_ext_ack *extack) { struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; @@ -1982,7 +1983,7 @@ static int do_set_master(struct net_device *dev, int ifindex) return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(upper_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev, extack); if (err) return err; } else { @@ -2115,7 +2116,7 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2753,7 +2754,8 @@ replay: goto out_unregister; } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), + extack); if (err) goto out_unregister; } -- cgit v1.2.3 From 42ab19ee90292993370a30ad242599d75a3b749e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:47 -0700 Subject: net: Add extack to upper device linking Add extack arg to netdev_upper_dev_link and netdev_master_upper_dev_link Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++--- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 2 +- drivers/net/ipvlan/ipvlan_main.c | 2 +- drivers/net/macsec.c | 2 +- drivers/net/macvlan.c | 7 ++++--- drivers/net/macvtap.c | 2 +- drivers/net/team/team.c | 2 +- drivers/net/usb/qmi_wwan.c | 2 +- drivers/net/vrf.c | 7 ++++--- include/linux/if_macvlan.h | 3 ++- include/linux/netdevice.h | 6 ++++-- net/8021q/vlan.c | 6 +++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_netlink.c | 2 +- net/batman-adv/hard-interface.c | 2 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 15 ++++++++++----- net/openvswitch/vport-netdev.c | 3 ++- 19 files changed, 44 insertions(+), 32 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 78feb94a36db..bc92307c2082 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1217,14 +1217,15 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond) } } -static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave) +static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave, + struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; int err; lag_upper_info.tx_type = bond_lag_tx_type(bond); err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave, - &lag_upper_info); + &lag_upper_info, extack); if (err) return err; rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL); @@ -1710,7 +1711,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_detach; } - res = bond_master_upper_dev_link(bond, new_slave); + res = bond_master_upper_dev_link(bond, new_slave, extack); if (res) { netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 98f22551eb45..1af326a60cbb 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -178,7 +178,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; - err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack); if (err) goto err2; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f300ae61c6c6..dfb986421ec6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1748,7 +1748,7 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto rx_handler_failed; } - ret = netdev_upper_dev_link(vf_netdev, ndev); + ret = netdev_upper_dev_link(vf_netdev, ndev, NULL); if (ret != 0) { netdev_err(vf_netdev, "can not set master device %s (err = %d)\n", diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index c74893c1e620..57c3856bab05 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -584,7 +584,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, if (err < 0) goto remove_ida; - err = netdev_upper_dev_link(phy_dev, dev); + err = netdev_upper_dev_link(phy_dev, dev, extack); if (err) { goto unregister_netdev; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 98e4deaa3a6a..ccbe4eaffe4d 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3244,7 +3244,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, &macsec_netdev_addr_lock_key, macsec_get_nest_level(dev)); - err = netdev_upper_dev_link(real_dev, dev); + err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 1ffe77e95d46..858bd66511a2 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1344,7 +1344,8 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode, } int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port; @@ -1433,7 +1434,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, goto destroy_macvlan_port; dev->priv_flags |= IFF_MACVLAN; - err = netdev_upper_dev_link(lowerdev, dev); + err = netdev_upper_dev_link(lowerdev, dev, extack); if (err) goto unregister_netdev; @@ -1456,7 +1457,7 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - return macvlan_common_newlink(src_net, dev, tb, data); + return macvlan_common_newlink(src_net, dev, tb, data, extack); } void macvlan_dellink(struct net_device *dev, struct list_head *head) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index c2d0ea2fb019..f62aea2fcfa9 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -105,7 +105,7 @@ static int macvtap_newlink(struct net *src_net, struct net_device *dev, /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ - err = macvlan_common_newlink(src_net, dev, tb, data); + err = macvlan_common_newlink(src_net, dev, tb, data, extack); if (err) { netdev_rx_handler_unregister(dev); return err; diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 4359d45aa131..a468439969df 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1112,7 +1112,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port) lag_upper_info.tx_type = team->mode->lag_tx_type; err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, - &lag_upper_info); + &lag_upper_info, NULL); if (err) return err; port->dev->priv_flags |= IFF_TEAM_PORT; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8c3733608271..db7279d5b250 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -221,7 +221,7 @@ static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) /* Account for reference in struct qmimux_priv_priv */ dev_hold(real_dev); - err = netdev_upper_dev_link(real_dev, new_dev); + err = netdev_upper_dev_link(real_dev, new_dev, NULL); if (err) goto out_unregister_netdev; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 4a082ef53533..77d0655a0250 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -764,7 +764,8 @@ static void cycle_netdev(struct net_device *dev) } } -static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) +static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, + struct netlink_ext_ack *extack) { int ret; @@ -775,7 +776,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev) return -EOPNOTSUPP; port_dev->priv_flags |= IFF_L3MDEV_SLAVE; - ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL); + ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); if (ret < 0) goto err; @@ -794,7 +795,7 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) return -EINVAL; - return do_vrf_add_slave(dev, port_dev); + return do_vrf_add_slave(dev, port_dev, extack); } /* inverse of do_vrf_add_slave */ diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index c9ec1343d187..10e319f41fb1 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -72,7 +72,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]); + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack); extern void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 368a5064a487..31bb3010c69b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3919,10 +3919,12 @@ void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); -int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); +int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, + struct netlink_ext_ack *extack); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info); + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9649579b5b9f..71c3e045505b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev, return 0; } -int register_vlan_dev(struct net_device *dev) +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; @@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - err = netdev_upper_dev_link(real_dev, dev); + err = netdev_upper_dev_link(real_dev, dev, extack); if (err) goto out_unregister_netdev; @@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) vlan->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; - err = register_vlan_dev(new_dev); + err = register_vlan_dev(new_dev, NULL); if (err < 0) goto out_free_newdev; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index df8bd65dd370..94f8eed9f9b3 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -107,7 +107,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); -int register_vlan_dev(struct net_device *dev); +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..6e7c5a6a7930 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -160,7 +160,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - return register_vlan_dev(dev); + return register_vlan_dev(dev, extack); } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..f7b413b9297e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL, NULL); + soft_iface, NULL, NULL, NULL); if (ret) goto err_dev; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f3aef22931ab..0a3fd727048d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -540,7 +540,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, NULL); if (err) goto err5; diff --git a/net/core/dev.c b/net/core/dev.c index e27a6bc0ac4d..fcddccb6be41 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6277,11 +6277,13 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { .dev = dev, + .extack = extack, }, .upper_dev = upper_dev, .master = master, @@ -6341,9 +6343,11 @@ rollback: * returns zero. */ int netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, + NULL, NULL, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -6362,10 +6366,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info); + upper_priv, upper_info, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 0389398fa4ab..2e5e7a41d8ef 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL, NULL); + get_dpdev(vport->dp), + NULL, NULL, NULL); if (err) goto error_unlock; -- cgit v1.2.3 From de3baa3ed72f09f8d2ba797645d052cd5f5de27d Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:48 -0700 Subject: net: vrf: Add extack messages for enslave errors Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 77d0655a0250..0b54f553228e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -772,8 +772,11 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, /* do not allow loopback device to be enslaved to a VRF. * The vrf device acts as the loopback for the vrf. */ - if (port_dev == dev_net(dev)->loopback_dev) + if (port_dev == dev_net(dev)->loopback_dev) { + NL_SET_ERR_MSG(extack, + "Can not enslave loopback device to a VRF"); return -EOPNOTSUPP; + } port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); @@ -792,7 +795,13 @@ err: static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { - if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev)) + if (netif_is_l3_master(port_dev)) { + NL_SET_ERR_MSG(extack, + "Can not enslave an L3 master device to a VRF"); + return -EINVAL; + } + + if (netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev, extack); -- cgit v1.2.3 From 759088bda21f4887c645579418e8b478eb570d78 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:49 -0700 Subject: net: bonding: Add extack messages for some enslave failures A number of bond_enslave errors are logged using the netdev_err API. Return those messages to userspace via the extack facility. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bc92307c2082..172eeeb68152 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1348,12 +1348,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) { + NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved"); netdev_err(bond_dev, "Error: Device is in use and cannot be enslaved\n"); return -EBUSY; } if (bond_dev == slave_dev) { + NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself."); netdev_err(bond_dev, "cannot enslave bond to itself.\n"); return -EPERM; } @@ -1364,6 +1366,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); if (vlan_uses_dev(bond_dev)) { + NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond"); netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n", slave_dev->name, bond_dev->name); return -EPERM; @@ -1383,6 +1386,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, * enslaving it; the old ifenslave will not. */ if (slave_dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Device can not be enslaved while up"); netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", slave_dev->name); return -EPERM; @@ -1423,6 +1427,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_dev); } } else if (bond_dev->type != slave_dev->type) { + NL_SET_ERR_MSG(extack, "Device type is different from other slaves"); netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n", slave_dev->name, slave_dev->type, bond_dev->type); return -EINVAL; @@ -1430,6 +1435,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (slave_dev->type == ARPHRD_INFINIBAND && BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { + NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves"); netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n", slave_dev->type); res = -EOPNOTSUPP; @@ -1445,6 +1451,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond->params.fail_over_mac = BOND_FOM_ACTIVE; netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n"); } else { + NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active"); netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n"); res = -EOPNOTSUPP; goto err_undo_flags; -- cgit v1.2.3 From ca752be006013ac6f19721280f44c91ef07ad3d1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:50 -0700 Subject: net: bridge: Pass extack to down to netdev_master_upper_dev_link Pass extack arg to br_add_if. Add messages for a couple of failures and pass arg to netdev_master_upper_dev_link. Signed-off-by: David Ahern Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 15 +++++++++++---- net/bridge/br_ioctl.c | 2 +- net/bridge/br_private.h | 3 ++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index cb0131d70ab1..7acb77c9bd65 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -326,7 +326,7 @@ static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, { struct net_bridge *br = netdev_priv(dev); - return br_add_if(br, slave_dev); + return br_add_if(br, slave_dev, extack); } static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 0a3fd727048d..59a74a414e20 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -480,7 +480,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br, } /* called with RTNL */ -int br_add_if(struct net_bridge *br, struct net_device *dev) +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack) { struct net_bridge_port *p; int err = 0; @@ -500,16 +501,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return -EINVAL; /* No bridging of bridges */ - if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) { + NL_SET_ERR_MSG(extack, + "Can not enslave a bridge to a bridge"); return -ELOOP; + } /* Device is already being bridged */ if (br_port_exists(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g. wireless) */ - if (dev->priv_flags & IFF_DONT_BRIDGE) + if (dev->priv_flags & IFF_DONT_BRIDGE) { + NL_SET_ERR_MSG(extack, + "Device does not allow enslaving to a bridge"); return -EOPNOTSUPP; + } p = new_nbp(br, dev); if (IS_ERR(p)) @@ -540,7 +547,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack); if (err) goto err5; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 66cd98772051..8f29103935a3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) return -EINVAL; if (isadd) - ret = br_add_if(br, dev); + ret = br_add_if(br, dev, NULL); else ret = br_del_if(br, dev); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 020c709a017f..ab4df24f7bba 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -566,7 +566,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, void br_port_carrier_check(struct net_bridge_port *p); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); -int br_add_if(struct net_bridge *br, struct net_device *dev); +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack); int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, -- cgit v1.2.3 From e58376e1df2aaffbf12753959142a50f824c46ea Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:51 -0700 Subject: mlxsw: spectrum: Add extack messages for enslave failures mlxsw fails device enslavement for a number of reasons. Use the extack facility to return an error message to the user stating why the enslave is failing. Messages are prefixed with "spectrum" so users know it is a constraint imposed by the hardware driver. For example: $ ip li add br0.11 link br0 type vlan id 11 $ ip li set swp11 master br0 Error: spectrum: Enslaving a port to a device that already has an upper device is not supported. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 47 ++++++++++++++++++++------ 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3adf237c951a..5cd4df08ce97 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4019,14 +4019,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, - struct netdev_lag_upper_info *lag_upper_info) + struct netdev_lag_upper_info *lag_upper_info, + struct netlink_ext_ack *extack) { u16 lag_id; - if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { + NL_SET_ERR_MSG(extack, + "spectrum: Exceeded number of supported LAG devices"); return false; - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) + } + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { + NL_SET_ERR_MSG(extack, + "spectrum: LAG device using unsupported Tx type"); return false; + } return true; } @@ -4231,6 +4238,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, { struct netdev_notifier_changeupper_info *info; struct mlxsw_sp_port *mlxsw_sp_port; + struct netlink_ext_ack *extack; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; int err = 0; @@ -4238,6 +4246,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, mlxsw_sp_port = netdev_priv(dev); mlxsw_sp = mlxsw_sp_port->mlxsw_sp; info = ptr; + extack = netdev_notifier_info_to_extack(&info->info); switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4245,25 +4254,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && - !netif_is_ovs_master(upper_dev)) + !netif_is_ovs_master(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Unknown upper device type"); return -EINVAL; + } if (!info->linking) break; - if (netdev_has_any_upper_dev(upper_dev)) + if (netdev_has_any_upper_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; + } if (netif_is_lag_master(upper_dev) && !mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev, - info->upper_info)) + info->upper_info, extack)) return -EINVAL; - if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) + if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is a LAG master and this device has a VLAN"); return -EINVAL; + } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && - !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) + !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on a LAG port"); return -EINVAL; - if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + } + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Master device is an OVS master and this device has a VLAN"); return -EINVAL; - if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + } + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { + NL_SET_ERR_MSG(extack, + "spectrum: Can not put a VLAN on an OVS port"); return -EINVAL; + } break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; -- cgit v1.2.3 From a92bb546cff0b31d96d5919ebfeda9c678756b42 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 4 Oct 2017 20:10:03 -0700 Subject: tools: rename tools/net directory to tools/bpf We currently only have BPF tools in the tools/net directory. We are about to add more BPF tools there, not necessarily networking related, rename the directory and related Makefile targets to bpf. Suggested-by: Daniel Borkmann Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- MAINTAINERS | 3 +- tools/Makefile | 14 +- tools/bpf/Makefile | 35 ++ tools/bpf/bpf_asm.c | 52 ++ tools/bpf/bpf_dbg.c | 1395 ++++++++++++++++++++++++++++++++++++++++++++ tools/bpf/bpf_exp.l | 198 +++++++ tools/bpf/bpf_exp.y | 656 +++++++++++++++++++++ tools/bpf/bpf_jit_disasm.c | 320 ++++++++++ tools/net/Makefile | 35 -- tools/net/bpf_asm.c | 52 -- tools/net/bpf_dbg.c | 1395 -------------------------------------------- tools/net/bpf_exp.l | 198 ------- tools/net/bpf_exp.y | 656 --------------------- tools/net/bpf_jit_disasm.c | 320 ---------- 14 files changed, 2664 insertions(+), 2665 deletions(-) create mode 100644 tools/bpf/Makefile create mode 100644 tools/bpf/bpf_asm.c create mode 100644 tools/bpf/bpf_dbg.c create mode 100644 tools/bpf/bpf_exp.l create mode 100644 tools/bpf/bpf_exp.y create mode 100644 tools/bpf/bpf_jit_disasm.c delete mode 100644 tools/net/Makefile delete mode 100644 tools/net/bpf_asm.c delete mode 100644 tools/net/bpf_dbg.c delete mode 100644 tools/net/bpf_exp.l delete mode 100644 tools/net/bpf_exp.y delete mode 100644 tools/net/bpf_jit_disasm.c diff --git a/MAINTAINERS b/MAINTAINERS index 5231392cf4bd..004816a585b8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2725,7 +2725,7 @@ F: net/core/filter.c F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ -F: tools/net/bpf* +F: tools/bpf/ F: tools/testing/selftests/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER @@ -9416,7 +9416,6 @@ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: include/uapi/linux/net_namespace.h -F: tools/net/ F: tools/testing/selftests/net/ F: lib/random32.c diff --git a/tools/Makefile b/tools/Makefile index 9dfede37c8ff..df6fcb293fbc 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -19,7 +19,7 @@ help: @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' leds - LEDs tools' @echo ' liblockdep - user-space wrapper for kernel locking-validator' - @echo ' net - misc networking tools' + @echo ' bpf - misc BPF tools' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @echo ' spi - spi tools' @@ -57,7 +57,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm net iio gpio objtool leds: FORCE +cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds: FORCE $(call descend,$@) liblockdep: FORCE @@ -91,7 +91,7 @@ kvm_stat: FORCE all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests spi turbostat usb \ - virtio vm net x86_energy_perf_policy \ + virtio vm bpf x86_energy_perf_policy \ tmon freefall iio objtool kvm_stat acpi_install: @@ -100,7 +100,7 @@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -124,7 +124,7 @@ kvm_stat_install: install: acpi_install cgroup_install cpupower_install gpio_install \ hv_install firewire_install iio_install liblockdep_install \ perf_install selftests_install turbostat_install usb_install \ - virtio_install vm_install net_install x86_energy_perf_policy_install \ + virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install acpi_clean: @@ -133,7 +133,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean leds_clean: +cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -169,7 +169,7 @@ build_clean: clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ - vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ + vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ gpio_clean objtool_clean leds_clean diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile new file mode 100644 index 000000000000..ddf888010652 --- /dev/null +++ b/tools/bpf/Makefile @@ -0,0 +1,35 @@ +prefix = /usr + +CC = gcc +LEX = flex +YACC = bison + +CFLAGS += -Wall -O2 +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include + +%.yacc.c: %.y + $(YACC) -o $@ -d $< + +%.lex.c: %.l + $(LEX) -o $@ $< + +all : bpf_jit_disasm bpf_dbg bpf_asm + +bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' +bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl +bpf_jit_disasm : bpf_jit_disasm.o + +bpf_dbg : LDLIBS = -lreadline +bpf_dbg : bpf_dbg.o + +bpf_asm : LDLIBS = +bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o +bpf_exp.lex.o : bpf_exp.yacc.c + +clean : + rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.* + +install : + install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm + install bpf_dbg $(prefix)/bin/bpf_dbg + install bpf_asm $(prefix)/bin/bpf_asm diff --git a/tools/bpf/bpf_asm.c b/tools/bpf/bpf_asm.c new file mode 100644 index 000000000000..c15aef097b04 --- /dev/null +++ b/tools/bpf/bpf_asm.c @@ -0,0 +1,52 @@ +/* + * Minimal BPF assembler + * + * Instead of libpcap high-level filter expressions, it can be quite + * useful to define filters in low-level BPF assembler (that is kept + * close to Steven McCanne and Van Jacobson's original BPF paper). + * In particular for BPF JIT implementors, JIT security auditors, or + * just for defining BPF expressions that contain extensions which are + * not supported by compilers. + * + * How to get into it: + * + * 1) read Documentation/networking/filter.txt + * 2) Run `bpf_asm [-c] ` to translate into binary + * blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will + * pretty print a C-like construct. + * + * Copyright 2013 Daniel Borkmann + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include +#include +#include + +extern void bpf_asm_compile(FILE *fp, bool cstyle); + +int main(int argc, char **argv) +{ + FILE *fp = stdin; + bool cstyle = false; + int i; + + for (i = 1; i < argc; i++) { + if (!strncmp("-c", argv[i], 2)) { + cstyle = true; + continue; + } + + fp = fopen(argv[i], "r"); + if (!fp) { + fp = stdin; + continue; + } + + break; + } + + bpf_asm_compile(fp, cstyle); + + return 0; +} diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c new file mode 100644 index 000000000000..4f254bcc4423 --- /dev/null +++ b/tools/bpf/bpf_dbg.c @@ -0,0 +1,1395 @@ +/* + * Minimal BPF debugger + * + * Minimal BPF debugger that mimics the kernel's engine (w/o extensions) + * and allows for single stepping through selected packets from a pcap + * with a provided user filter in order to facilitate verification of a + * BPF program. Besides others, this is useful to verify BPF programs + * before attaching to a live system, and can be used in socket filters, + * cls_bpf, xt_bpf, team driver and e.g. PTP code; in particular when a + * single more complex BPF program is being used. Reasons for a more + * complex BPF program are likely primarily to optimize execution time + * for making a verdict when multiple simple BPF programs are combined + * into one in order to prevent parsing same headers multiple times. + * + * More on how to debug BPF opcodes see Documentation/networking/filter.txt + * which is the main document on BPF. Mini howto for getting started: + * + * 1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'): + * 2) > load bpf 6,40 0 0 12,21 0 3 20... (output from `bpf_asm` or + * `tcpdump -iem1 -ddd port 22 | tr '\n' ','` to load as filter) + * 3) > load pcap foo.pcap + * 4) > run /disassemble/dump/quit (self-explanatory) + * 5) > breakpoint 2 (sets bp at loaded BPF insns 2, do `run` then; + * multiple bps can be set, of course, a call to `breakpoint` + * w/o args shows currently loaded bps, `breakpoint reset` for + * resetting all breakpoints) + * 6) > select 3 (`run` etc will start from the 3rd packet in the pcap) + * 7) > step [-, +] (performs single stepping through the BPF) + * + * Copyright 2013 Daniel Borkmann + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define TCPDUMP_MAGIC 0xa1b2c3d4 + +#define BPF_LDX_B (BPF_LDX | BPF_B) +#define BPF_LDX_W (BPF_LDX | BPF_W) +#define BPF_JMP_JA (BPF_JMP | BPF_JA) +#define BPF_JMP_JEQ (BPF_JMP | BPF_JEQ) +#define BPF_JMP_JGT (BPF_JMP | BPF_JGT) +#define BPF_JMP_JGE (BPF_JMP | BPF_JGE) +#define BPF_JMP_JSET (BPF_JMP | BPF_JSET) +#define BPF_ALU_ADD (BPF_ALU | BPF_ADD) +#define BPF_ALU_SUB (BPF_ALU | BPF_SUB) +#define BPF_ALU_MUL (BPF_ALU | BPF_MUL) +#define BPF_ALU_DIV (BPF_ALU | BPF_DIV) +#define BPF_ALU_MOD (BPF_ALU | BPF_MOD) +#define BPF_ALU_NEG (BPF_ALU | BPF_NEG) +#define BPF_ALU_AND (BPF_ALU | BPF_AND) +#define BPF_ALU_OR (BPF_ALU | BPF_OR) +#define BPF_ALU_XOR (BPF_ALU | BPF_XOR) +#define BPF_ALU_LSH (BPF_ALU | BPF_LSH) +#define BPF_ALU_RSH (BPF_ALU | BPF_RSH) +#define BPF_MISC_TAX (BPF_MISC | BPF_TAX) +#define BPF_MISC_TXA (BPF_MISC | BPF_TXA) +#define BPF_LD_B (BPF_LD | BPF_B) +#define BPF_LD_H (BPF_LD | BPF_H) +#define BPF_LD_W (BPF_LD | BPF_W) + +#ifndef array_size +# define array_size(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef __check_format_printf +# define __check_format_printf(pos_fmtstr, pos_fmtargs) \ + __attribute__ ((format (printf, (pos_fmtstr), (pos_fmtargs)))) +#endif + +enum { + CMD_OK, + CMD_ERR, + CMD_EX, +}; + +struct shell_cmd { + const char *name; + int (*func)(char *args); +}; + +struct pcap_filehdr { + uint32_t magic; + uint16_t version_major; + uint16_t version_minor; + int32_t thiszone; + uint32_t sigfigs; + uint32_t snaplen; + uint32_t linktype; +}; + +struct pcap_timeval { + int32_t tv_sec; + int32_t tv_usec; +}; + +struct pcap_pkthdr { + struct pcap_timeval ts; + uint32_t caplen; + uint32_t len; +}; + +struct bpf_regs { + uint32_t A; + uint32_t X; + uint32_t M[BPF_MEMWORDS]; + uint32_t R; + bool Rs; + uint16_t Pc; +}; + +static struct sock_filter bpf_image[BPF_MAXINSNS + 1]; +static unsigned int bpf_prog_len; + +static int bpf_breakpoints[64]; +static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1]; +static struct bpf_regs bpf_curr; +static unsigned int bpf_regs_len; + +static int pcap_fd = -1; +static unsigned int pcap_packet; +static size_t pcap_map_size; +static char *pcap_ptr_va_start, *pcap_ptr_va_curr; + +static const char * const op_table[] = { + [BPF_ST] = "st", + [BPF_STX] = "stx", + [BPF_LD_B] = "ldb", + [BPF_LD_H] = "ldh", + [BPF_LD_W] = "ld", + [BPF_LDX] = "ldx", + [BPF_LDX_B] = "ldxb", + [BPF_JMP_JA] = "ja", + [BPF_JMP_JEQ] = "jeq", + [BPF_JMP_JGT] = "jgt", + [BPF_JMP_JGE] = "jge", + [BPF_JMP_JSET] = "jset", + [BPF_ALU_ADD] = "add", + [BPF_ALU_SUB] = "sub", + [BPF_ALU_MUL] = "mul", + [BPF_ALU_DIV] = "div", + [BPF_ALU_MOD] = "mod", + [BPF_ALU_NEG] = "neg", + [BPF_ALU_AND] = "and", + [BPF_ALU_OR] = "or", + [BPF_ALU_XOR] = "xor", + [BPF_ALU_LSH] = "lsh", + [BPF_ALU_RSH] = "rsh", + [BPF_MISC_TAX] = "tax", + [BPF_MISC_TXA] = "txa", + [BPF_RET] = "ret", +}; + +static __check_format_printf(1, 2) int rl_printf(const char *fmt, ...) +{ + int ret; + va_list vl; + + va_start(vl, fmt); + ret = vfprintf(rl_outstream, fmt, vl); + va_end(vl); + + return ret; +} + +static int matches(const char *cmd, const char *pattern) +{ + int len = strlen(cmd); + + if (len > strlen(pattern)) + return -1; + + return memcmp(pattern, cmd, len); +} + +static void hex_dump(const uint8_t *buf, size_t len) +{ + int i; + + rl_printf("%3u: ", 0); + for (i = 0; i < len; i++) { + if (i && !(i % 16)) + rl_printf("\n%3u: ", i); + rl_printf("%02x ", buf[i]); + } + rl_printf("\n"); +} + +static bool bpf_prog_loaded(void) +{ + if (bpf_prog_len == 0) + rl_printf("no bpf program loaded!\n"); + + return bpf_prog_len > 0; +} + +static void bpf_disasm(const struct sock_filter f, unsigned int i) +{ + const char *op, *fmt; + int val = f.k; + char buf[256]; + + switch (f.code) { + case BPF_RET | BPF_K: + op = op_table[BPF_RET]; + fmt = "#%#x"; + break; + case BPF_RET | BPF_A: + op = op_table[BPF_RET]; + fmt = "a"; + break; + case BPF_RET | BPF_X: + op = op_table[BPF_RET]; + fmt = "x"; + break; + case BPF_MISC_TAX: + op = op_table[BPF_MISC_TAX]; + fmt = ""; + break; + case BPF_MISC_TXA: + op = op_table[BPF_MISC_TXA]; + fmt = ""; + break; + case BPF_ST: + op = op_table[BPF_ST]; + fmt = "M[%d]"; + break; + case BPF_STX: + op = op_table[BPF_STX]; + fmt = "M[%d]"; + break; + case BPF_LD_W | BPF_ABS: + op = op_table[BPF_LD_W]; + fmt = "[%d]"; + break; + case BPF_LD_H | BPF_ABS: + op = op_table[BPF_LD_H]; + fmt = "[%d]"; + break; + case BPF_LD_B | BPF_ABS: + op = op_table[BPF_LD_B]; + fmt = "[%d]"; + break; + case BPF_LD_W | BPF_LEN: + op = op_table[BPF_LD_W]; + fmt = "#len"; + break; + case BPF_LD_W | BPF_IND: + op = op_table[BPF_LD_W]; + fmt = "[x+%d]"; + break; + case BPF_LD_H | BPF_IND: + op = op_table[BPF_LD_H]; + fmt = "[x+%d]"; + break; + case BPF_LD_B | BPF_IND: + op = op_table[BPF_LD_B]; + fmt = "[x+%d]"; + break; + case BPF_LD | BPF_IMM: + op = op_table[BPF_LD_W]; + fmt = "#%#x"; + break; + case BPF_LDX | BPF_IMM: + op = op_table[BPF_LDX]; + fmt = "#%#x"; + break; + case BPF_LDX_B | BPF_MSH: + op = op_table[BPF_LDX_B]; + fmt = "4*([%d]&0xf)"; + break; + case BPF_LD | BPF_MEM: + op = op_table[BPF_LD_W]; + fmt = "M[%d]"; + break; + case BPF_LDX | BPF_MEM: + op = op_table[BPF_LDX]; + fmt = "M[%d]"; + break; + case BPF_JMP_JA: + op = op_table[BPF_JMP_JA]; + fmt = "%d"; + val = i + 1 + f.k; + break; + case BPF_JMP_JGT | BPF_X: + op = op_table[BPF_JMP_JGT]; + fmt = "x"; + break; + case BPF_JMP_JGT | BPF_K: + op = op_table[BPF_JMP_JGT]; + fmt = "#%#x"; + break; + case BPF_JMP_JGE | BPF_X: + op = op_table[BPF_JMP_JGE]; + fmt = "x"; + break; + case BPF_JMP_JGE | BPF_K: + op = op_table[BPF_JMP_JGE]; + fmt = "#%#x"; + break; + case BPF_JMP_JEQ | BPF_X: + op = op_table[BPF_JMP_JEQ]; + fmt = "x"; + break; + case BPF_JMP_JEQ | BPF_K: + op = op_table[BPF_JMP_JEQ]; + fmt = "#%#x"; + break; + case BPF_JMP_JSET | BPF_X: + op = op_table[BPF_JMP_JSET]; + fmt = "x"; + break; + case BPF_JMP_JSET | BPF_K: + op = op_table[BPF_JMP_JSET]; + fmt = "#%#x"; + break; + case BPF_ALU_NEG: + op = op_table[BPF_ALU_NEG]; + fmt = ""; + break; + case BPF_ALU_LSH | BPF_X: + op = op_table[BPF_ALU_LSH]; + fmt = "x"; + break; + case BPF_ALU_LSH | BPF_K: + op = op_table[BPF_ALU_LSH]; + fmt = "#%d"; + break; + case BPF_ALU_RSH | BPF_X: + op = op_table[BPF_ALU_RSH]; + fmt = "x"; + break; + case BPF_ALU_RSH | BPF_K: + op = op_table[BPF_ALU_RSH]; + fmt = "#%d"; + break; + case BPF_ALU_ADD | BPF_X: + op = op_table[BPF_ALU_ADD]; + fmt = "x"; + break; + case BPF_ALU_ADD | BPF_K: + op = op_table[BPF_ALU_ADD]; + fmt = "#%d"; + break; + case BPF_ALU_SUB | BPF_X: + op = op_table[BPF_ALU_SUB]; + fmt = "x"; + break; + case BPF_ALU_SUB | BPF_K: + op = op_table[BPF_ALU_SUB]; + fmt = "#%d"; + break; + case BPF_ALU_MUL | BPF_X: + op = op_table[BPF_ALU_MUL]; + fmt = "x"; + break; + case BPF_ALU_MUL | BPF_K: + op = op_table[BPF_ALU_MUL]; + fmt = "#%d"; + break; + case BPF_ALU_DIV | BPF_X: + op = op_table[BPF_ALU_DIV]; + fmt = "x"; + break; + case BPF_ALU_DIV | BPF_K: + op = op_table[BPF_ALU_DIV]; + fmt = "#%d"; + break; + case BPF_ALU_MOD | BPF_X: + op = op_table[BPF_ALU_MOD]; + fmt = "x"; + break; + case BPF_ALU_MOD | BPF_K: + op = op_table[BPF_ALU_MOD]; + fmt = "#%d"; + break; + case BPF_ALU_AND | BPF_X: + op = op_table[BPF_ALU_AND]; + fmt = "x"; + break; + case BPF_ALU_AND | BPF_K: + op = op_table[BPF_ALU_AND]; + fmt = "#%#x"; + break; + case BPF_ALU_OR | BPF_X: + op = op_table[BPF_ALU_OR]; + fmt = "x"; + break; + case BPF_ALU_OR | BPF_K: + op = op_table[BPF_ALU_OR]; + fmt = "#%#x"; + break; + case BPF_ALU_XOR | BPF_X: + op = op_table[BPF_ALU_XOR]; + fmt = "x"; + break; + case BPF_ALU_XOR | BPF_K: + op = op_table[BPF_ALU_XOR]; + fmt = "#%#x"; + break; + default: + op = "nosup"; + fmt = "%#x"; + val = f.code; + break; + } + + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf), fmt, val); + buf[sizeof(buf) - 1] = 0; + + if ((BPF_CLASS(f.code) == BPF_JMP && BPF_OP(f.code) != BPF_JA)) + rl_printf("l%d:\t%s %s, l%d, l%d\n", i, op, buf, + i + 1 + f.jt, i + 1 + f.jf); + else + rl_printf("l%d:\t%s %s\n", i, op, buf); +} + +static void bpf_dump_curr(struct bpf_regs *r, struct sock_filter *f) +{ + int i, m = 0; + + rl_printf("pc: [%u]\n", r->Pc); + rl_printf("code: [%u] jt[%u] jf[%u] k[%u]\n", + f->code, f->jt, f->jf, f->k); + rl_printf("curr: "); + bpf_disasm(*f, r->Pc); + + if (f->jt || f->jf) { + rl_printf("jt: "); + bpf_disasm(*(f + f->jt + 1), r->Pc + f->jt + 1); + rl_printf("jf: "); + bpf_disasm(*(f + f->jf + 1), r->Pc + f->jf + 1); + } + + rl_printf("A: [%#08x][%u]\n", r->A, r->A); + rl_printf("X: [%#08x][%u]\n", r->X, r->X); + if (r->Rs) + rl_printf("ret: [%#08x][%u]!\n", r->R, r->R); + + for (i = 0; i < BPF_MEMWORDS; i++) { + if (r->M[i]) { + m++; + rl_printf("M[%d]: [%#08x][%u]\n", i, r->M[i], r->M[i]); + } + } + if (m == 0) + rl_printf("M[0,%d]: [%#08x][%u]\n", BPF_MEMWORDS - 1, 0, 0); +} + +static void bpf_dump_pkt(uint8_t *pkt, uint32_t pkt_caplen, uint32_t pkt_len) +{ + if (pkt_caplen != pkt_len) + rl_printf("cap: %u, len: %u\n", pkt_caplen, pkt_len); + else + rl_printf("len: %u\n", pkt_len); + + hex_dump(pkt, pkt_caplen); +} + +static void bpf_disasm_all(const struct sock_filter *f, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; i++) + bpf_disasm(f[i], i); +} + +static void bpf_dump_all(const struct sock_filter *f, unsigned int len) +{ + unsigned int i; + + rl_printf("/* { op, jt, jf, k }, */\n"); + for (i = 0; i < len; i++) + rl_printf("{ %#04x, %2u, %2u, %#010x },\n", + f[i].code, f[i].jt, f[i].jf, f[i].k); +} + +static bool bpf_runnable(struct sock_filter *f, unsigned int len) +{ + int sock, ret, i; + struct sock_fprog bpf = { + .filter = f, + .len = len, + }; + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + rl_printf("cannot open socket!\n"); + return false; + } + ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)); + close(sock); + if (ret < 0) { + rl_printf("program not allowed to run by kernel!\n"); + return false; + } + for (i = 0; i < len; i++) { + if (BPF_CLASS(f[i].code) == BPF_LD && + f[i].k > SKF_AD_OFF) { + rl_printf("extensions currently not supported!\n"); + return false; + } + } + + return true; +} + +static void bpf_reset_breakpoints(void) +{ + int i; + + for (i = 0; i < array_size(bpf_breakpoints); i++) + bpf_breakpoints[i] = -1; +} + +static void bpf_set_breakpoints(unsigned int where) +{ + int i; + bool set = false; + + for (i = 0; i < array_size(bpf_breakpoints); i++) { + if (bpf_breakpoints[i] == (int) where) { + rl_printf("breakpoint already set!\n"); + set = true; + break; + } + + if (bpf_breakpoints[i] == -1 && set == false) { + bpf_breakpoints[i] = where; + set = true; + } + } + + if (!set) + rl_printf("too many breakpoints set, reset first!\n"); +} + +static void bpf_dump_breakpoints(void) +{ + int i; + + rl_printf("breakpoints: "); + + for (i = 0; i < array_size(bpf_breakpoints); i++) { + if (bpf_breakpoints[i] < 0) + continue; + rl_printf("%d ", bpf_breakpoints[i]); + } + + rl_printf("\n"); +} + +static void bpf_reset(void) +{ + bpf_regs_len = 0; + + memset(bpf_regs, 0, sizeof(bpf_regs)); + memset(&bpf_curr, 0, sizeof(bpf_curr)); +} + +static void bpf_safe_regs(void) +{ + memcpy(&bpf_regs[bpf_regs_len++], &bpf_curr, sizeof(bpf_curr)); +} + +static bool bpf_restore_regs(int off) +{ + unsigned int index = bpf_regs_len - 1 + off; + + if (index == 0) { + bpf_reset(); + return true; + } else if (index < bpf_regs_len) { + memcpy(&bpf_curr, &bpf_regs[index], sizeof(bpf_curr)); + bpf_regs_len = index; + return true; + } else { + rl_printf("reached bottom of register history stack!\n"); + return false; + } +} + +static uint32_t extract_u32(uint8_t *pkt, uint32_t off) +{ + uint32_t r; + + memcpy(&r, &pkt[off], sizeof(r)); + + return ntohl(r); +} + +static uint16_t extract_u16(uint8_t *pkt, uint32_t off) +{ + uint16_t r; + + memcpy(&r, &pkt[off], sizeof(r)); + + return ntohs(r); +} + +static uint8_t extract_u8(uint8_t *pkt, uint32_t off) +{ + return pkt[off]; +} + +static void set_return(struct bpf_regs *r) +{ + r->R = 0; + r->Rs = true; +} + +static void bpf_single_step(struct bpf_regs *r, struct sock_filter *f, + uint8_t *pkt, uint32_t pkt_caplen, + uint32_t pkt_len) +{ + uint32_t K = f->k; + int d; + + switch (f->code) { + case BPF_RET | BPF_K: + r->R = K; + r->Rs = true; + break; + case BPF_RET | BPF_A: + r->R = r->A; + r->Rs = true; + break; + case BPF_RET | BPF_X: + r->R = r->X; + r->Rs = true; + break; + case BPF_MISC_TAX: + r->X = r->A; + break; + case BPF_MISC_TXA: + r->A = r->X; + break; + case BPF_ST: + r->M[K] = r->A; + break; + case BPF_STX: + r->M[K] = r->X; + break; + case BPF_LD_W | BPF_ABS: + d = pkt_caplen - K; + if (d >= sizeof(uint32_t)) + r->A = extract_u32(pkt, K); + else + set_return(r); + break; + case BPF_LD_H | BPF_ABS: + d = pkt_caplen - K; + if (d >= sizeof(uint16_t)) + r->A = extract_u16(pkt, K); + else + set_return(r); + break; + case BPF_LD_B | BPF_ABS: + d = pkt_caplen - K; + if (d >= sizeof(uint8_t)) + r->A = extract_u8(pkt, K); + else + set_return(r); + break; + case BPF_LD_W | BPF_IND: + d = pkt_caplen - (r->X + K); + if (d >= sizeof(uint32_t)) + r->A = extract_u32(pkt, r->X + K); + break; + case BPF_LD_H | BPF_IND: + d = pkt_caplen - (r->X + K); + if (d >= sizeof(uint16_t)) + r->A = extract_u16(pkt, r->X + K); + else + set_return(r); + break; + case BPF_LD_B | BPF_IND: + d = pkt_caplen - (r->X + K); + if (d >= sizeof(uint8_t)) + r->A = extract_u8(pkt, r->X + K); + else + set_return(r); + break; + case BPF_LDX_B | BPF_MSH: + d = pkt_caplen - K; + if (d >= sizeof(uint8_t)) { + r->X = extract_u8(pkt, K); + r->X = (r->X & 0xf) << 2; + } else + set_return(r); + break; + case BPF_LD_W | BPF_LEN: + r->A = pkt_len; + break; + case BPF_LDX_W | BPF_LEN: + r->A = pkt_len; + break; + case BPF_LD | BPF_IMM: + r->A = K; + break; + case BPF_LDX | BPF_IMM: + r->X = K; + break; + case BPF_LD | BPF_MEM: + r->A = r->M[K]; + break; + case BPF_LDX | BPF_MEM: + r->X = r->M[K]; + break; + case BPF_JMP_JA: + r->Pc += K; + break; + case BPF_JMP_JGT | BPF_X: + r->Pc += r->A > r->X ? f->jt : f->jf; + break; + case BPF_JMP_JGT | BPF_K: + r->Pc += r->A > K ? f->jt : f->jf; + break; + case BPF_JMP_JGE | BPF_X: + r->Pc += r->A >= r->X ? f->jt : f->jf; + break; + case BPF_JMP_JGE | BPF_K: + r->Pc += r->A >= K ? f->jt : f->jf; + break; + case BPF_JMP_JEQ | BPF_X: + r->Pc += r->A == r->X ? f->jt : f->jf; + break; + case BPF_JMP_JEQ | BPF_K: + r->Pc += r->A == K ? f->jt : f->jf; + break; + case BPF_JMP_JSET | BPF_X: + r->Pc += r->A & r->X ? f->jt : f->jf; + break; + case BPF_JMP_JSET | BPF_K: + r->Pc += r->A & K ? f->jt : f->jf; + break; + case BPF_ALU_NEG: + r->A = -r->A; + break; + case BPF_ALU_LSH | BPF_X: + r->A <<= r->X; + break; + case BPF_ALU_LSH | BPF_K: + r->A <<= K; + break; + case BPF_ALU_RSH | BPF_X: + r->A >>= r->X; + break; + case BPF_ALU_RSH | BPF_K: + r->A >>= K; + break; + case BPF_ALU_ADD | BPF_X: + r->A += r->X; + break; + case BPF_ALU_ADD | BPF_K: + r->A += K; + break; + case BPF_ALU_SUB | BPF_X: + r->A -= r->X; + break; + case BPF_ALU_SUB | BPF_K: + r->A -= K; + break; + case BPF_ALU_MUL | BPF_X: + r->A *= r->X; + break; + case BPF_ALU_MUL | BPF_K: + r->A *= K; + break; + case BPF_ALU_DIV | BPF_X: + case BPF_ALU_MOD | BPF_X: + if (r->X == 0) { + set_return(r); + break; + } + goto do_div; + case BPF_ALU_DIV | BPF_K: + case BPF_ALU_MOD | BPF_K: + if (K == 0) { + set_return(r); + break; + } +do_div: + switch (f->code) { + case BPF_ALU_DIV | BPF_X: + r->A /= r->X; + break; + case BPF_ALU_DIV | BPF_K: + r->A /= K; + break; + case BPF_ALU_MOD | BPF_X: + r->A %= r->X; + break; + case BPF_ALU_MOD | BPF_K: + r->A %= K; + break; + } + break; + case BPF_ALU_AND | BPF_X: + r->A &= r->X; + break; + case BPF_ALU_AND | BPF_K: + r->A &= K; + break; + case BPF_ALU_OR | BPF_X: + r->A |= r->X; + break; + case BPF_ALU_OR | BPF_K: + r->A |= K; + break; + case BPF_ALU_XOR | BPF_X: + r->A ^= r->X; + break; + case BPF_ALU_XOR | BPF_K: + r->A ^= K; + break; + } +} + +static bool bpf_pc_has_breakpoint(uint16_t pc) +{ + int i; + + for (i = 0; i < array_size(bpf_breakpoints); i++) { + if (bpf_breakpoints[i] < 0) + continue; + if (bpf_breakpoints[i] == pc) + return true; + } + + return false; +} + +static bool bpf_handle_breakpoint(struct bpf_regs *r, struct sock_filter *f, + uint8_t *pkt, uint32_t pkt_caplen, + uint32_t pkt_len) +{ + rl_printf("-- register dump --\n"); + bpf_dump_curr(r, &f[r->Pc]); + rl_printf("-- packet dump --\n"); + bpf_dump_pkt(pkt, pkt_caplen, pkt_len); + rl_printf("(breakpoint)\n"); + return true; +} + +static int bpf_run_all(struct sock_filter *f, uint16_t bpf_len, uint8_t *pkt, + uint32_t pkt_caplen, uint32_t pkt_len) +{ + bool stop = false; + + while (bpf_curr.Rs == false && stop == false) { + bpf_safe_regs(); + + if (bpf_pc_has_breakpoint(bpf_curr.Pc)) + stop = bpf_handle_breakpoint(&bpf_curr, f, pkt, + pkt_caplen, pkt_len); + + bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen, + pkt_len); + bpf_curr.Pc++; + } + + return stop ? -1 : bpf_curr.R; +} + +static int bpf_run_stepping(struct sock_filter *f, uint16_t bpf_len, + uint8_t *pkt, uint32_t pkt_caplen, + uint32_t pkt_len, int next) +{ + bool stop = false; + int i = 1; + + while (bpf_curr.Rs == false && stop == false) { + bpf_safe_regs(); + + if (i++ == next) + stop = bpf_handle_breakpoint(&bpf_curr, f, pkt, + pkt_caplen, pkt_len); + + bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen, + pkt_len); + bpf_curr.Pc++; + } + + return stop ? -1 : bpf_curr.R; +} + +static bool pcap_loaded(void) +{ + if (pcap_fd < 0) + rl_printf("no pcap file loaded!\n"); + + return pcap_fd >= 0; +} + +static struct pcap_pkthdr *pcap_curr_pkt(void) +{ + return (void *) pcap_ptr_va_curr; +} + +static bool pcap_next_pkt(void) +{ + struct pcap_pkthdr *hdr = pcap_curr_pkt(); + + if (pcap_ptr_va_curr + sizeof(*hdr) - + pcap_ptr_va_start >= pcap_map_size) + return false; + if (hdr->caplen == 0 || hdr->len == 0 || hdr->caplen > hdr->len) + return false; + if (pcap_ptr_va_curr + sizeof(*hdr) + hdr->caplen - + pcap_ptr_va_start >= pcap_map_size) + return false; + + pcap_ptr_va_curr += (sizeof(*hdr) + hdr->caplen); + return true; +} + +static void pcap_reset_pkt(void) +{ + pcap_ptr_va_curr = pcap_ptr_va_start + sizeof(struct pcap_filehdr); +} + +static int try_load_pcap(const char *file) +{ + struct pcap_filehdr *hdr; + struct stat sb; + int ret; + + pcap_fd = open(file, O_RDONLY); + if (pcap_fd < 0) { + rl_printf("cannot open pcap [%s]!\n", strerror(errno)); + return CMD_ERR; + } + + ret = fstat(pcap_fd, &sb); + if (ret < 0) { + rl_printf("cannot fstat pcap file!\n"); + return CMD_ERR; + } + + if (!S_ISREG(sb.st_mode)) { + rl_printf("not a regular pcap file, duh!\n"); + return CMD_ERR; + } + + pcap_map_size = sb.st_size; + if (pcap_map_size <= sizeof(struct pcap_filehdr)) { + rl_printf("pcap file too small!\n"); + return CMD_ERR; + } + + pcap_ptr_va_start = mmap(NULL, pcap_map_size, PROT_READ, + MAP_SHARED | MAP_LOCKED, pcap_fd, 0); + if (pcap_ptr_va_start == MAP_FAILED) { + rl_printf("mmap of file failed!"); + return CMD_ERR; + } + + hdr = (void *) pcap_ptr_va_start; + if (hdr->magic != TCPDUMP_MAGIC) { + rl_printf("wrong pcap magic!\n"); + return CMD_ERR; + } + + pcap_reset_pkt(); + + return CMD_OK; + +} + +static void try_close_pcap(void) +{ + if (pcap_fd >= 0) { + munmap(pcap_ptr_va_start, pcap_map_size); + close(pcap_fd); + + pcap_ptr_va_start = pcap_ptr_va_curr = NULL; + pcap_map_size = 0; + pcap_packet = 0; + pcap_fd = -1; + } +} + +static int cmd_load_bpf(char *bpf_string) +{ + char sp, *token, separator = ','; + unsigned short bpf_len, i = 0; + struct sock_filter tmp; + + bpf_prog_len = 0; + memset(bpf_image, 0, sizeof(bpf_image)); + + if (sscanf(bpf_string, "%hu%c", &bpf_len, &sp) != 2 || + sp != separator || bpf_len > BPF_MAXINSNS || bpf_len == 0) { + rl_printf("syntax error in head length encoding!\n"); + return CMD_ERR; + } + + token = bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + rl_printf("program exceeds encoded length!\n"); + return CMD_ERR; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &tmp.code, &tmp.jt, &tmp.jf, &tmp.k) != 4) { + rl_printf("syntax error at instruction %d!\n", i); + return CMD_ERR; + } + + bpf_image[i].code = tmp.code; + bpf_image[i].jt = tmp.jt; + bpf_image[i].jf = tmp.jf; + bpf_image[i].k = tmp.k; + + i++; + } + + if (i != bpf_len) { + rl_printf("syntax error exceeding encoded length!\n"); + return CMD_ERR; + } else + bpf_prog_len = bpf_len; + if (!bpf_runnable(bpf_image, bpf_prog_len)) + bpf_prog_len = 0; + + return CMD_OK; +} + +static int cmd_load_pcap(char *file) +{ + char *file_trim, *tmp; + + file_trim = strtok_r(file, " ", &tmp); + if (file_trim == NULL) + return CMD_ERR; + + try_close_pcap(); + + return try_load_pcap(file_trim); +} + +static int cmd_load(char *arg) +{ + char *subcmd, *cont, *tmp = strdup(arg); + int ret = CMD_OK; + + subcmd = strtok_r(tmp, " ", &cont); + if (subcmd == NULL) + goto out; + if (matches(subcmd, "bpf") == 0) { + bpf_reset(); + bpf_reset_breakpoints(); + + ret = cmd_load_bpf(cont); + } else if (matches(subcmd, "pcap") == 0) { + ret = cmd_load_pcap(cont); + } else { +out: + rl_printf("bpf : load bpf code\n"); + rl_printf("pcap : load pcap file\n"); + ret = CMD_ERR; + } + + free(tmp); + return ret; +} + +static int cmd_step(char *num) +{ + struct pcap_pkthdr *hdr; + int steps, ret; + + if (!bpf_prog_loaded() || !pcap_loaded()) + return CMD_ERR; + + steps = strtol(num, NULL, 10); + if (steps == 0 || strlen(num) == 0) + steps = 1; + if (steps < 0) { + if (!bpf_restore_regs(steps)) + return CMD_ERR; + steps = 1; + } + + hdr = pcap_curr_pkt(); + ret = bpf_run_stepping(bpf_image, bpf_prog_len, + (uint8_t *) hdr + sizeof(*hdr), + hdr->caplen, hdr->len, steps); + if (ret >= 0 || bpf_curr.Rs) { + bpf_reset(); + if (!pcap_next_pkt()) { + rl_printf("(going back to first packet)\n"); + pcap_reset_pkt(); + } else { + rl_printf("(next packet)\n"); + } + } + + return CMD_OK; +} + +static int cmd_select(char *num) +{ + unsigned int which, i; + bool have_next = true; + + if (!pcap_loaded() || strlen(num) == 0) + return CMD_ERR; + + which = strtoul(num, NULL, 10); + if (which == 0) { + rl_printf("packet count starts with 1, clamping!\n"); + which = 1; + } + + pcap_reset_pkt(); + bpf_reset(); + + for (i = 0; i < which && (have_next = pcap_next_pkt()); i++) + /* noop */; + if (!have_next || pcap_curr_pkt() == NULL) { + rl_printf("no packet #%u available!\n", which); + pcap_reset_pkt(); + return CMD_ERR; + } + + return CMD_OK; +} + +static int cmd_breakpoint(char *subcmd) +{ + if (!bpf_prog_loaded()) + return CMD_ERR; + if (strlen(subcmd) == 0) + bpf_dump_breakpoints(); + else if (matches(subcmd, "reset") == 0) + bpf_reset_breakpoints(); + else { + unsigned int where = strtoul(subcmd, NULL, 10); + + if (where < bpf_prog_len) { + bpf_set_breakpoints(where); + rl_printf("breakpoint at: "); + bpf_disasm(bpf_image[where], where); + } + } + + return CMD_OK; +} + +static int cmd_run(char *num) +{ + static uint32_t pass, fail; + bool has_limit = true; + int pkts = 0, i = 0; + + if (!bpf_prog_loaded() || !pcap_loaded()) + return CMD_ERR; + + pkts = strtol(num, NULL, 10); + if (pkts == 0 || strlen(num) == 0) + has_limit = false; + + do { + struct pcap_pkthdr *hdr = pcap_curr_pkt(); + int ret = bpf_run_all(bpf_image, bpf_prog_len, + (uint8_t *) hdr + sizeof(*hdr), + hdr->caplen, hdr->len); + if (ret > 0) + pass++; + else if (ret == 0) + fail++; + else + return CMD_OK; + bpf_reset(); + } while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts))); + + rl_printf("bpf passes:%u fails:%u\n", pass, fail); + + pcap_reset_pkt(); + bpf_reset(); + + pass = fail = 0; + return CMD_OK; +} + +static int cmd_disassemble(char *line_string) +{ + bool single_line = false; + unsigned long line; + + if (!bpf_prog_loaded()) + return CMD_ERR; + if (strlen(line_string) > 0 && + (line = strtoul(line_string, NULL, 10)) < bpf_prog_len) + single_line = true; + if (single_line) + bpf_disasm(bpf_image[line], line); + else + bpf_disasm_all(bpf_image, bpf_prog_len); + + return CMD_OK; +} + +static int cmd_dump(char *dontcare) +{ + if (!bpf_prog_loaded()) + return CMD_ERR; + + bpf_dump_all(bpf_image, bpf_prog_len); + + return CMD_OK; +} + +static int cmd_quit(char *dontcare) +{ + return CMD_EX; +} + +static const struct shell_cmd cmds[] = { + { .name = "load", .func = cmd_load }, + { .name = "select", .func = cmd_select }, + { .name = "step", .func = cmd_step }, + { .name = "run", .func = cmd_run }, + { .name = "breakpoint", .func = cmd_breakpoint }, + { .name = "disassemble", .func = cmd_disassemble }, + { .name = "dump", .func = cmd_dump }, + { .name = "quit", .func = cmd_quit }, +}; + +static int execf(char *arg) +{ + char *cmd, *cont, *tmp = strdup(arg); + int i, ret = 0, len; + + cmd = strtok_r(tmp, " ", &cont); + if (cmd == NULL) + goto out; + len = strlen(cmd); + for (i = 0; i < array_size(cmds); i++) { + if (len != strlen(cmds[i].name)) + continue; + if (strncmp(cmds[i].name, cmd, len) == 0) { + ret = cmds[i].func(cont); + break; + } + } +out: + free(tmp); + return ret; +} + +static char *shell_comp_gen(const char *buf, int state) +{ + static int list_index, len; + + if (!state) { + list_index = 0; + len = strlen(buf); + } + + for (; list_index < array_size(cmds); ) { + const char *name = cmds[list_index].name; + + list_index++; + if (strncmp(name, buf, len) == 0) + return strdup(name); + } + + return NULL; +} + +static char **shell_completion(const char *buf, int start, int end) +{ + char **matches = NULL; + + if (start == 0) + matches = rl_completion_matches(buf, shell_comp_gen); + + return matches; +} + +static void intr_shell(int sig) +{ + if (rl_end) + rl_kill_line(-1, 0); + + rl_crlf(); + rl_refresh_line(0, 0); + rl_free_line_state(); +} + +static void init_shell(FILE *fin, FILE *fout) +{ + char file[128]; + + snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME")); + read_history(file); + + rl_instream = fin; + rl_outstream = fout; + + rl_readline_name = "bpf_dbg"; + rl_terminal_name = getenv("TERM"); + + rl_catch_signals = 0; + rl_catch_sigwinch = 1; + + rl_attempted_completion_function = shell_completion; + + rl_bind_key('\t', rl_complete); + + rl_bind_key_in_map('\t', rl_complete, emacs_meta_keymap); + rl_bind_key_in_map('\033', rl_complete, emacs_meta_keymap); + + snprintf(file, sizeof(file), "%s/.bpf_dbg_init", getenv("HOME")); + rl_read_init_file(file); + + rl_prep_terminal(0); + rl_set_signals(); + + signal(SIGINT, intr_shell); +} + +static void exit_shell(FILE *fin, FILE *fout) +{ + char file[128]; + + snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME")); + write_history(file); + + clear_history(); + rl_deprep_terminal(); + + try_close_pcap(); + + if (fin != stdin) + fclose(fin); + if (fout != stdout) + fclose(fout); +} + +static int run_shell_loop(FILE *fin, FILE *fout) +{ + char *buf; + + init_shell(fin, fout); + + while ((buf = readline("> ")) != NULL) { + int ret = execf(buf); + if (ret == CMD_EX) + break; + if (ret == CMD_OK && strlen(buf) > 0) + add_history(buf); + + free(buf); + } + + exit_shell(fin, fout); + return 0; +} + +int main(int argc, char **argv) +{ + FILE *fin = NULL, *fout = NULL; + + if (argc >= 2) + fin = fopen(argv[1], "r"); + if (argc >= 3) + fout = fopen(argv[2], "w"); + + return run_shell_loop(fin ? : stdin, fout ? : stdout); +} diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l new file mode 100644 index 000000000000..bd83149e7be0 --- /dev/null +++ b/tools/bpf/bpf_exp.l @@ -0,0 +1,198 @@ +/* + * BPF asm code lexer + * + * This program is free software; you can distribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * Syntax kept close to: + * + * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new + * architecture for user-level packet capture. In Proceedings of the + * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993 + * Conference Proceedings (USENIX'93). USENIX Association, Berkeley, + * CA, USA, 2-2. + * + * Copyright 2013 Daniel Borkmann + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +%{ + +#include +#include +#include +#include + +#include + +#include "bpf_exp.yacc.h" + +extern void yyerror(const char *str); + +%} + +%option align +%option ecs + +%option nounput +%option noreject +%option noinput +%option noyywrap + +%option 8bit +%option caseless +%option yylineno + +%% + +"ldb" { return OP_LDB; } +"ldh" { return OP_LDH; } +"ld" { return OP_LD; } +"ldi" { return OP_LDI; } +"ldx" { return OP_LDX; } +"ldxi" { return OP_LDXI; } +"ldxb" { return OP_LDXB; } +"st" { return OP_ST; } +"stx" { return OP_STX; } +"jmp" { return OP_JMP; } +"ja" { return OP_JMP; } +"jeq" { return OP_JEQ; } +"jneq" { return OP_JNEQ; } +"jne" { return OP_JNEQ; } +"jlt" { return OP_JLT; } +"jle" { return OP_JLE; } +"jgt" { return OP_JGT; } +"jge" { return OP_JGE; } +"jset" { return OP_JSET; } +"add" { return OP_ADD; } +"sub" { return OP_SUB; } +"mul" { return OP_MUL; } +"div" { return OP_DIV; } +"mod" { return OP_MOD; } +"neg" { return OP_NEG; } +"and" { return OP_AND; } +"xor" { return OP_XOR; } +"or" { return OP_OR; } +"lsh" { return OP_LSH; } +"rsh" { return OP_RSH; } +"ret" { return OP_RET; } +"tax" { return OP_TAX; } +"txa" { return OP_TXA; } + +"#"?("len") { return K_PKT_LEN; } + +"#"?("proto") { + yylval.number = SKF_AD_PROTOCOL; + return extension; + } +"#"?("type") { + yylval.number = SKF_AD_PKTTYPE; + return extension; + } +"#"?("poff") { + yylval.number = SKF_AD_PAY_OFFSET; + return extension; + } +"#"?("ifidx") { + yylval.number = SKF_AD_IFINDEX; + return extension; + } +"#"?("nla") { + yylval.number = SKF_AD_NLATTR; + return extension; + } +"#"?("nlan") { + yylval.number = SKF_AD_NLATTR_NEST; + return extension; + } +"#"?("mark") { + yylval.number = SKF_AD_MARK; + return extension; + } +"#"?("queue") { + yylval.number = SKF_AD_QUEUE; + return extension; + } +"#"?("hatype") { + yylval.number = SKF_AD_HATYPE; + return extension; + } +"#"?("rxhash") { + yylval.number = SKF_AD_RXHASH; + return extension; + } +"#"?("cpu") { + yylval.number = SKF_AD_CPU; + return extension; + } +"#"?("vlan_tci") { + yylval.number = SKF_AD_VLAN_TAG; + return extension; + } +"#"?("vlan_pr") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_avail") { + yylval.number = SKF_AD_VLAN_TAG_PRESENT; + return extension; + } +"#"?("vlan_tpid") { + yylval.number = SKF_AD_VLAN_TPID; + return extension; + } +"#"?("rand") { + yylval.number = SKF_AD_RANDOM; + return extension; + } + +":" { return ':'; } +"," { return ','; } +"#" { return '#'; } +"%" { return '%'; } +"[" { return '['; } +"]" { return ']'; } +"(" { return '('; } +")" { return ')'; } +"x" { return 'x'; } +"a" { return 'a'; } +"+" { return '+'; } +"M" { return 'M'; } +"*" { return '*'; } +"&" { return '&'; } + +([0][x][a-fA-F0-9]+) { + yylval.number = strtoul(yytext, NULL, 16); + return number; + } +([0][b][0-1]+) { + yylval.number = strtol(yytext + 2, NULL, 2); + return number; + } +(([0])|([-+]?[1-9][0-9]*)) { + yylval.number = strtol(yytext, NULL, 10); + return number; + } +([0][0-9]+) { + yylval.number = strtol(yytext + 1, NULL, 8); + return number; + } +[a-zA-Z_][a-zA-Z0-9_]+ { + yylval.label = strdup(yytext); + return label; + } + +"/*"([^\*]|\*[^/])*"*/" { /* NOP */ } +";"[^\n]* { /* NOP */ } +^#.* { /* NOP */ } +[ \t]+ { /* NOP */ } +[ \n]+ { /* NOP */ } + +. { + printf("unknown character \'%s\'", yytext); + yyerror("lex unknown character"); + } + +%% diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y new file mode 100644 index 000000000000..56ba1de50784 --- /dev/null +++ b/tools/bpf/bpf_exp.y @@ -0,0 +1,656 @@ +/* + * BPF asm code parser + * + * This program is free software; you can distribute it and/or modify + * it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * Syntax kept close to: + * + * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new + * architecture for user-level packet capture. In Proceedings of the + * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993 + * Conference Proceedings (USENIX'93). USENIX Association, Berkeley, + * CA, USA, 2-2. + * + * Copyright 2013 Daniel Borkmann + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +%{ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_exp.yacc.h" + +enum jmp_type { JTL, JFL, JKL }; + +extern FILE *yyin; +extern int yylineno; +extern int yylex(void); +extern void yyerror(const char *str); + +extern void bpf_asm_compile(FILE *fp, bool cstyle); +static void bpf_set_curr_instr(uint16_t op, uint8_t jt, uint8_t jf, uint32_t k); +static void bpf_set_curr_label(char *label); +static void bpf_set_jmp_label(char *label, enum jmp_type type); + +%} + +%union { + char *label; + uint32_t number; +} + +%token OP_LDB OP_LDH OP_LD OP_LDX OP_ST OP_STX OP_JMP OP_JEQ OP_JGT OP_JGE +%token OP_JSET OP_ADD OP_SUB OP_MUL OP_DIV OP_AND OP_OR OP_XOR OP_LSH OP_RSH +%token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI +%token OP_LDXI + +%token K_PKT_LEN + +%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' + +%token extension number label + +%type