summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig4
-rw-r--r--net/atm/Kconfig2
-rw-r--r--net/ax25/Kconfig6
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v_elp.c2
-rw-r--r--net/batman-adv/bat_v_ogm.c4
-rw-r--r--net/batman-adv/distributed-arp-table.h5
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/trace.h1
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bridge/Kconfig12
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_input.c3
-rw-r--r--net/bridge/br_ioctl.c3
-rw-r--r--net/bridge/br_mrp.c559
-rw-r--r--net/bridge/br_mrp_netlink.c120
-rw-r--r--net/bridge/br_mrp_switchdev.c140
-rw-r--r--net/bridge/br_netlink.c12
-rw-r--r--net/bridge/br_private.h38
-rw-r--r--net/bridge/br_private_mrp.h63
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_stp_if.c11
-rw-r--r--net/bridge/br_sysfs_br.c4
-rw-r--r--net/caif/chnl_net.c3
-rw-r--r--net/ceph/Kconfig2
-rw-r--r--net/core/gen_stats.c2
-rw-r--r--net/core/netpoll.c9
-rw-r--r--net/core/pktgen.c2
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/decnet/Kconfig4
-rw-r--r--net/dns_resolver/Kconfig2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dns_resolver/dns_query.c2
-rw-r--r--net/hsr/hsr_main.c22
-rw-r--r--net/hsr/hsr_main.h19
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/inet_diag.c38
-rw-r--r--net/ipv4/nexthop.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_input.c51
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/addrconf.c12
-rw-r--r--net/ipv6/addrconf_core.c3
-rw-r--r--net/ipv6/anycast.c4
-rw-r--r--net/ipv6/ila/ila.h5
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/route.c14
-rw-r--r--net/lapb/Kconfig2
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_flow_table_offload.c3
-rw-r--r--net/netfilter/nf_tables_api.c38
-rw-r--r--net/netfilter/nft_nat.c110
-rw-r--r--net/netlink/Makefile2
-rw-r--r--net/netlink/genetlink.c78
-rw-r--r--net/netlink/policy.c308
-rw-r--r--net/rxrpc/Kconfig6
-rw-r--r--net/rxrpc/sysctl.c2
-rw-r--r--net/sched/Kconfig12
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c13
-rw-r--r--net/sched/act_gate.c636
-rw-r--r--net/sched/cls_api.c33
-rw-r--r--net/sched/sch_red.c9
-rw-r--r--net/smc/af_smc.c190
-rw-r--r--net/smc/smc.h1
-rw-r--r--net/smc/smc_cdc.c8
-rw-r--r--net/smc/smc_clc.c12
-rw-r--r--net/smc/smc_clc.h2
-rw-r--r--net/smc/smc_core.c909
-rw-r--r--net/smc/smc_core.h138
-rw-r--r--net/smc/smc_ib.c66
-rw-r--r--net/smc/smc_ib.h10
-rw-r--r--net/smc/smc_ism.c3
-rw-r--r--net/smc/smc_llc.c847
-rw-r--r--net/smc/smc_llc.h50
-rw-r--r--net/smc/smc_pnet.c550
-rw-r--r--net/smc/smc_pnet.h7
-rw-r--r--net/smc/smc_tx.c15
-rw-r--r--net/smc/smc_wr.c21
-rw-r--r--net/wireless/nl80211.c91
-rw-r--r--net/wireless/nl80211.h2
-rw-r--r--net/wireless/pmsr.c3
-rw-r--r--net/wireless/radiotap.c2
-rw-r--r--net/x25/Kconfig4
96 files changed, 4277 insertions, 1177 deletions
diff --git a/net/Kconfig b/net/Kconfig
index df8d8c9bd021..c5ba2d180c43 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -86,7 +86,7 @@ config INET
"Sysctl support" below, you can change various aspects of the
behavior of the TCP/IP code by writing to the (virtual) files in
/proc/sys/net/ipv4/*; the options are explained in the file
- <file:Documentation/networking/ip-sysctl.txt>.
+ <file:Documentation/networking/ip-sysctl.rst>.
Short answer: say Y.
@@ -344,7 +344,7 @@ config NET_PKTGEN
what was just said, you don't need it: say N.
Documentation on how to use the packet generator can be found
- at <file:Documentation/networking/pktgen.txt>.
+ at <file:Documentation/networking/pktgen.rst>.
To compile this code as a module, choose M here: the
module will be called pktgen.
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 271f682e8438..e61dcc9f85b2 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -16,7 +16,7 @@ config ATM
of your ATM card below.
Note that you need a set of user-space programs to actually make use
- of ATM. See the file <file:Documentation/networking/atm.txt> for
+ of ATM. See the file <file:Documentation/networking/atm.rst> for
further details.
config ATM_CLIP
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index 043fd5437809..97d686d115c0 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -40,7 +40,7 @@ config AX25
radio as well as information about how to configure an AX.25 port is
contained in the AX25-HOWTO, available from
<http://www.tldp.org/docs.html#howto>. You might also want to
- check out the file <file:Documentation/networking/ax25.txt> in the
+ check out the file <file:Documentation/networking/ax25.rst> in the
kernel source. More information about digital amateur radio in
general is on the WWW at
<http://www.tapr.org/>.
@@ -88,7 +88,7 @@ config NETROM
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
<http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.txt>. More information about
+ file <file:Documentation/networking/ax25.rst>. More information about
digital amateur radio in general is on the WWW at
<http://www.tapr.org/>.
@@ -107,7 +107,7 @@ config ROSE
users as well as information about how to configure an AX.25 port is
contained in the Linux Ham Wiki, available from
<http://www.linux-ax25.org>. You also might want to check out the
- file <file:Documentation/networking/ax25.txt>. More information about
+ file <file:Documentation/networking/ax25.rst>. More information about
digital amateur radio in general is on the WWW at
<http://www.tapr.org/>.
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a7c8dd7ae513..e87f19c82e8d 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -280,7 +280,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
unsigned int msecs;
msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
- msecs += prandom_u32() % (2 * BATADV_JITTER);
+ msecs += prandom_u32_max(2 * BATADV_JITTER);
return jiffies + msecs_to_jiffies(msecs);
}
@@ -288,7 +288,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
/* when do we schedule a ogm packet to be sent */
static unsigned long batadv_iv_ogm_fwd_send_time(void)
{
- return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2));
+ return jiffies + msecs_to_jiffies(prandom_u32_max(BATADV_JITTER / 2));
}
/* apply hop penalty for a normal link */
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 1e3172db7492..353e49c40e7f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -49,7 +49,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
unsigned int msecs;
msecs = atomic_read(&hard_iface->bat_v.elp_interval) - BATADV_JITTER;
- msecs += prandom_u32() % (2 * BATADV_JITTER);
+ msecs += prandom_u32_max(2 * BATADV_JITTER);
queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.elp_wq,
msecs_to_jiffies(msecs));
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 969466218999..0959d32be65c 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -88,7 +88,7 @@ static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface)
unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000;
/* msecs * [0.9, 1.1] */
- msecs += prandom_u32() % (msecs / 5) - (msecs / 10);
+ msecs += prandom_u32_max(msecs / 5) - (msecs / 10);
queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq,
msecs_to_jiffies(msecs / 1000));
}
@@ -107,7 +107,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
return;
msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
- msecs += prandom_u32() % (2 * BATADV_JITTER);
+ msecs += prandom_u32_max(2 * BATADV_JITTER);
queue_delayed_work(batadv_event_workqueue, &bat_priv->bat_v.ogm_wq,
msecs_to_jiffies(msecs));
}
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 2bff2f4a325c..4e031661682a 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -163,11 +163,6 @@ static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
{
}
-static inline void batadv_arp_change_timeout(struct net_device *soft_iface,
- const char *name)
-{
-}
-
static inline int batadv_dat_init(struct batadv_priv *bat_priv)
{
return 0;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2a234d0ad445..61d8dbe8c954 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.1"
+#define BATADV_SOURCE_VERSION "2020.2"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index f631b1e01b89..a87547570b4e 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -15,7 +15,6 @@
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/tracepoint.h>
-#include <linux/types.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM batadv
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4a17a66cc572..d152b8e81f61 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1086,7 +1086,7 @@ struct batadv_priv_bla {
* struct batadv_priv_debug_log - debug logging data
*/
struct batadv_priv_debug_log {
- /** @log_buff: buffer holding the logs (ring bufer) */
+ /** @log_buff: buffer holding the logs (ring buffer) */
char log_buff[BATADV_LOG_BUF_LEN];
/** @log_start: index of next character to read */
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index e4fb050e2078..51a6414145d2 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -61,3 +61,15 @@ config BRIDGE_VLAN_FILTERING
Say N to exclude this support and reduce the binary size.
If unsure, say Y.
+
+config BRIDGE_MRP
+ bool "MRP protocol"
+ depends on BRIDGE
+ default n
+ help
+ If you say Y here, then the Ethernet bridge will be able to run MRP
+ protocol to detect loops
+
+ Say N to exclude this support and reduce the binary size.
+
+ If unsure, say N.
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 49da7ae6f077..ccb394236fbd 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -25,3 +25,5 @@ bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_opt
bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
obj-$(CONFIG_NETFILTER) += netfilter/
+
+bridge-$(CONFIG_BRIDGE_MRP) += br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0e3dbc5f3c34..8ec1362588af 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -463,6 +463,9 @@ void br_dev_setup(struct net_device *dev)
spin_lock_init(&br->lock);
INIT_LIST_HEAD(&br->port_list);
INIT_HLIST_HEAD(&br->fdb_list);
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+ INIT_LIST_HEAD(&br->mrp_list);
+#endif
spin_lock_init(&br->hash_lock);
br->bridge_id.prio[0] = 0x80;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4fe30b182ee7..ca685c0cdf95 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -333,6 +333,8 @@ static void del_nbp(struct net_bridge_port *p)
br_stp_disable_port(p);
spin_unlock_bh(&br->lock);
+ br_mrp_port_del(br, p);
+
br_ifinfo_notify(RTM_DELLINK, NULL, p);
list_del_rcu(&p->list);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fcc260840028..d5c34f36f0f4 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -342,6 +342,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
}
}
+ if (unlikely(br_mrp_process(p, skb)))
+ return RX_HANDLER_PASS;
+
forward:
switch (p->state) {
case BR_STATE_FORWARDING:
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index ae22d784b88a..5e71fc8b826f 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -242,8 +242,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- br_stp_set_enabled(br, args[1]);
- ret = 0;
+ ret = br_stp_set_enabled(br, args[1], NULL);
break;
case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
new file mode 100644
index 000000000000..d7bc09de4c13
--- /dev/null
+++ b/net/bridge/br_mrp.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/mrp_bridge.h>
+#include "br_private_mrp.h"
+
+static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 };
+
+static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br,
+ u32 ifindex)
+{
+ struct net_bridge_port *res = NULL;
+ struct net_bridge_port *port;
+
+ list_for_each_entry(port, &br->port_list, list) {
+ if (port->dev->ifindex == ifindex) {
+ res = port;
+ break;
+ }
+ }
+
+ return res;
+}
+
+static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id)
+{
+ struct br_mrp *res = NULL;
+ struct br_mrp *mrp;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+ lockdep_rtnl_is_held()) {
+ if (mrp->ring_id == ring_id) {
+ res = mrp;
+ break;
+ }
+ }
+
+ return res;
+}
+
+static struct br_mrp *br_mrp_find_port(struct net_bridge *br,
+ struct net_bridge_port *p)
+{
+ struct br_mrp *res = NULL;
+ struct br_mrp *mrp;
+
+ list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+ lockdep_rtnl_is_held()) {
+ if (rcu_access_pointer(mrp->p_port) == p ||
+ rcu_access_pointer(mrp->s_port) == p) {
+ res = mrp;
+ break;
+ }
+ }
+
+ return res;
+}
+
+static int br_mrp_next_seq(struct br_mrp *mrp)
+{
+ mrp->seq_id++;
+ return mrp->seq_id;
+}
+
+static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p,
+ const u8 *src, const u8 *dst)
+{
+ struct ethhdr *eth_hdr;
+ struct sk_buff *skb;
+ u16 *version;
+
+ skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH);
+ if (!skb)
+ return NULL;
+
+ skb->dev = p->dev;
+ skb->protocol = htons(ETH_P_MRP);
+ skb->priority = MRP_FRAME_PRIO;
+ skb_reserve(skb, sizeof(*eth_hdr));
+
+ eth_hdr = skb_push(skb, sizeof(*eth_hdr));
+ ether_addr_copy(eth_hdr->h_dest, dst);
+ ether_addr_copy(eth_hdr->h_source, src);
+ eth_hdr->h_proto = htons(ETH_P_MRP);
+
+ version = skb_put(skb, sizeof(*version));
+ *version = cpu_to_be16(MRP_VERSION);
+
+ return skb;
+}
+
+static void br_mrp_skb_tlv(struct sk_buff *skb,
+ enum br_mrp_tlv_header_type type,
+ u8 length)
+{
+ struct br_mrp_tlv_hdr *hdr;
+
+ hdr = skb_put(skb, sizeof(*hdr));
+ hdr->type = type;
+ hdr->length = length;
+}
+
+static void br_mrp_skb_common(struct sk_buff *skb, struct br_mrp *mrp)
+{
+ struct br_mrp_common_hdr *hdr;
+
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_COMMON, sizeof(*hdr));
+
+ hdr = skb_put(skb, sizeof(*hdr));
+ hdr->seq_id = cpu_to_be16(br_mrp_next_seq(mrp));
+ memset(hdr->domain, 0xff, MRP_DOMAIN_UUID_LENGTH);
+}
+
+static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
+ struct net_bridge_port *p,
+ enum br_mrp_port_role_type port_role)
+{
+ struct br_mrp_ring_test_hdr *hdr = NULL;
+ struct sk_buff *skb = NULL;
+
+ if (!p)
+ return NULL;
+
+ skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_test_dmac);
+ if (!skb)
+ return NULL;
+
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_RING_TEST, sizeof(*hdr));
+ hdr = skb_put(skb, sizeof(*hdr));
+
+ hdr->prio = cpu_to_be16(MRP_DEFAULT_PRIO);
+ ether_addr_copy(hdr->sa, p->br->dev->dev_addr);
+ hdr->port_role = cpu_to_be16(port_role);
+ hdr->state = cpu_to_be16(mrp->ring_state);
+ hdr->transitions = cpu_to_be16(mrp->ring_transitions);
+ hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
+
+ br_mrp_skb_common(skb, mrp);
+ br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
+
+ return skb;
+}
+
+static void br_mrp_test_work_expired(struct work_struct *work)
+{
+ struct delayed_work *del_work = to_delayed_work(work);
+ struct br_mrp *mrp = container_of(del_work, struct br_mrp, test_work);
+ struct net_bridge_port *p;
+ bool notify_open = false;
+ struct sk_buff *skb;
+
+ if (time_before_eq(mrp->test_end, jiffies))
+ return;
+
+ if (mrp->test_count_miss < mrp->test_max_miss) {
+ mrp->test_count_miss++;
+ } else {
+ /* Notify that the ring is open only if the ring state is
+ * closed, otherwise it would continue to notify at every
+ * interval.
+ */
+ if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED)
+ notify_open = true;
+ }
+
+ rcu_read_lock();
+
+ p = rcu_dereference(mrp->p_port);
+ if (p) {
+ skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_PRIMARY);
+ if (!skb)
+ goto out;
+
+ skb_reset_network_header(skb);
+ dev_queue_xmit(skb);
+
+ if (notify_open && !mrp->ring_role_offloaded)
+ br_mrp_port_open(p->dev, true);
+ }
+
+ p = rcu_dereference(mrp->s_port);
+ if (p) {
+ skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_SECONDARY);
+ if (!skb)
+ goto out;
+
+ skb_reset_network_header(skb);
+ dev_queue_xmit(skb);
+
+ if (notify_open && !mrp->ring_role_offloaded)
+ br_mrp_port_open(p->dev, true);
+ }
+
+out:
+ rcu_read_unlock();
+
+ queue_delayed_work(system_wq, &mrp->test_work,
+ usecs_to_jiffies(mrp->test_interval));
+}
+
+/* Deletes the MRP instance.
+ * note: called under rtnl_lock
+ */
+static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
+{
+ struct net_bridge_port *p;
+
+ /* Stop sending MRP_Test frames */
+ cancel_delayed_work_sync(&mrp->test_work);
+ br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0);
+
+ br_mrp_switchdev_del(br, mrp);
+
+ /* Reset the ports */
+ p = rtnl_dereference(mrp->p_port);
+ if (p) {
+ spin_lock_bh(&br->lock);
+ p->state = BR_STATE_FORWARDING;
+ p->flags &= ~BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+ rcu_assign_pointer(mrp->p_port, NULL);
+ }
+
+ p = rtnl_dereference(mrp->s_port);
+ if (p) {
+ spin_lock_bh(&br->lock);
+ p->state = BR_STATE_FORWARDING;
+ p->flags &= ~BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+ rcu_assign_pointer(mrp->s_port, NULL);
+ }
+
+ list_del_rcu(&mrp->list);
+ kfree_rcu(mrp, rcu);
+}
+
+/* Adds a new MRP instance.
+ * note: called under rtnl_lock
+ */
+int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
+{
+ struct net_bridge_port *p;
+ struct br_mrp *mrp;
+ int err;
+
+ /* If the ring exists, it is not possible to create another one with the
+ * same ring_id
+ */
+ mrp = br_mrp_find_id(br, instance->ring_id);
+ if (mrp)
+ return -EINVAL;
+
+ if (!br_mrp_get_port(br, instance->p_ifindex) ||
+ !br_mrp_get_port(br, instance->s_ifindex))
+ return -EINVAL;
+
+ mrp = kzalloc(sizeof(*mrp), GFP_KERNEL);
+ if (!mrp)
+ return -ENOMEM;
+
+ mrp->ring_id = instance->ring_id;
+
+ p = br_mrp_get_port(br, instance->p_ifindex);
+ spin_lock_bh(&br->lock);
+ p->state = BR_STATE_FORWARDING;
+ p->flags |= BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ rcu_assign_pointer(mrp->p_port, p);
+
+ p = br_mrp_get_port(br, instance->s_ifindex);
+ spin_lock_bh(&br->lock);
+ p->state = BR_STATE_FORWARDING;
+ p->flags |= BR_MRP_AWARE;
+ spin_unlock_bh(&br->lock);
+ rcu_assign_pointer(mrp->s_port, p);
+
+ INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired);
+ list_add_tail_rcu(&mrp->list, &br->mrp_list);
+
+ err = br_mrp_switchdev_add(br, mrp);
+ if (err)
+ goto delete_mrp;
+
+ return 0;
+
+delete_mrp:
+ br_mrp_del_impl(br, mrp);
+
+ return err;
+}
+
+/* Deletes the MRP instance from which the port is part of
+ * note: called under rtnl_lock
+ */
+void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p)
+{
+ struct br_mrp *mrp = br_mrp_find_port(br, p);
+
+ /* If the port is not part of a MRP instance just bail out */
+ if (!mrp)
+ return;
+
+ br_mrp_del_impl(br, mrp);
+}
+
+/* Deletes existing MRP instance based on ring_id
+ * note: called under rtnl_lock
+ */
+int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance)
+{
+ struct br_mrp *mrp = br_mrp_find_id(br, instance->ring_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ br_mrp_del_impl(br, mrp);
+
+ return 0;
+}
+
+/* Set port state, port state can be forwarding, blocked or disabled
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_port_state(struct net_bridge_port *p,
+ enum br_mrp_port_state_type state)
+{
+ if (!p || !(p->flags & BR_MRP_AWARE))
+ return -EINVAL;
+
+ spin_lock_bh(&p->br->lock);
+
+ if (state == BR_MRP_PORT_STATE_FORWARDING)
+ p->state = BR_STATE_FORWARDING;
+ else
+ p->state = BR_STATE_BLOCKING;
+
+ spin_unlock_bh(&p->br->lock);
+
+ br_mrp_port_switchdev_set_state(p, state);
+
+ return 0;
+}
+
+/* Set port role, port role can be primary or secondary
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_port_role(struct net_bridge_port *p,
+ struct br_mrp_port_role *role)
+{
+ struct br_mrp *mrp;
+
+ if (!p || !(p->flags & BR_MRP_AWARE))
+ return -EINVAL;
+
+ mrp = br_mrp_find_id(p->br, role->ring_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ if (role->role == BR_MRP_PORT_ROLE_PRIMARY)
+ rcu_assign_pointer(mrp->p_port, p);
+ else
+ rcu_assign_pointer(mrp->s_port, p);
+
+ br_mrp_port_switchdev_set_role(p, role->role);
+
+ return 0;
+}
+
+/* Set ring state, ring state can be only Open or Closed
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_ring_state(struct net_bridge *br,
+ struct br_mrp_ring_state *state)
+{
+ struct br_mrp *mrp = br_mrp_find_id(br, state->ring_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED &&
+ state->ring_state != BR_MRP_RING_STATE_CLOSED)
+ mrp->ring_transitions++;
+
+ mrp->ring_state = state->ring_state;
+
+ br_mrp_switchdev_set_ring_state(br, mrp, state->ring_state);
+
+ return 0;
+}
+
+/* Set ring role, ring role can be only MRM(Media Redundancy Manager) or
+ * MRC(Media Redundancy Client).
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_ring_role(struct net_bridge *br,
+ struct br_mrp_ring_role *role)
+{
+ struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id);
+ int err;
+
+ if (!mrp)
+ return -EINVAL;
+
+ mrp->ring_role = role->ring_role;
+
+ /* If there is an error just bailed out */
+ err = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ /* Now detect if the HW actually applied the role or not. If the HW
+ * applied the role it means that the SW will not to do those operations
+ * anymore. For example if the role ir MRM then the HW will notify the
+ * SW when ring is open, but if the is not pushed to the HW the SW will
+ * need to detect when the ring is open
+ */
+ mrp->ring_role_offloaded = err == -EOPNOTSUPP ? 0 : 1;
+
+ return 0;
+}
+
+/* Start to generate MRP test frames, the frames are generated by HW and if it
+ * fails, they are generated by the SW.
+ * note: already called with rtnl_lock
+ */
+int br_mrp_start_test(struct net_bridge *br,
+ struct br_mrp_start_test *test)
+{
+ struct br_mrp *mrp = br_mrp_find_id(br, test->ring_id);
+
+ if (!mrp)
+ return -EINVAL;
+
+ /* Try to push it to the HW and if it fails then continue to generate in
+ * SW and if that also fails then return error
+ */
+ if (!br_mrp_switchdev_send_ring_test(br, mrp, test->interval,
+ test->max_miss, test->period))
+ return 0;
+
+ mrp->test_interval = test->interval;
+ mrp->test_end = jiffies + usecs_to_jiffies(test->period);
+ mrp->test_max_miss = test->max_miss;
+ mrp->test_count_miss = 0;
+ queue_delayed_work(system_wq, &mrp->test_work,
+ usecs_to_jiffies(test->interval));
+
+ return 0;
+}
+
+/* Process only MRP Test frame. All the other MRP frames are processed by
+ * userspace application
+ * note: already called with rcu_read_lock
+ */
+static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port,
+ struct sk_buff *skb)
+{
+ const struct br_mrp_tlv_hdr *hdr;
+ struct br_mrp_tlv_hdr _hdr;
+
+ /* Each MRP header starts with a version field which is 16 bits.
+ * Therefore skip the version and get directly the TLV header.
+ */
+ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+ if (!hdr)
+ return;
+
+ if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST)
+ return;
+
+ mrp->test_count_miss = 0;
+
+ /* Notify the userspace that the ring is closed only when the ring is
+ * not closed
+ */
+ if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED)
+ br_mrp_port_open(port->dev, false);
+}
+
+/* This will just forward the frame to the other mrp ring port(MRC role) or will
+ * not do anything.
+ * note: already called with rcu_read_lock
+ */
+static int br_mrp_rcv(struct net_bridge_port *p,
+ struct sk_buff *skb, struct net_device *dev)
+{
+ struct net_device *s_dev, *p_dev, *d_dev;
+ struct net_bridge_port *p_port, *s_port;
+ struct net_bridge *br;
+ struct sk_buff *nskb;
+ struct br_mrp *mrp;
+
+ /* If port is disabled don't accept any frames */
+ if (p->state == BR_STATE_DISABLED)
+ return 0;
+
+ br = p->br;
+ mrp = br_mrp_find_port(br, p);
+ if (unlikely(!mrp))
+ return 0;
+
+ p_port = rcu_dereference(mrp->p_port);
+ if (!p_port)
+ return 0;
+
+ s_port = rcu_dereference(mrp->s_port);
+ if (!s_port)
+ return 0;
+
+ /* If the role is MRM then don't forward the frames */
+ if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) {
+ br_mrp_mrm_process(mrp, p, skb);
+ return 1;
+ }
+
+ /* Clone the frame and forward it on the other MRP port */
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return 0;
+
+ p_dev = p_port->dev;
+ s_dev = s_port->dev;
+
+ if (p_dev == dev)
+ d_dev = s_dev;
+ else
+ d_dev = p_dev;
+
+ nskb->dev = d_dev;
+ skb_push(nskb, ETH_HLEN);
+ dev_queue_xmit(nskb);
+
+ return 1;
+}
+
+/* Check if the frame was received on a port that is part of MRP ring
+ * and if the frame has MRP eth. In that case process the frame otherwise do
+ * normal forwarding.
+ * note: already called with rcu_read_lock
+ */
+int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb)
+{
+ /* If there is no MRP instance do normal forwarding */
+ if (likely(!(p->flags & BR_MRP_AWARE)))
+ goto out;
+
+ if (unlikely(skb->protocol == htons(ETH_P_MRP)))
+ return br_mrp_rcv(p, skb, p->dev);
+
+out:
+ return 0;
+}
+
+bool br_mrp_enabled(struct net_bridge *br)
+{
+ return !list_empty(&br->mrp_list);
+}
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
new file mode 100644
index 000000000000..503896638be0
--- /dev/null
+++ b/net/bridge/br_mrp_netlink.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/genetlink.h>
+
+#include <uapi/linux/mrp_bridge.h>
+#include "br_private.h"
+#include "br_private_mrp.h"
+
+static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
+ [IFLA_BRIDGE_MRP_UNSPEC] = { .type = NLA_REJECT },
+ [IFLA_BRIDGE_MRP_INSTANCE] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_mrp_instance)},
+ [IFLA_BRIDGE_MRP_PORT_STATE] = { .type = NLA_U32 },
+ [IFLA_BRIDGE_MRP_PORT_ROLE] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_mrp_port_role)},
+ [IFLA_BRIDGE_MRP_RING_STATE] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_mrp_ring_state)},
+ [IFLA_BRIDGE_MRP_RING_ROLE] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_mrp_ring_role)},
+ [IFLA_BRIDGE_MRP_START_TEST] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_mrp_start_test)},
+};
+
+int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+ struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1];
+ int err;
+
+ if (br->stp_enabled != BR_NO_STP) {
+ NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled\n");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_MAX, attr,
+ br_mrp_policy, extack);
+ if (err)
+ return err;
+
+ if (tb[IFLA_BRIDGE_MRP_INSTANCE]) {
+ struct br_mrp_instance *instance =
+ nla_data(tb[IFLA_BRIDGE_MRP_INSTANCE]);
+
+ if (cmd == RTM_SETLINK)
+ err = br_mrp_add(br, instance);
+ else
+ err = br_mrp_del(br, instance);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) {
+ enum br_mrp_port_state_type state =
+ nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE]);
+
+ err = br_mrp_set_port_state(p, state);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) {
+ struct br_mrp_port_role *role =
+ nla_data(tb[IFLA_BRIDGE_MRP_PORT_ROLE]);
+
+ err = br_mrp_set_port_role(p, role);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_RING_STATE]) {
+ struct br_mrp_ring_state *state =
+ nla_data(tb[IFLA_BRIDGE_MRP_RING_STATE]);
+
+ err = br_mrp_set_ring_state(br, state);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) {
+ struct br_mrp_ring_role *role =
+ nla_data(tb[IFLA_BRIDGE_MRP_RING_ROLE]);
+
+ err = br_mrp_set_ring_role(br, role);
+ if (err)
+ return err;
+ }
+
+ if (tb[IFLA_BRIDGE_MRP_START_TEST]) {
+ struct br_mrp_start_test *test =
+ nla_data(tb[IFLA_BRIDGE_MRP_START_TEST]);
+
+ err = br_mrp_start_test(br, test);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int br_mrp_port_open(struct net_device *dev, u8 loc)
+{
+ struct net_bridge_port *p;
+ int err = 0;
+
+ p = br_port_get_rcu(dev);
+ if (!p) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (loc)
+ p->flags |= BR_MRP_LOST_CONT;
+ else
+ p->flags &= ~BR_MRP_LOST_CONT;
+
+ br_ifinfo_notify(RTM_NEWLINK, NULL, p);
+
+out:
+ return err;
+}
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
new file mode 100644
index 000000000000..51cb1d5a24b4
--- /dev/null
+++ b/net/bridge/br_mrp_switchdev.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/switchdev.h>
+
+#include "br_private_mrp.h"
+
+int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp)
+{
+ struct switchdev_obj_mrp mrp_obj = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_MRP,
+ .p_port = rtnl_dereference(mrp->p_port)->dev,
+ .s_port = rtnl_dereference(mrp->s_port)->dev,
+ .ring_id = mrp->ring_id,
+ };
+ int err;
+
+ err = switchdev_port_obj_add(br->dev, &mrp_obj.obj, NULL);
+
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp)
+{
+ struct switchdev_obj_mrp mrp_obj = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_MRP,
+ .p_port = NULL,
+ .s_port = NULL,
+ .ring_id = mrp->ring_id,
+ };
+ int err;
+
+ err = switchdev_port_obj_del(br->dev, &mrp_obj.obj);
+
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+int br_mrp_switchdev_set_ring_role(struct net_bridge *br,
+ struct br_mrp *mrp,
+ enum br_mrp_ring_role_type role)
+{
+ struct switchdev_obj_ring_role_mrp mrp_role = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
+ .ring_role = role,
+ .ring_id = mrp->ring_id,
+ };
+ int err;
+
+ if (role == BR_MRP_RING_ROLE_DISABLED)
+ err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
+ else
+ err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
+
+ return err;
+}
+
+int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
+ struct br_mrp *mrp, u32 interval,
+ u8 max_miss, u32 period)
+{
+ struct switchdev_obj_ring_test_mrp test = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_RING_TEST_MRP,
+ .interval = interval,
+ .max_miss = max_miss,
+ .ring_id = mrp->ring_id,
+ .period = period,
+ };
+ int err;
+
+ if (interval == 0)
+ err = switchdev_port_obj_del(br->dev, &test.obj);
+ else
+ err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
+
+ return err;
+}
+
+int br_mrp_switchdev_set_ring_state(struct net_bridge *br,
+ struct br_mrp *mrp,
+ enum br_mrp_ring_state_type state)
+{
+ struct switchdev_obj_ring_state_mrp mrp_state = {
+ .obj.orig_dev = br->dev,
+ .obj.id = SWITCHDEV_OBJ_ID_RING_STATE_MRP,
+ .ring_state = state,
+ .ring_id = mrp->ring_id,
+ };
+ int err;
+
+ err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
+
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
+ enum br_mrp_port_state_type state)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
+ .u.mrp_port_state = state,
+ };
+ int err;
+
+ err = switchdev_port_attr_set(p->dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ br_warn(p->br, "error setting offload MRP state on port %u(%s)\n",
+ (unsigned int)p->port_no, p->dev->name);
+
+ return err;
+}
+
+int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
+ enum br_mrp_port_role_type role)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
+ .u.mrp_port_role = role,
+ };
+ int err;
+
+ err = switchdev_port_attr_set(p->dev, &attr);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 43dab4066f91..a774e19c41bb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -151,6 +151,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
+ 0;
}
@@ -213,6 +214,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
!!(p->flags & BR_NEIGH_SUPPRESS)) ||
+ nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags &
+ BR_MRP_LOST_CONT)) ||
nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
return -EMSGSIZE;
@@ -669,6 +672,11 @@ static int br_afspec(struct net_bridge *br,
if (err)
return err;
break;
+ case IFLA_BRIDGE_MRP:
+ err = br_mrp_parse(br, p, attr, cmd, extack);
+ if (err)
+ return err;
+ break;
}
}
@@ -1101,7 +1109,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_STP_STATE]) {
u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]);
- br_stp_set_enabled(br, stp_enabled);
+ err = br_stp_set_enabled(br, stp_enabled, extack);
+ if (err)
+ return err;
}
if (data[IFLA_BR_PRIORITY]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1f97703a52ff..c35647cb138a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -428,6 +428,10 @@ struct net_bridge {
int offload_fwd_mark;
#endif
struct hlist_head fdb_list;
+
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+ struct list_head __rcu mrp_list;
+#endif
};
struct br_input_skb_cb {
@@ -1279,7 +1283,8 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time);
/* br_stp_if.c */
void br_stp_enable_bridge(struct net_bridge *br);
void br_stp_disable_bridge(struct net_bridge *br);
-void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
+int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack);
void br_stp_enable_port(struct net_bridge_port *p);
void br_stp_disable_port(struct net_bridge_port *p);
bool br_stp_recalculate_bridge_id(struct net_bridge *br);
@@ -1304,6 +1309,37 @@ unsigned long br_timer_value(const struct timer_list *timer);
extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr);
#endif
+/* br_mrp.c */
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+ struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
+int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
+bool br_mrp_enabled(struct net_bridge *br);
+void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p);
+#else
+static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+ struct nlattr *attr, int cmd,
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline bool br_mrp_enabled(struct net_bridge *br)
+{
+ return 0;
+}
+
+static inline void br_mrp_port_del(struct net_bridge *br,
+ struct net_bridge_port *p)
+{
+}
+#endif
+
/* br_netlink.c */
extern struct rtnl_link_ops br_link_ops;
int br_netlink_init(void);
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
new file mode 100644
index 000000000000..2921a4b59f8e
--- /dev/null
+++ b/net/bridge/br_private_mrp.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _BR_PRIVATE_MRP_H_
+#define _BR_PRIVATE_MRP_H_
+
+#include "br_private.h"
+#include <uapi/linux/mrp_bridge.h>
+
+struct br_mrp {
+ /* list of mrp instances */
+ struct list_head __rcu list;
+
+ struct net_bridge_port __rcu *p_port;
+ struct net_bridge_port __rcu *s_port;
+
+ u32 ring_id;
+
+ enum br_mrp_ring_role_type ring_role;
+ u8 ring_role_offloaded;
+ enum br_mrp_ring_state_type ring_state;
+ u32 ring_transitions;
+
+ struct delayed_work test_work;
+ u32 test_interval;
+ unsigned long test_end;
+ u32 test_count_miss;
+ u32 test_max_miss;
+
+ u32 seq_id;
+
+ struct rcu_head rcu;
+};
+
+/* br_mrp.c */
+int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance);
+int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance);
+int br_mrp_set_port_state(struct net_bridge_port *p,
+ enum br_mrp_port_state_type state);
+int br_mrp_set_port_role(struct net_bridge_port *p,
+ struct br_mrp_port_role *role);
+int br_mrp_set_ring_state(struct net_bridge *br,
+ struct br_mrp_ring_state *state);
+int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role);
+int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test);
+
+/* br_mrp_switchdev.c */
+int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp);
+int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp);
+int br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_ring_role_type role);
+int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp,
+ enum br_mrp_ring_state_type state);
+int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
+ u32 interval, u8 max_miss, u32 period);
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
+ enum br_mrp_port_state_type state);
+int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
+ enum br_mrp_port_role_type role);
+
+/* br_mrp_netlink.c */
+int br_mrp_port_open(struct net_device *dev, u8 loc);
+
+#endif /* _BR_PRIVATE_MRP_H */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1f14b8455345..3e88be7aa269 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -36,6 +36,12 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
};
int err;
+ /* Don't change the state of the ports if they are driven by a different
+ * protocol.
+ */
+ if (p->flags & BR_MRP_AWARE)
+ return;
+
p->state = state;
err = switchdev_port_attr_set(p->dev, &attr);
if (err && err != -EOPNOTSUPP)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d174d3a566aa..a42850b7eb9a 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -196,10 +196,17 @@ static void br_stp_stop(struct net_bridge *br)
br->stp_enabled = BR_NO_STP;
}
-void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+ struct netlink_ext_ack *extack)
{
ASSERT_RTNL();
+ if (br_mrp_enabled(br)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "STP can't be enabled if MRP is already enabled\n");
+ return -EINVAL;
+ }
+
if (val) {
if (br->stp_enabled == BR_NO_STP)
br_stp_start(br);
@@ -207,6 +214,8 @@ void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
if (br->stp_enabled != BR_NO_STP)
br_stp_stop(br);
}
+
+ return 0;
}
/* called under bridge lock */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 9ab0f00b1081..7db06e3f642a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -126,9 +126,7 @@ static ssize_t stp_state_show(struct device *d,
static int set_stp_state(struct net_bridge *br, unsigned long val)
{
- br_stp_set_enabled(br, val);
-
- return 0;
+ return br_stp_set_enabled(br, val, NULL);
}
static ssize_t stp_state_store(struct device *d,
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index a56628962852..79b6a04d8eb6 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -211,7 +211,8 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
}
}
-static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
{
struct chnl_net *priv;
struct cfpkt *pkt = NULL;
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index 2e8e6f904920..d7bec7adc267 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -39,6 +39,6 @@ config CEPH_LIB_USE_DNS_RESOLVER
be resolved using the CONFIG_DNS_RESOLVER facility.
For information on how to use CONFIG_DNS_RESOLVER consult
- Documentation/networking/dns_resolver.txt
+ Documentation/networking/dns_resolver.rst
If unsure, say N.
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1d653fbfcf52..e491b083b348 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -6,7 +6,7 @@
* Jamal Hadi Salim
* Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
- * See Documentation/networking/gen_stats.txt
+ * See Documentation/networking/gen_stats.rst
*/
#include <linux/types.h>
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 849380a622ef..15b366a1a958 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -69,10 +69,11 @@ module_param(carrier_timeout, uint, 0644);
#define np_notice(np, fmt, ...) \
pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
-static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq)
+static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
+ struct net_device *dev,
+ struct netdev_queue *txq)
{
- int status = NETDEV_TX_OK;
+ netdev_tx_t status = NETDEV_TX_OK;
netdev_features_t features;
features = netif_skb_features(skb);
@@ -307,7 +308,7 @@ static int netpoll_owner_active(struct net_device *dev)
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
struct net_device *dev)
{
- int status = NETDEV_TX_BUSY;
+ netdev_tx_t status = NETDEV_TX_BUSY;
unsigned long tries;
/* It is up to the caller to keep npinfo alive. */
struct netpoll_info *npinfo;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 08e2811b5274..b53b6d38c4df 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -56,7 +56,7 @@
* Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
*
* 021124 Finished major redesign and rewrite for new functionality.
- * See Documentation/networking/pktgen.txt for how to use this.
+ * See Documentation/networking/pktgen.rst for how to use this.
*
* The new operation:
* For each CPU one thread/process is created at start. This process checks
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d6f4f4a9e8ba..2269199c5891 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3997,8 +3997,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct ndmsg *ndm;
struct nlattr *tb[NDA_MAX+1];
struct net_device *dev;
- int err = -EINVAL;
__u8 *addr;
+ int err;
u16 vid;
if (!netlink_capable(skb, CAP_NET_ADMIN))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7e29590482ce..1bf0c3d278e7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(sysctl_max_skb_frags);
static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
const char msg[])
{
- pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
+ pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
msg, addr, skb->len, sz, skb->head, skb->data,
(unsigned long)skb->tail, (unsigned long)skb->end,
skb->dev ? skb->dev->name : "<NULL>");
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 0935453ccfd5..8f98fb2f2ec9 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -15,7 +15,7 @@ config DECNET
<http://linux-decnet.sourceforge.net/>.
More detailed documentation is available in
- <file:Documentation/networking/decnet.txt>.
+ <file:Documentation/networking/decnet.rst>.
Be sure to say Y to "/proc file system support" and "Sysctl support"
below when using DECnet, since you will need sysctl support to aid
@@ -40,4 +40,4 @@ config DECNET_ROUTER
filtering" option will be required for the forthcoming routing daemon
to work.
- See <file:Documentation/networking/decnet.txt> for more information.
+ See <file:Documentation/networking/decnet.rst> for more information.
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 0a1c2238b4bd..255df9b6e9e8 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -19,7 +19,7 @@ config DNS_RESOLVER
SMB2 later. DNS Resolver is supported by the userspace upcall
helper "/sbin/dns.resolver" via /etc/request-key.conf.
- See <file:Documentation/networking/dns_resolver.txt> for further
+ See <file:Documentation/networking/dns_resolver.rst> for further
information.
To compile this as a module, choose M here: the module will be called
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index ad53eb31d40f..3aced951d5ab 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -1,6 +1,6 @@
/* Key type used to cache DNS lookups made by the kernel
*
- * See Documentation/networking/dns_resolver.txt
+ * See Documentation/networking/dns_resolver.rst
*
* Copyright (c) 2007 Igor Mammedov
* Author(s): Igor Mammedov (niallain@gmail.com)
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index cab4e0df924f..82b084cc1cc6 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -1,7 +1,7 @@
/* Upcall routine, designed to work as a key type and working through
* /sbin/request-key to contact userspace when handling DNS queries.
*
- * See Documentation/networking/dns_resolver.txt
+ * See Documentation/networking/dns_resolver.rst
*
* Copyright (c) 2007 Igor Mammedov
* Author(s): Igor Mammedov (niallain@gmail.com)
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 26d6c39f24e1..e2564de67603 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -15,12 +15,23 @@
#include "hsr_framereg.h"
#include "hsr_slave.h"
+static bool hsr_slave_empty(struct hsr_priv *hsr)
+{
+ struct hsr_port *port;
+
+ hsr_for_each_port(hsr, port)
+ if (port->type != HSR_PT_MASTER)
+ return false;
+ return true;
+}
+
static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
void *ptr)
{
- struct net_device *dev;
struct hsr_port *port, *master;
+ struct net_device *dev;
struct hsr_priv *hsr;
+ LIST_HEAD(list_kill);
int mtu_max;
int res;
@@ -85,8 +96,15 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
master->dev->mtu = mtu_max;
break;
case NETDEV_UNREGISTER:
- if (!is_hsr_master(dev))
+ if (!is_hsr_master(dev)) {
+ master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
hsr_del_port(port);
+ if (hsr_slave_empty(master->hsr)) {
+ unregister_netdevice_queue(master->dev,
+ &list_kill);
+ unregister_netdevice_many(&list_kill);
+ }
+ }
break;
case NETDEV_PRE_TYPE_CHANGE:
/* HSR works only on Ethernet devices. Refuse slave to change
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 7321cf8d6d2c..f74193465bf5 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -62,15 +62,6 @@ struct hsr_tag {
* with the path field in-between, which seems strange. I'm guessing the MAC
* address definition is in error.
*/
-static inline u16 get_hsr_tag_path(struct hsr_tag *ht)
-{
- return ntohs(ht->path_and_LSDU_size) >> 12;
-}
-
-static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht)
-{
- return ntohs(ht->path_and_LSDU_size) & 0x0FFF;
-}
static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path)
{
@@ -103,16 +94,6 @@ struct hsr_sup_payload {
unsigned char macaddress_A[ETH_ALEN];
} __packed;
-static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst)
-{
- return get_hsr_tag_path((struct hsr_tag *)hst);
-}
-
-static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst)
-{
- return get_hsr_tag_LSDU_size((struct hsr_tag *)hst);
-}
-
static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path)
{
set_hsr_tag_path((struct hsr_tag *)hst, path);
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 25a8888826b8..5da4733067fb 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -49,7 +49,7 @@ config IP_ADVANCED_ROUTER
Note that some distributions enable it in startup scripts.
For details about rp_filter strict and loose mode read
- <file:Documentation/networking/ip-sysctl.txt>.
+ <file:Documentation/networking/ip-sysctl.rst>.
If unsure, say N here.
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c618e242490f..6177c4ba0037 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1835,6 +1835,7 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_early_demux = 1;
net->ipv4.sysctl_udp_early_demux = 1;
net->ipv4.sysctl_tcp_early_demux = 1;
+ net->ipv4.sysctl_nexthop_compat_mode = 1;
#ifdef CONFIG_SYSCTL
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 55ca2e521828..e53871e4a097 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1780,6 +1780,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE;
+ if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+ goto offload;
}
if (nhs == 1) {
@@ -1805,6 +1807,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
goto nla_put_failure;
}
+offload:
if (fri->offload)
rtm->rtm_flags |= RTM_F_OFFLOAD;
if (fri->trap)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc61f51d87a3..956a806649f7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -853,7 +853,7 @@ static bool icmp_unreach(struct sk_buff *skb)
case ICMP_FRAG_NEEDED:
/* for documentation of the ip_no_pmtu_disc
* values please see
- * Documentation/networking/ip-sysctl.txt
+ * Documentation/networking/ip-sysctl.rst
*/
switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
default:
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5d50aad3cdbf..0034092358c3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -43,6 +43,9 @@ struct inet_diag_entry {
u16 userlocks;
u32 ifindex;
u32 mark;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ u64 cgroup_id;
+#endif
};
static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -162,6 +165,13 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
goto errout;
}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
+ cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
+ INET_DIAG_PAD))
+ goto errout;
+#endif
+
r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
r->idiag_inode = sock_i_ino(sk);
@@ -675,6 +685,16 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
yes = 0;
break;
}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case INET_DIAG_BC_CGROUP_COND: {
+ u64 cgroup_id;
+
+ cgroup_id = get_unaligned((const u64 *)(op + 1));
+ if (cgroup_id != entry->cgroup_id)
+ yes = 0;
+ break;
+ }
+#endif
}
if (yes) {
@@ -725,6 +745,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else
entry.mark = 0;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ entry.cgroup_id = cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data));
+#endif
return inet_diag_bc_run(bc, &entry);
}
@@ -814,6 +837,15 @@ static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
return len >= *min_len;
}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
+ int *min_len)
+{
+ *min_len += sizeof(u64);
+ return len >= *min_len;
+}
+#endif
+
static int inet_diag_bc_audit(const struct nlattr *attr,
const struct sk_buff *skb)
{
@@ -856,6 +888,12 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
if (!valid_markcond(bc, len, &min_len))
return -EINVAL;
break;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case INET_DIAG_BC_CGROUP_COND:
+ if (!valid_cgroupcond(bc, len, &min_len))
+ return -EINVAL;
+ break;
+#endif
case INET_DIAG_BC_AUTO:
case INET_DIAG_BC_JMP:
case INET_DIAG_BC_NOP:
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index fdfca534d094..3957364d556c 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -784,7 +784,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
- ipv6_stub->ip6_del_rt(net, f6i);
+ ipv6_stub->ip6_del_rt(net, f6i,
+ !net->ipv4.sysctl_nexthop_compat_mode);
}
}
@@ -1041,7 +1042,7 @@ out:
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
- if (replace_notify)
+ if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 868e317cc324..5653e3b011bf 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -703,6 +703,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_tcp_early_demux
},
{
+ .procname = "nexthop_compat_mode",
+ .data = &init_net.ipv4.sysctl_nexthop_compat_mode,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
@@ -1313,6 +1322,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
+ .procname = "tcp_comp_sack_slack_ns",
+ .data = &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
.procname = "tcp_comp_sack_nr",
.data = &init_net.ipv4.sysctl_tcp_comp_sack_nr,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6d87de434377..8c1250103959 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3035,8 +3035,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
- else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
- tp->linger2 = 0;
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+ tp->linger2 = TCP_FIN_TIMEOUT_MAX;
else
tp->linger2 = val * HZ;
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf4ced9273e8..d68128a672ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4327,6 +4327,33 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
}
}
+static void tcp_sack_compress_send_ack(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (!tp->compressed_ack)
+ return;
+
+ if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+ __sock_put(sk);
+
+ /* Since we have to send one ack finally,
+ * substract one from tp->compressed_ack to keep
+ * LINUX_MIB_TCPACKCOMPRESSED accurate.
+ */
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - 1);
+
+ tp->compressed_ack = 0;
+ tcp_send_ack(sk);
+}
+
+/* Reasonable amount of sack blocks included in TCP SACK option
+ * The max is 4, but this becomes 3 if TCP timestamps are there.
+ * Given that SACK packets might be lost, be conservative and use 2.
+ */
+#define TCP_SACK_BLOCKS_EXPECTED 2
+
static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4339,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
if (tcp_sack_extend(sp, seq, end_seq)) {
+ if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+ tcp_sack_compress_send_ack(sk);
/* Rotate this_sack to the first one. */
for (; this_sack > 0; this_sack--, sp--)
swap(*sp, *(sp - 1));
@@ -4348,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
}
}
+ if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+ tcp_sack_compress_send_ack(sk);
+
/* Could not find an adjacent existing SACK, build a new one,
* put it at the front, and shift everyone else down. We
* always know there is at least one SACK present already here.
@@ -4355,8 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
- tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
sp--;
@@ -5275,15 +5305,13 @@ send_now:
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
- NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack - TCP_FASTRETRANS_THRESH);
- tp->compressed_ack = 0;
+ tp->dup_ack_counter = 0;
}
-
- if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
+ tp->dup_ack_counter++;
goto send_now;
-
+ }
+ tp->compressed_ack++;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
@@ -5296,8 +5324,9 @@ send_now:
delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
- hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
- HRTIMER_MODE_REL_PINNED_SOFT);
+ hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+ sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ HRTIMER_MODE_REL_PINNED_SOFT);
}
static inline void tcp_ack_snd_check(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 83a5d24e13b8..6c05f1ceb538 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
sizeof(init_net.ipv4.sysctl_tcp_wmem));
}
net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+ net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
net->ipv4.sysctl_tcp_comp_sack_nr = 44;
net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ba4482130f08..c414aeb1efa9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
+ if (unlikely(tp->compressed_ack)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack - TCP_FASTRETRANS_THRESH);
- tp->compressed_ack = TCP_FASTRETRANS_THRESH;
+ tp->compressed_ack);
+ tp->compressed_ack = 0;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c3f26dcd6704..ada046f425d2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ if (tp->compressed_ack) {
+ /* Since we have to send one ack finally,
+ * substract one from tp->compressed_ack to keep
+ * LINUX_MIB_TCPACKCOMPRESSED accurate.
+ */
+ tp->compressed_ack--;
tcp_send_ack(sk);
+ }
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
&sk->sk_tsq_flags))
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2ccaee98fddb..5a6111da26c4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -13,7 +13,7 @@ menuconfig IPV6
For general information about IPv6, see
<https://en.wikipedia.org/wiki/IPv6>.
For specific information about IPv6 under Linux, see
- Documentation/networking/ipv6.txt and read the HOWTO at
+ Documentation/networking/ipv6.rst and read the HOWTO at
<http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
To compile this protocol support as a module, choose M here: the
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bad198e6b863..26e666fe9a0e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1238,7 +1238,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
ifp->idev->dev, 0, RTF_DEFAULT, true);
if (f6i) {
if (del_rt)
- ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
else {
if (!(f6i->fib6_flags & RTF_EXPIRES))
fib6_set_expires(f6i, expires);
@@ -2718,7 +2718,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
if (rt) {
/* Autoconf prefix route */
if (valid_lft == 0) {
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
rt = NULL;
} else if (addrconf_finite_timeout(rt_expires)) {
/* not infinity */
@@ -3813,7 +3813,7 @@ restart:
spin_unlock_bh(&ifa->lock);
if (rt)
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
if (state != INET6_IFADDR_STATE_DEAD) {
__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -4652,7 +4652,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
if (f6i->fib6_metric != prio) {
/* delete old one */
- ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
/* add new one */
addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
@@ -6073,10 +6073,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
ifp->idev->dev, 0, 0,
false);
if (rt)
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
}
if (ifp->rt) {
- ip6_del_rt(net, ifp->rt);
+ ip6_del_rt(net, ifp->rt, false);
ifp->rt = NULL;
}
rt_genid_bump_ipv6(net);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index ea00ce3d4117..9ebf3fe0d2b1 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -185,7 +185,8 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
return -EAFNOSUPPORT;
}
-static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt,
+ bool skip_notify)
{
return -EAFNOSUPPORT;
}
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index fed91ab7ec46..893261230ffc 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -364,7 +364,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
ipv6_del_acaddr_hash(aca);
addrconf_leave_solict(idev, &aca->aca_addr);
- ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
aca_put(aca);
return 0;
@@ -393,7 +393,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
addrconf_leave_solict(idev, &aca->aca_addr);
- ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
+ ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
aca_put(aca);
diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index bb6fc0d54dae..ad5f6f6ba333 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -68,11 +68,6 @@ static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
return (struct ila_addr *)addr;
}
-static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
-{
- return (iaddr->ident.type != ILA_ATYPE_IID);
-}
-
struct ila_params {
struct ila_locator locator;
struct ila_locator locator_match;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58f1255295d3..27f29b957ee7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1302,7 +1302,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
}
}
if (rt && lifetime == 0) {
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
rt = NULL;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index acdb31e38412..3912aac7854d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -984,7 +984,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
gwaddr, dev);
if (rt && !lifetime) {
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
rt = NULL;
}
@@ -3729,9 +3729,12 @@ out:
return err;
}
-int ip6_del_rt(struct net *net, struct fib6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
{
- struct nl_info info = { .nl_net = net };
+ struct nl_info info = {
+ .nl_net = net,
+ .skip_notify = skip_notify
+ };
return __ip6_del_rt(rt, &info);
}
@@ -4252,7 +4255,7 @@ restart:
(!idev || idev->cnf.accept_ra != 2) &&
fib6_info_hold_safe(rt)) {
rcu_read_unlock();
- ip6_del_rt(net, rt);
+ ip6_del_rt(net, rt, false);
goto restart;
}
}
@@ -5554,7 +5557,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (nexthop_is_blackhole(rt->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+ if (net->ipv4.sysctl_nexthop_compat_mode &&
+ rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
goto nla_put_failure;
rtm->rtm_flags |= nh_flags;
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
index 6acfc999c952..5b50e8d64f26 100644
--- a/net/lapb/Kconfig
+++ b/net/lapb/Kconfig
@@ -15,7 +15,7 @@ config LAPB
currently supports LAPB only over Ethernet connections. If you want
to use LAPB connections over Ethernet, say Y here and to "LAPB over
Ethernet driver" below. Read
- <file:Documentation/networking/lapb-module.txt> for technical
+ <file:Documentation/networking/lapb-module.rst> for technical
details.
To compile this driver as a module, choose M here: the
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 82846aca86d9..9849c14694db 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2144,7 +2144,7 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
/*
* Please update the file
- * Documentation/networking/mac80211-injection.txt
+ * Documentation/networking/mac80211-injection.rst
* when parsing new fields here.
*/
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 468fea1aebba..3a3915d2e1ea 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1043,7 +1043,7 @@ config NETFILTER_XT_TARGET_TPROXY
on Netfilter connection tracking and NAT, unlike REDIRECT.
For it to work you will have to configure certain iptables rules
and use policy routing. For more information on how to set it up
- see Documentation/networking/tproxy.txt.
+ see Documentation/networking/tproxy.rst.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 31b027b12ff3..6a26299cb064 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -348,7 +348,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
+ seq_puts(s, "[HW_OFFLOAD] ");
+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
seq_puts(s, "[OFFLOAD] ");
else if (test_bit(IPS_ASSURED_BIT, &ct->status))
seq_puts(s, "[ASSURED] ");
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e3b099c14eff..a2abb0feab7f 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -754,12 +754,15 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
err = flow_offload_rule_add(offload, flow_rule);
if (err < 0)
set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
+ else
+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
nf_flow_offload_destroy(flow_rule);
}
static void flow_offload_work_del(struct flow_offload_work *offload)
{
+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9780bd93b7e4..3558e76e2733 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4669,6 +4669,25 @@ static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
+static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data_desc *desc,
+ struct nft_data *data,
+ struct nlattr *attr)
+{
+ int err;
+
+ err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
+ if (err < 0)
+ return err;
+
+ if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+ nft_data_release(data, desc->type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -4946,7 +4965,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr = NULL;
struct nft_userdata *udata;
struct nft_data_desc desc;
- struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
u32 flags = 0;
@@ -5072,15 +5090,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &data, sizeof(data), &desc,
- nla[NFTA_SET_ELEM_DATA]);
+ err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val,
+ nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err_parse_key_end;
- err = -EINVAL;
- if (set->dtype != NFT_DATA_VERDICT && desc.len != set->dlen)
- goto err_parse_data;
-
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
@@ -5094,14 +5108,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
continue;
err = nft_validate_register_store(&bind_ctx, dreg,
- &data,
+ &elem.data.val,
desc.type, desc.len);
if (err < 0)
goto err_parse_data;
if (desc.type == NFT_DATA_VERDICT &&
- (data.verdict.code == NFT_GOTO ||
- data.verdict.code == NFT_JUMP))
+ (elem.data.val.verdict.code == NFT_GOTO ||
+ elem.data.val.verdict.code == NFT_JUMP))
nft_validate_state_update(ctx->net,
NFT_VALIDATE_NEED);
}
@@ -5123,7 +5137,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
- elem.key_end.val.data, data.data,
+ elem.key_end.val.data, elem.data.val.data,
timeout, expiration, GFP_KERNEL);
if (elem.priv == NULL)
goto err_parse_data;
@@ -5201,7 +5215,7 @@ err_trans:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
- nft_data_release(&data, desc.type);
+ nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 8b44a4de5329..23a7bfd10521 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -30,6 +30,76 @@ struct nft_nat {
u16 flags;
};
+static void nft_nat_setup_addr(struct nf_nat_range2 *range,
+ const struct nft_regs *regs,
+ const struct nft_nat *priv)
+{
+ switch (priv->family) {
+ case AF_INET:
+ range->min_addr.ip = (__force __be32)
+ regs->data[priv->sreg_addr_min];
+ range->max_addr.ip = (__force __be32)
+ regs->data[priv->sreg_addr_max];
+ break;
+ case AF_INET6:
+ memcpy(range->min_addr.ip6, &regs->data[priv->sreg_addr_min],
+ sizeof(range->min_addr.ip6));
+ memcpy(range->max_addr.ip6, &regs->data[priv->sreg_addr_max],
+ sizeof(range->max_addr.ip6));
+ break;
+ }
+}
+
+static void nft_nat_setup_proto(struct nf_nat_range2 *range,
+ const struct nft_regs *regs,
+ const struct nft_nat *priv)
+{
+ range->min_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_min]);
+ range->max_proto.all = (__force __be16)
+ nft_reg_load16(&regs->data[priv->sreg_proto_max]);
+}
+
+static void nft_nat_setup_netmap(struct nf_nat_range2 *range,
+ const struct nft_pktinfo *pkt,
+ const struct nft_nat *priv)
+{
+ struct sk_buff *skb = pkt->skb;
+ union nf_inet_addr new_addr;
+ __be32 netmask;
+ int i, len = 0;
+
+ switch (priv->type) {
+ case NFT_NAT_SNAT:
+ if (nft_pf(pkt) == NFPROTO_IPV4) {
+ new_addr.ip = ip_hdr(skb)->saddr;
+ len = sizeof(struct in_addr);
+ } else {
+ new_addr.in6 = ipv6_hdr(skb)->saddr;
+ len = sizeof(struct in6_addr);
+ }
+ break;
+ case NFT_NAT_DNAT:
+ if (nft_pf(pkt) == NFPROTO_IPV4) {
+ new_addr.ip = ip_hdr(skb)->daddr;
+ len = sizeof(struct in_addr);
+ } else {
+ new_addr.in6 = ipv6_hdr(skb)->daddr;
+ len = sizeof(struct in6_addr);
+ }
+ break;
+ }
+
+ for (i = 0; i < len / sizeof(__be32); i++) {
+ netmask = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]);
+ new_addr.ip6[i] &= ~netmask;
+ new_addr.ip6[i] |= range->min_addr.ip6[i] & netmask;
+ }
+
+ range->min_addr = new_addr;
+ range->max_addr = new_addr;
+}
+
static void nft_nat_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -40,33 +110,17 @@ static void nft_nat_eval(const struct nft_expr *expr,
struct nf_nat_range2 range;
memset(&range, 0, sizeof(range));
- if (priv->sreg_addr_min) {
- if (priv->family == AF_INET) {
- range.min_addr.ip = (__force __be32)
- regs->data[priv->sreg_addr_min];
- range.max_addr.ip = (__force __be32)
- regs->data[priv->sreg_addr_max];
- } else {
- memcpy(range.min_addr.ip6,
- &regs->data[priv->sreg_addr_min],
- sizeof(range.min_addr.ip6));
- memcpy(range.max_addr.ip6,
- &regs->data[priv->sreg_addr_max],
- sizeof(range.max_addr.ip6));
- }
- range.flags |= NF_NAT_RANGE_MAP_IPS;
+ if (priv->sreg_addr_min) {
+ nft_nat_setup_addr(&range, regs, priv);
+ if (priv->flags & NF_NAT_RANGE_NETMAP)
+ nft_nat_setup_netmap(&range, pkt, priv);
}
- if (priv->sreg_proto_min) {
- range.min_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_min]);
- range.max_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_max]);
- range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- }
+ if (priv->sreg_proto_min)
+ nft_nat_setup_proto(&range, regs, priv);
- range.flags |= priv->flags;
+ range.flags = priv->flags;
regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
}
@@ -129,7 +183,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
priv->type = NF_NAT_MANIP_DST;
break;
default:
- return -EINVAL;
+ return -EOPNOTSUPP;
}
if (tb[NFTA_NAT_FAMILY] == NULL)
@@ -169,6 +223,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} else {
priv->sreg_addr_max = priv->sreg_addr_min;
}
+
+ priv->flags |= NF_NAT_RANGE_MAP_IPS;
}
plen = sizeof_field(struct nf_nat_range, min_addr.all);
@@ -191,12 +247,14 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
} else {
priv->sreg_proto_max = priv->sreg_proto_min;
}
+
+ priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
}
if (tb[NFTA_NAT_FLAGS]) {
- priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+ priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
+ return -EOPNOTSUPP;
}
return nf_ct_netns_get(ctx->net, family);
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index de42df7f0068..e05202708c90 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,7 +3,7 @@
# Makefile for the netlink driver.
#
-obj-y := af_netlink.o genetlink.o
+obj-y := af_netlink.o genetlink.o policy.o
obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o
netlink_diag-y := diag.o
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 9f357aa22b94..2f049692e012 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1043,6 +1043,80 @@ static int genl_ctrl_event(int event, const struct genl_family *family,
return 0;
}
+static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct genl_family *rt;
+ unsigned int fam_id = cb->args[0];
+ int err;
+
+ if (!fam_id) {
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+
+ err = genlmsg_parse(cb->nlh, &genl_ctrl, tb,
+ genl_ctrl.maxattr,
+ genl_ctrl.policy, cb->extack);
+ if (err)
+ return err;
+
+ if (!tb[CTRL_ATTR_FAMILY_ID] && !tb[CTRL_ATTR_FAMILY_NAME])
+ return -EINVAL;
+
+ if (tb[CTRL_ATTR_FAMILY_ID]) {
+ fam_id = nla_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
+ } else {
+ rt = genl_family_find_byname(
+ nla_data(tb[CTRL_ATTR_FAMILY_NAME]));
+ if (!rt)
+ return -ENOENT;
+ fam_id = rt->id;
+ }
+ }
+
+ rt = genl_family_find_byid(fam_id);
+ if (!rt)
+ return -ENOENT;
+
+ if (!rt->policy)
+ return -ENODATA;
+
+ err = netlink_policy_dump_start(rt->policy, rt->maxattr, &cb->args[1]);
+ if (err)
+ return err;
+
+ while (netlink_policy_dump_loop(&cb->args[1])) {
+ void *hdr;
+ struct nlattr *nest;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &genl_ctrl,
+ NLM_F_MULTI, CTRL_CMD_GETPOLICY);
+ if (!hdr)
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, rt->id))
+ goto nla_put_failure;
+
+ nest = nla_nest_start(skb, CTRL_ATTR_POLICY);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (netlink_policy_dump_write(skb, cb->args[1]))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ genlmsg_end(skb, hdr);
+ continue;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ break;
+ }
+
+ cb->args[0] = fam_id;
+ return skb->len;
+}
+
static const struct genl_ops genl_ctrl_ops[] = {
{
.cmd = CTRL_CMD_GETFAMILY,
@@ -1050,6 +1124,10 @@ static const struct genl_ops genl_ctrl_ops[] = {
.doit = ctrl_getfamily,
.dumpit = ctrl_dumpfamily,
},
+ {
+ .cmd = CTRL_CMD_GETPOLICY,
+ .dumpit = ctrl_dumppolicy,
+ },
};
static const struct genl_multicast_group genl_ctrl_groups[] = {
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
new file mode 100644
index 000000000000..f6491853c797
--- /dev/null
+++ b/net/netlink/policy.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NETLINK Policy advertisement to userspace
+ *
+ * Authors: Johannes Berg <johannes@sipsolutions.net>
+ *
+ * Copyright 2019 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+
+#define INITIAL_POLICIES_ALLOC 10
+
+struct nl_policy_dump {
+ unsigned int policy_idx;
+ unsigned int attr_idx;
+ unsigned int n_alloc;
+ struct {
+ const struct nla_policy *policy;
+ unsigned int maxtype;
+ } policies[];
+};
+
+static int add_policy(struct nl_policy_dump **statep,
+ const struct nla_policy *policy,
+ unsigned int maxtype)
+{
+ struct nl_policy_dump *state = *statep;
+ unsigned int n_alloc, i;
+
+ if (!policy || !maxtype)
+ return 0;
+
+ for (i = 0; i < state->n_alloc; i++) {
+ if (state->policies[i].policy == policy)
+ return 0;
+
+ if (!state->policies[i].policy) {
+ state->policies[i].policy = policy;
+ state->policies[i].maxtype = maxtype;
+ return 0;
+ }
+ }
+
+ n_alloc = state->n_alloc + INITIAL_POLICIES_ALLOC;
+ state = krealloc(state, struct_size(state, policies, n_alloc),
+ GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ state->policies[state->n_alloc].policy = policy;
+ state->policies[state->n_alloc].maxtype = maxtype;
+ state->n_alloc = n_alloc;
+ *statep = state;
+
+ return 0;
+}
+
+static unsigned int get_policy_idx(struct nl_policy_dump *state,
+ const struct nla_policy *policy)
+{
+ unsigned int i;
+
+ for (i = 0; i < state->n_alloc; i++) {
+ if (state->policies[i].policy == policy)
+ return i;
+ }
+
+ WARN_ON_ONCE(1);
+ return -1;
+}
+
+int netlink_policy_dump_start(const struct nla_policy *policy,
+ unsigned int maxtype,
+ unsigned long *_state)
+{
+ struct nl_policy_dump *state;
+ unsigned int policy_idx;
+ int err;
+
+ /* also returns 0 if "*_state" is our ERR_PTR() end marker */
+ if (*_state)
+ return 0;
+
+ /*
+ * walk the policies and nested ones first, and build
+ * a linear list of them.
+ */
+
+ state = kzalloc(struct_size(state, policies, INITIAL_POLICIES_ALLOC),
+ GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+ state->n_alloc = INITIAL_POLICIES_ALLOC;
+
+ err = add_policy(&state, policy, maxtype);
+ if (err)
+ return err;
+
+ for (policy_idx = 0;
+ policy_idx < state->n_alloc && state->policies[policy_idx].policy;
+ policy_idx++) {
+ const struct nla_policy *policy;
+ unsigned int type;
+
+ policy = state->policies[policy_idx].policy;
+
+ for (type = 0;
+ type <= state->policies[policy_idx].maxtype;
+ type++) {
+ switch (policy[type].type) {
+ case NLA_NESTED:
+ case NLA_NESTED_ARRAY:
+ err = add_policy(&state,
+ policy[type].nested_policy,
+ policy[type].len);
+ if (err)
+ return err;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ *_state = (unsigned long)state;
+
+ return 0;
+}
+
+static bool netlink_policy_dump_finished(struct nl_policy_dump *state)
+{
+ return state->policy_idx >= state->n_alloc ||
+ !state->policies[state->policy_idx].policy;
+}
+
+bool netlink_policy_dump_loop(unsigned long *_state)
+{
+ struct nl_policy_dump *state = (void *)*_state;
+
+ if (IS_ERR(state))
+ return false;
+
+ if (netlink_policy_dump_finished(state)) {
+ kfree(state);
+ /* store end marker instead of freed state */
+ *_state = (unsigned long)ERR_PTR(-ENOENT);
+ return false;
+ }
+
+ return true;
+}
+
+int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
+{
+ struct nl_policy_dump *state = (void *)_state;
+ const struct nla_policy *pt;
+ struct nlattr *policy, *attr;
+ enum netlink_attribute_type type;
+ bool again;
+
+send_attribute:
+ again = false;
+
+ pt = &state->policies[state->policy_idx].policy[state->attr_idx];
+
+ policy = nla_nest_start(skb, state->policy_idx);
+ if (!policy)
+ return -ENOBUFS;
+
+ attr = nla_nest_start(skb, state->attr_idx);
+ if (!attr)
+ goto nla_put_failure;
+
+ switch (pt->type) {
+ default:
+ case NLA_UNSPEC:
+ case NLA_REJECT:
+ /* skip - use NLA_MIN_LEN to advertise such */
+ nla_nest_cancel(skb, policy);
+ again = true;
+ goto next;
+ case NLA_NESTED:
+ type = NL_ATTR_TYPE_NESTED;
+ /* fall through */
+ case NLA_NESTED_ARRAY:
+ if (pt->type == NLA_NESTED_ARRAY)
+ type = NL_ATTR_TYPE_NESTED_ARRAY;
+ if (pt->nested_policy && pt->len &&
+ (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_IDX,
+ get_policy_idx(state, pt->nested_policy)) ||
+ nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
+ pt->len)))
+ goto nla_put_failure;
+ break;
+ case NLA_U8:
+ case NLA_U16:
+ case NLA_U32:
+ case NLA_U64:
+ case NLA_MSECS: {
+ struct netlink_range_validation range;
+
+ if (pt->type == NLA_U8)
+ type = NL_ATTR_TYPE_U8;
+ else if (pt->type == NLA_U16)
+ type = NL_ATTR_TYPE_U16;
+ else if (pt->type == NLA_U32)
+ type = NL_ATTR_TYPE_U32;
+ else
+ type = NL_ATTR_TYPE_U64;
+
+ nla_get_range_unsigned(pt, &range);
+
+ if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
+ range.min, NL_POLICY_TYPE_ATTR_PAD) ||
+ nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_U,
+ range.max, NL_POLICY_TYPE_ATTR_PAD))
+ goto nla_put_failure;
+ break;
+ }
+ case NLA_S8:
+ case NLA_S16:
+ case NLA_S32:
+ case NLA_S64: {
+ struct netlink_range_validation_signed range;
+
+ if (pt->type == NLA_S8)
+ type = NL_ATTR_TYPE_S8;
+ else if (pt->type == NLA_S16)
+ type = NL_ATTR_TYPE_S16;
+ else if (pt->type == NLA_S32)
+ type = NL_ATTR_TYPE_S32;
+ else
+ type = NL_ATTR_TYPE_S64;
+
+ nla_get_range_signed(pt, &range);
+
+ if (nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_S,
+ range.min, NL_POLICY_TYPE_ATTR_PAD) ||
+ nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_S,
+ range.max, NL_POLICY_TYPE_ATTR_PAD))
+ goto nla_put_failure;
+ break;
+ }
+ case NLA_BITFIELD32:
+ type = NL_ATTR_TYPE_BITFIELD32;
+ if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
+ pt->bitfield32_valid))
+ goto nla_put_failure;
+ break;
+ case NLA_EXACT_LEN:
+ type = NL_ATTR_TYPE_BINARY;
+ if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len) ||
+ nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, pt->len))
+ goto nla_put_failure;
+ break;
+ case NLA_STRING:
+ case NLA_NUL_STRING:
+ case NLA_BINARY:
+ if (pt->type == NLA_STRING)
+ type = NL_ATTR_TYPE_STRING;
+ else if (pt->type == NLA_NUL_STRING)
+ type = NL_ATTR_TYPE_NUL_STRING;
+ else
+ type = NL_ATTR_TYPE_BINARY;
+ if (pt->len && nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+ pt->len))
+ goto nla_put_failure;
+ break;
+ case NLA_MIN_LEN:
+ type = NL_ATTR_TYPE_BINARY;
+ if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len))
+ goto nla_put_failure;
+ break;
+ case NLA_FLAG:
+ type = NL_ATTR_TYPE_FLAG;
+ break;
+ }
+
+ if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_TYPE, type))
+ goto nla_put_failure;
+
+ /* finish and move state to next attribute */
+ nla_nest_end(skb, attr);
+ nla_nest_end(skb, policy);
+
+next:
+ state->attr_idx += 1;
+ if (state->attr_idx > state->policies[state->policy_idx].maxtype) {
+ state->attr_idx = 0;
+ state->policy_idx++;
+ }
+
+ if (again) {
+ if (netlink_policy_dump_finished(state))
+ return -ENODATA;
+ goto send_attribute;
+ }
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, policy);
+ return -ENOBUFS;
+}
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 57ebb29c26ad..d706bb408365 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -18,7 +18,7 @@ config AF_RXRPC
This module at the moment only supports client operations and is
currently incomplete.
- See Documentation/networking/rxrpc.txt.
+ See Documentation/networking/rxrpc.rst.
config AF_RXRPC_IPV6
bool "IPv6 support for RxRPC"
@@ -41,7 +41,7 @@ config AF_RXRPC_DEBUG
help
Say Y here to make runtime controllable debugging messages appear.
- See Documentation/networking/rxrpc.txt.
+ See Documentation/networking/rxrpc.rst.
config RXKAD
@@ -56,4 +56,4 @@ config RXKAD
Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC
through the use of the key retention service.
- See Documentation/networking/rxrpc.txt.
+ See Documentation/networking/rxrpc.rst.
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 2bbb38161851..174e903e18de 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,7 +21,7 @@ static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
/*
* RxRPC operating parameters.
*
- * See Documentation/networking/rxrpc.txt and the variable definitions for more
+ * See Documentation/networking/rxrpc.rst and the variable definitions for more
* information on the individual parameters.
*/
static struct ctl_table rxrpc_sysctl_table[] = {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bfbefb7bff9d..2f20073f4f84 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -981,6 +981,18 @@ config NET_ACT_CT
To compile this code as a module, choose M here: the
module will be called act_ct.
+config NET_ACT_GATE
+ tristate "Frame gate entry list control tc action"
+ depends on NET_CLS_ACT
+ help
+ Say Y here to allow to control the ingress flow to be passed at
+ specific time slot and be dropped at other specific time slot by
+ the gate entry list.
+
+ If unsure, say N.
+ To compile this code as a module, choose M here: the
+ module will be called act_gate.
+
config NET_IFE_SKBMARK
tristate "Support to encoding decoding skb mark on IFE action"
depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 31c367a6cd09..66bbf9a98f9e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o
obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o
obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
+obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index df4560909157..fbbec2e562f5 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -876,19 +876,14 @@ static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
return hw_stats_bf.value;
}
-static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
-static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY;
-
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
[TCA_ACT_OPTIONS] = { .type = NLA_NESTED },
- [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_flags_allowed },
- [TCA_ACT_HW_STATS] = { .type = NLA_BITFIELD32,
- .validation_data = &tca_act_hw_stats_allowed },
+ [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS),
+ [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -1454,10 +1449,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
return ret;
}
-static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
- [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &tcaa_root_flags_allowed },
+ [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON),
[TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 },
};
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
new file mode 100644
index 000000000000..35fc48795541
--- /dev/null
+++ b/net/sched/act_gate.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2020 NXP */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gate.h>
+
+static unsigned int gate_net_id;
+static struct tc_action_ops act_gate_ops;
+
+static ktime_t gate_get_time(struct tcf_gate *gact)
+{
+ ktime_t mono = ktime_get();
+
+ switch (gact->tk_offset) {
+ case TK_OFFS_MAX:
+ return mono;
+ default:
+ return ktime_mono_to_any(mono, gact->tk_offset);
+ }
+
+ return KTIME_MAX;
+}
+
+static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+{
+ struct tcf_gate_params *param = &gact->param;
+ ktime_t now, base, cycle;
+ u64 n;
+
+ base = ns_to_ktime(param->tcfg_basetime);
+ now = gate_get_time(gact);
+
+ if (ktime_after(base, now)) {
+ *start = base;
+ return 0;
+ }
+
+ cycle = param->tcfg_cycletime;
+
+ /* cycle time should not be zero */
+ if (!cycle)
+ return -EFAULT;
+
+ n = div64_u64(ktime_sub_ns(now, base), cycle);
+ *start = ktime_add_ns(base, (n + 1) * cycle);
+ return 0;
+}
+
+static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
+{
+ ktime_t expires;
+
+ expires = hrtimer_get_expires(&gact->hitimer);
+ if (expires == 0)
+ expires = KTIME_MAX;
+
+ start = min_t(ktime_t, start, expires);
+
+ hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT);
+}
+
+static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
+{
+ struct tcf_gate *gact = container_of(timer, struct tcf_gate,
+ hitimer);
+ struct tcf_gate_params *p = &gact->param;
+ struct tcfg_gate_entry *next;
+ ktime_t close_time, now;
+
+ spin_lock(&gact->tcf_lock);
+
+ next = gact->next_entry;
+
+ /* cycle start, clear pending bit, clear total octets */
+ gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0;
+ gact->current_entry_octets = 0;
+ gact->current_max_octets = next->maxoctets;
+
+ gact->current_close_time = ktime_add_ns(gact->current_close_time,
+ next->interval);
+
+ close_time = gact->current_close_time;
+
+ if (list_is_last(&next->list, &p->entries))
+ next = list_first_entry(&p->entries,
+ struct tcfg_gate_entry, list);
+ else
+ next = list_next_entry(next, list);
+
+ now = gate_get_time(gact);
+
+ if (ktime_after(now, close_time)) {
+ ktime_t cycle, base;
+ u64 n;
+
+ cycle = p->tcfg_cycletime;
+ base = ns_to_ktime(p->tcfg_basetime);
+ n = div64_u64(ktime_sub_ns(now, base), cycle);
+ close_time = ktime_add_ns(base, (n + 1) * cycle);
+ }
+
+ gact->next_entry = next;
+
+ hrtimer_set_expires(&gact->hitimer, close_time);
+
+ spin_unlock(&gact->tcf_lock);
+
+ return HRTIMER_RESTART;
+}
+
+static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_gate *gact = to_gate(a);
+
+ spin_lock(&gact->tcf_lock);
+
+ tcf_lastuse_update(&gact->tcf_tm);
+ bstats_update(&gact->tcf_bstats, skb);
+
+ if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
+ spin_unlock(&gact->tcf_lock);
+ return gact->tcf_action;
+ }
+
+ if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+ goto drop;
+
+ if (gact->current_max_octets >= 0) {
+ gact->current_entry_octets += qdisc_pkt_len(skb);
+ if (gact->current_entry_octets > gact->current_max_octets) {
+ gact->tcf_qstats.overlimits++;
+ goto drop;
+ }
+ }
+
+ spin_unlock(&gact->tcf_lock);
+
+ return gact->tcf_action;
+drop:
+ gact->tcf_qstats.drops++;
+ spin_unlock(&gact->tcf_lock);
+
+ return TC_ACT_SHOT;
+}
+
+static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
+ [TCA_GATE_ENTRY_INDEX] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_GATE] = { .type = NLA_FLAG },
+ [TCA_GATE_ENTRY_INTERVAL] = { .type = NLA_U32 },
+ [TCA_GATE_ENTRY_IPV] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_MAX_OCTETS] = { .type = NLA_S32 },
+};
+
+static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
+ [TCA_GATE_PARMS] = { .len = sizeof(struct tc_gate),
+ .type = NLA_EXACT_LEN },
+ [TCA_GATE_PRIORITY] = { .type = NLA_S32 },
+ [TCA_GATE_ENTRY_LIST] = { .type = NLA_NESTED },
+ [TCA_GATE_BASE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME] = { .type = NLA_U64 },
+ [TCA_GATE_CYCLE_TIME_EXT] = { .type = NLA_U64 },
+ [TCA_GATE_FLAGS] = { .type = NLA_U32 },
+ [TCA_GATE_CLOCKID] = { .type = NLA_S32 },
+};
+
+static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
+ struct netlink_ext_ack *extack)
+{
+ u32 interval = 0;
+
+ entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]);
+
+ if (tb[TCA_GATE_ENTRY_INTERVAL])
+ interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]);
+
+ if (interval == 0) {
+ NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+ return -EINVAL;
+ }
+
+ entry->interval = interval;
+
+ if (tb[TCA_GATE_ENTRY_IPV])
+ entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
+ else
+ entry->ipv = -1;
+
+ if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
+ entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
+ else
+ entry->maxoctets = -1;
+
+ return 0;
+}
+
+static int parse_gate_entry(struct nlattr *n, struct tcfg_gate_entry *entry,
+ int index, struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { };
+ int err;
+
+ err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+ return -EINVAL;
+ }
+
+ entry->index = index;
+
+ return fill_gate_entry(tb, entry, extack);
+}
+
+static void release_entry_list(struct list_head *entries)
+{
+ struct tcfg_gate_entry *entry, *e;
+
+ list_for_each_entry_safe(entry, e, entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+}
+
+static int parse_gate_list(struct nlattr *list_attr,
+ struct tcf_gate_params *sched,
+ struct netlink_ext_ack *extack)
+{
+ struct tcfg_gate_entry *entry;
+ struct nlattr *n;
+ int err, rem;
+ int i = 0;
+
+ if (!list_attr)
+ return -EINVAL;
+
+ nla_for_each_nested(n, list_attr, rem) {
+ if (nla_type(n) != TCA_GATE_ONE_ENTRY) {
+ NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'");
+ continue;
+ }
+
+ entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+ if (!entry) {
+ NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+ err = -ENOMEM;
+ goto release_list;
+ }
+
+ err = parse_gate_entry(n, entry, i, extack);
+ if (err < 0) {
+ kfree(entry);
+ goto release_list;
+ }
+
+ list_add_tail(&entry->list, &sched->entries);
+ i++;
+ }
+
+ sched->num_entries = i;
+
+ return i;
+
+release_list:
+ release_entry_list(&sched->entries);
+
+ return err;
+}
+
+static int tcf_gate_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+ enum tk_offsets tk_offset = TK_OFFS_TAI;
+ struct nlattr *tb[TCA_GATE_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
+ struct tcf_gate_params *p;
+ s32 clockid = CLOCK_TAI;
+ struct tcf_gate *gact;
+ struct tc_gate *parm;
+ int ret = 0, err;
+ u64 basetime = 0;
+ u32 gflags = 0;
+ s32 prio = -1;
+ ktime_t start;
+ u32 index;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_GATE_PARMS])
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_GATE_PARMS]);
+ index = parm->index;
+
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (err < 0)
+ return err;
+
+ if (err && bind)
+ return 0;
+
+ if (!err) {
+ ret = tcf_idr_create(tn, index, est, a,
+ &act_gate_ops, bind, false, 0);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+ }
+
+ ret = ACT_P_CREATED;
+ } else if (!ovr) {
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
+
+ if (tb[TCA_GATE_PRIORITY])
+ prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+
+ if (tb[TCA_GATE_BASE_TIME])
+ basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+
+ if (tb[TCA_GATE_FLAGS])
+ gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
+
+ if (tb[TCA_GATE_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ goto release_idr;
+ }
+ }
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ gact = to_gate(*a);
+
+ spin_lock_bh(&gact->tcf_lock);
+ p = &gact->param;
+
+ if (tb[TCA_GATE_CYCLE_TIME]) {
+ p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+ if (!p->tcfg_cycletime_ext)
+ goto chain_put;
+ }
+
+ INIT_LIST_HEAD(&p->entries);
+ if (tb[TCA_GATE_ENTRY_LIST]) {
+ err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+ if (err < 0)
+ goto chain_put;
+ }
+
+ if (!p->tcfg_cycletime) {
+ struct tcfg_gate_entry *entry;
+ ktime_t cycle = 0;
+
+ list_for_each_entry(entry, &p->entries, list)
+ cycle = ktime_add_ns(cycle, entry->interval);
+ p->tcfg_cycletime = cycle;
+ }
+
+ if (tb[TCA_GATE_CYCLE_TIME_EXT])
+ p->tcfg_cycletime_ext =
+ nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+
+ p->tcfg_priority = prio;
+ p->tcfg_basetime = basetime;
+ p->tcfg_clockid = clockid;
+ p->tcfg_flags = gflags;
+
+ gact->tk_offset = tk_offset;
+ hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+ gact->hitimer.function = gate_timer_func;
+
+ err = gate_get_start_time(gact, &start);
+ if (err < 0) {
+ NL_SET_ERR_MSG(extack,
+ "Internal error: failed get start time");
+ release_entry_list(&p->entries);
+ goto chain_put;
+ }
+
+ gact->current_close_time = start;
+ gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+
+ gact->next_entry = list_first_entry(&p->entries,
+ struct tcfg_gate_entry, list);
+
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+
+ gate_start_timer(gact, start);
+
+ spin_unlock_bh(&gact->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ if (ret == ACT_P_CREATED)
+ tcf_idr_insert(tn, *a);
+
+ return ret;
+
+chain_put:
+ spin_unlock_bh(&gact->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
+}
+
+static void tcf_gate_cleanup(struct tc_action *a)
+{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_gate_params *p;
+
+ hrtimer_cancel(&gact->hitimer);
+
+ p = &gact->param;
+
+ release_entry_list(&p->entries);
+}
+
+static int dumping_entry(struct sk_buff *skb,
+ struct tcfg_gate_entry *entry)
+{
+ struct nlattr *item;
+
+ item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY);
+ if (!item)
+ return -ENOSPC;
+
+ if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index))
+ goto nla_put_failure;
+
+ if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, item);
+
+nla_put_failure:
+ nla_nest_cancel(skb, item);
+ return -1;
+}
+
+static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_gate *gact = to_gate(a);
+ struct tc_gate opt = {
+ .index = gact->tcf_index,
+ .refcnt = refcount_read(&gact->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&gact->tcf_bindcnt) - bind,
+ };
+ struct tcfg_gate_entry *entry;
+ struct tcf_gate_params *p;
+ struct nlattr *entry_list;
+ struct tcf_t t;
+
+ spin_lock_bh(&gact->tcf_lock);
+ opt.action = gact->tcf_action;
+
+ p = &gact->param;
+
+ if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
+ p->tcfg_basetime, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
+ p->tcfg_cycletime, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
+ p->tcfg_cycletime_ext, TCA_GATE_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags))
+ goto nla_put_failure;
+
+ if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
+ goto nla_put_failure;
+
+ entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
+ if (!entry_list)
+ goto nla_put_failure;
+
+ list_for_each_entry(entry, &p->entries, list) {
+ if (dumping_entry(skb, entry) < 0)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, entry_list);
+
+ tcf_tm_dump(&t, &gact->tcf_tm);
+ if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
+ goto nla_put_failure;
+ spin_unlock_bh(&gact->tcf_lock);
+
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(&gact->tcf_lock);
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+ u64 lastuse, bool hw)
+{
+ struct tcf_gate *gact = to_gate(a);
+ struct tcf_t *tm = &gact->tcf_tm;
+
+ tcf_action_update_stats(a, bytes, packets, false, hw);
+ tm->lastuse = max_t(u64, tm->lastuse, lastuse);
+}
+
+static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tcf_idr_search(tn, a, index);
+}
+
+static size_t tcf_gate_get_fill_size(const struct tc_action *act)
+{
+ return nla_total_size(sizeof(struct tc_gate));
+}
+
+static struct tc_action_ops act_gate_ops = {
+ .kind = "gate",
+ .id = TCA_ID_GATE,
+ .owner = THIS_MODULE,
+ .act = tcf_gate_act,
+ .dump = tcf_gate_dump,
+ .init = tcf_gate_init,
+ .cleanup = tcf_gate_cleanup,
+ .walk = tcf_gate_walker,
+ .stats_update = tcf_gate_stats_update,
+ .get_fill_size = tcf_gate_get_fill_size,
+ .lookup = tcf_gate_search,
+ .size = sizeof(struct tcf_gate),
+};
+
+static __net_init int gate_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, gate_net_id);
+
+ return tc_action_net_init(net, tn, &act_gate_ops);
+}
+
+static void __net_exit gate_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, gate_net_id);
+}
+
+static struct pernet_operations gate_net_ops = {
+ .init = gate_init_net,
+ .exit_batch = gate_exit_net,
+ .id = &gate_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+static int __init gate_init_module(void)
+{
+ return tcf_register_action(&act_gate_ops, &gate_net_ops);
+}
+
+static void __exit gate_cleanup_module(void)
+{
+ tcf_unregister_action(&act_gate_ops, &gate_net_ops);
+}
+
+module_init(gate_init_module);
+module_exit(gate_cleanup_module);
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 11b683c45c28..7e85c91d0752 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,6 +39,7 @@
#include <net/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
#include <net/flow_offload.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -3526,6 +3527,27 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
#endif
}
+static void tcf_gate_entry_destructor(void *priv)
+{
+ struct action_gate_entry *oe = priv;
+
+ kfree(oe);
+}
+
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
+ const struct tc_action *act)
+{
+ entry->gate.entries = tcf_gate_get_list(act);
+
+ if (!entry->gate.entries)
+ return -EINVAL;
+
+ entry->destructor = tcf_gate_entry_destructor;
+ entry->destructor_priv = entry->gate.entries;
+
+ return 0;
+}
+
int tc_setup_flow_action(struct flow_action *flow_action,
const struct tcf_exts *exts)
{
@@ -3672,6 +3694,17 @@ int tc_setup_flow_action(struct flow_action *flow_action,
} else if (is_tcf_skbedit_priority(act)) {
entry->id = FLOW_ACTION_PRIORITY;
entry->priority = tcf_skbedit_priority(act);
+ } else if (is_tcf_gate(act)) {
+ entry->id = FLOW_ACTION_GATE;
+ entry->gate.index = tcf_gate_index(act);
+ entry->gate.prio = tcf_gate_prio(act);
+ entry->gate.basetime = tcf_gate_basetime(act);
+ entry->gate.cycletime = tcf_gate_cycletime(act);
+ entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+ entry->gate.num_entries = tcf_gate_num_entries(act);
+ err = tcf_gate_get_entries(entry, act);
+ if (err)
+ goto err_out;
} else {
err = -EOPNOTSUPP;
goto err_out_locked;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index c7de47c942e3..555a1b9e467f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -48,7 +48,7 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
-static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
+#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
static inline int red_use_ecn(struct red_sched_data *q)
{
@@ -212,8 +212,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
- [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
- .validation_data = &red_supported_flags },
+ [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -248,7 +247,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
return -EINVAL;
err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
- tb[TCA_RED_FLAGS], red_supported_flags,
+ tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
&flags_bf, &userbits, extack);
if (err)
return err;
@@ -372,7 +371,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
nla_put_bitfield32(skb, TCA_RED_FLAGS,
- q->flags, red_supported_flags))
+ q->flags, TC_RED_SUPPORTED_FLAGS))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6fd44bdb0fc3..6663a63be9e4 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -337,50 +337,62 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* register a new rmb, send confirm_rkey msg to register with peer */
-static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
- bool conf_rkey)
-{
- if (!rmb_desc->wr_reg) {
- /* register memory region for new rmb */
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
- rmb_desc->regerr = 1;
- return -EFAULT;
- }
- rmb_desc->wr_reg = 1;
+/* register the new rmb on all links */
+static int smcr_lgr_reg_rmbs(struct smc_link *link,
+ struct smc_buf_desc *rmb_desc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ int i, rc = 0;
+
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+ if (rc)
+ return rc;
+ /* protect against parallel smc_llc_cli_rkey_exchange() and
+ * parallel smcr_link_reg_rmb()
+ */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ continue;
+ rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
+ if (rc)
+ goto out;
}
- if (!conf_rkey)
- return 0;
+
/* exchange confirm_rkey msg with peer */
- if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
- rmb_desc->regerr = 1;
- return -EFAULT;
+ rc = smc_llc_do_confirm_rkey(link, rmb_desc);
+ if (rc) {
+ rc = -EFAULT;
+ goto out;
}
- return 0;
+ rmb_desc->is_conf_rkey = true;
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ return rc;
}
-static int smc_clnt_conf_first_link(struct smc_sock *smc)
+static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
- struct net *net = sock_net(smc->clcsock->sk);
- struct smc_link_group *lgr = smc->conn.lgr;
- struct smc_link *link;
- int rest;
+ struct smc_link *link = smc->conn.lnk;
+ struct smc_llc_qentry *qentry;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
+ link->lgr->type = SMC_LGR_SINGLE;
+
/* receive CONFIRM LINK request from server over RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(
- &link->llc_confirm,
- SMC_LLC_WAIT_FIRST_TIME);
- if (rest <= 0) {
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
-
- if (link->llc_confirm_rc)
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+ if (rc)
return SMC_CLC_DECL_RMBE_EC;
rc = smc_ib_modify_qp_rts(link);
@@ -389,34 +401,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
smc_wr_remember_qp_attr(link);
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
+ /* confirm_rkey is implicit on 1st contact */
+ smc->conn.rmb_desc->is_conf_rkey = true;
+
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
if (rc < 0)
return SMC_CLC_DECL_TIMEOUT_CL;
- /* receive ADD LINK request from server over RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(&link->llc_add,
- SMC_LLC_WAIT_TIME);
- if (rest <= 0) {
+ smc_llc_link_active(link);
+
+ /* optional 2nd link, receive ADD LINK request from server */
+ qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+ SMC_LLC_ADD_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
+ if (rc == -EAGAIN)
+ rc = 0; /* no DECLINE received, go with one link */
+ return rc;
}
-
- /* send add link reject message, only one link supported for now */
- rc = smc_llc_send_add_link(link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_RESP);
- if (rc < 0)
- return SMC_CLC_DECL_TIMEOUT_AL;
-
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
-
+ smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+ /* tbd: call smc_llc_cli_add_link(link, qentry); */
return 0;
}
@@ -596,8 +607,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
+ int i, reason_code = 0;
struct smc_link *link;
- int reason_code = 0;
ini->is_smcd = false;
ini->ib_lcl = &aclc->lcl;
@@ -610,10 +621,28 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_unlock(&smc_client_lgr_pending);
return reason_code;
}
- link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
smc_conn_save_peer_info(smc, aclc);
+ if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+ link = smc->conn.lnk;
+ } else {
+ /* set link that was assigned by server */
+ link = NULL;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *l = &smc->conn.lgr->lnk[i];
+
+ if (l->peer_qpn == ntoh24(aclc->qpn)) {
+ link = l;
+ break;
+ }
+ }
+ if (!link)
+ return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
+ ini->cln_first_contact);
+ smc->conn.lnk = link;
+ }
+
/* create send buffer and rmb */
if (smc_buf_create(smc, false))
return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
@@ -622,7 +651,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, aclc);
- if (smc_rmb_rtoken_handling(&smc->conn, aclc))
+ if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
ini->cln_first_contact);
@@ -634,7 +663,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
ini->cln_first_contact);
} else {
- if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
+ if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
ini->cln_first_contact);
}
@@ -649,7 +678,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
/* QP confirmation over RoCE fabric */
- reason_code = smc_clnt_conf_first_link(smc);
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
+ reason_code = smcr_clnt_conf_first_link(smc);
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
return smc_connect_abort(smc, reason_code,
ini->cln_first_contact);
@@ -999,17 +1030,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_put(sk); /* final sock_put */
}
-static int smc_serv_conf_first_link(struct smc_sock *smc)
+static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
- struct net *net = sock_net(smc->clcsock->sk);
- struct smc_link_group *lgr = smc->conn.lgr;
- struct smc_link *link;
- int rest;
+ struct smc_link *link = smc->conn.lnk;
+ struct smc_llc_qentry *qentry;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
+ link->lgr->type = SMC_LGR_SINGLE;
- if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+ if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -1018,40 +1047,27 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
return SMC_CLC_DECL_TIMEOUT_CL;
/* receive CONFIRM LINK response from client over the RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(
- &link->llc_confirm_resp,
- SMC_LLC_WAIT_FIRST_TIME);
- if (rest <= 0) {
+ qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_LINK);
+ if (!qentry) {
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
-
- if (link->llc_confirm_resp_rc)
+ rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
+ smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+ if (rc)
return SMC_CLC_DECL_RMBE_EC;
- /* send ADD LINK request to client over the RoCE fabric */
- rc = smc_llc_send_add_link(link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_REQ);
- if (rc < 0)
- return SMC_CLC_DECL_TIMEOUT_AL;
-
- /* receive ADD LINK response from client over the RoCE fabric */
- rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
- SMC_LLC_WAIT_TIME);
- if (rest <= 0) {
- struct smc_clc_msg_decline dclc;
-
- rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
- return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
- }
+ /* confirm_rkey is implicit on 1st contact */
+ smc->conn.rmb_desc->is_conf_rkey = true;
- smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
+ smc_llc_link_active(link);
+ /* initial contact - try to establish second link */
+ /* tbd: call smc_llc_srv_add_link(link); */
return 0;
}
@@ -1194,10 +1210,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_connection *conn = &new_smc->conn;
if (local_contact != SMC_FIRST_CONTACT) {
- if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+ if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1210,13 +1226,13 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
struct smc_clc_msg_accept_confirm *cclc,
int local_contact)
{
- struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_link *link = new_smc->conn.lnk;
int reason_code = 0;
if (local_contact == SMC_FIRST_CONTACT)
smc_link_save_peer_info(link, cclc);
- if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
+ if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
reason_code = SMC_CLC_DECL_ERR_RTOK;
goto decline;
}
@@ -1227,7 +1243,9 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
goto decline;
}
/* QP confirmation over RoCE fabric */
- reason_code = smc_serv_conf_first_link(new_smc);
+ smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
+ reason_code = smcr_serv_conf_first_link(new_smc);
+ smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
if (reason_code)
goto decline;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index be11ba41190f..1a084afa7372 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -121,6 +121,7 @@ enum smc_urg_state {
struct smc_connection {
struct rb_node alert_node;
struct smc_link_group *lgr; /* link group of connection */
+ struct smc_link *lnk; /* assigned SMC-R link */
u32 alert_token_local; /* unique conn. id */
u8 peer_rmbe_idx; /* from tcp handshake */
int peer_rmbe_size; /* size of peer rx buffer */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 164f1584861b..f64589d823aa 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_rdma_wr **wr_rdma_buf,
struct smc_cdc_tx_pend **pend)
{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_link *link = conn->lnk;
int rc;
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend)
{
+ struct smc_link *link = conn->lnk;
union smc_host_cursor cfed;
- struct smc_link *link;
int rc;
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
smc_cdc_add_pending_send(conn, pend);
conn->tx_cdc_seq++;
@@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ struct smc_link *link = conn->lnk;
smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
smc_cdc_tx_filter, smc_cdc_tx_dismisser,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ea0068f0173c..d5627df24215 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
sizeof(SMCD_EYECATCHER));
} else {
/* SMC-R specific settings */
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ link = conn->lnk;
memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
cclc.hdr.path = SMC_TYPE_R;
@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
ETH_ALEN);
hton24(cclc.qpn, link->roce_qp->qp_num);
cclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
cclc.rmbe_alert_token = htonl(conn->alert_token_local);
cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
cclc.rmbe_size = conn->rmbe_size_short;
cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(cclc.psn, link->psn_initial);
memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
aclc.hdr.path = SMC_TYPE_R;
- link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ link = conn->lnk;
memcpy(aclc.lcl.id_for_peer, local_systemid,
sizeof(local_systemid));
memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
ETH_ALEN);
hton24(aclc.qpn, link->roce_qp->qp_num);
aclc.rmb_rkey =
- htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
aclc.rmbe_alert_token = htonl(conn->alert_token_local);
aclc.qp_mtu = link->path_mtu;
aclc.rmbe_size = conn->rmbe_size_short,
aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
hton24(aclc.psn, link->psn_initial);
memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index ca209272e5fa..465876701b75 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,6 +44,8 @@
#define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */
#define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/
#define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */
+#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */
+#define SMC_CLC_DECL_NOSRVLINK 0x030b0000 /* SMC-R link from srv not found */
#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
#define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 824c5211b027..60c708f6de51 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -44,10 +44,20 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
+struct smc_ib_up_work {
+ struct work_struct work;
+ struct smc_link_group *lgr;
+ struct smc_ib_device *smcibdev;
+ u8 ibport;
+};
+
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
+static void smc_link_up_work(struct work_struct *work);
+static void smc_link_down_work(struct work_struct *work);
+
/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
spinlock_t **lgr_lock)
@@ -116,7 +126,7 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
* Requires @conns_lock
* Note that '0' is a reserved value and not assigned.
*/
-static void smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
static atomic_t nexttoken = ATOMIC_INIT(0);
@@ -131,7 +141,24 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
conn->alert_token_local = 0;
}
smc_lgr_add_alert_token(conn);
+
+ /* assign the new connection to a link */
+ if (!conn->lgr->is_smcd) {
+ struct smc_link *lnk;
+ int i;
+
+ /* tbd - link balancing */
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ lnk = &conn->lgr->lnk[i];
+ if (lnk->state == SMC_LNK_ACTIVATING ||
+ lnk->state == SMC_LNK_ACTIVE)
+ conn->lnk = lnk;
+ }
+ if (!conn->lnk)
+ return SMC_CLC_DECL_NOACTLINK;
+ }
conn->lgr->conns_num++;
+ return 0;
}
/* Unregister connection and reset the alert token of the given connection<
@@ -175,20 +202,6 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
smc_lgr_schedule_free_work_fast(lgr);
}
-/* Send delete link, either as client to request the initiation
- * of the DELETE LINK sequence from server; or as server to
- * initiate the delete processing. See smc_llc_rx_delete_link().
- */
-static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
-{
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
- smc_llc_link_deleting(lnk);
- return 0;
- }
- return -ENOTCONN;
-}
-
static void smc_lgr_free(struct smc_link_group *lgr);
static void smc_lgr_free_work(struct work_struct *work)
@@ -197,8 +210,8 @@ static void smc_lgr_free_work(struct work_struct *work)
struct smc_link_group,
free_work);
spinlock_t *lgr_lock;
- struct smc_link *lnk;
bool conns;
+ int i;
smc_lgr_list_head(lgr, &lgr_lock);
spin_lock_bh(lgr_lock);
@@ -214,26 +227,21 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
-
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- if (!lgr->is_smcd && !lgr->terminating) {
- /* try to send del link msg, on error free lgr immediately */
- if (lnk->state == SMC_LNK_ACTIVE &&
- !smc_link_send_delete(lnk, true)) {
- /* reschedule in case we never receive a response */
- smc_lgr_schedule_free_work(lgr);
- spin_unlock_bh(lgr_lock);
- return;
- }
- }
lgr->freeing = 1; /* this instance does the freeing, no new schedule */
spin_unlock_bh(lgr_lock);
cancel_delayed_work(&lgr->free_work);
- if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
if (lgr->is_smcd && !lgr->terminating)
smc_ism_signal_shutdown(lgr);
+ if (!lgr->is_smcd) {
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
+ }
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ }
smc_lgr_free(lgr);
}
@@ -245,6 +253,88 @@ static void smc_lgr_terminate_work(struct work_struct *work)
__smc_lgr_terminate(lgr, true);
}
+/* return next unique link id for the lgr */
+static u8 smcr_next_link_id(struct smc_link_group *lgr)
+{
+ u8 link_id;
+ int i;
+
+ while (1) {
+ link_id = ++lgr->next_link_id;
+ if (!link_id) /* skip zero as link_id */
+ link_id = ++lgr->next_link_id;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (smc_link_usable(&lgr->lnk[i]) &&
+ lgr->lnk[i].link_id == link_id)
+ continue;
+ }
+ break;
+ }
+ return link_id;
+}
+
+static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+ u8 link_idx, struct smc_init_info *ini)
+{
+ u8 rndvec[3];
+ int rc;
+
+ get_device(&ini->ib_dev->ibdev->dev);
+ atomic_inc(&ini->ib_dev->lnk_cnt);
+ lnk->state = SMC_LNK_ACTIVATING;
+ lnk->link_id = smcr_next_link_id(lgr);
+ lnk->lgr = lgr;
+ lnk->link_idx = link_idx;
+ lnk->smcibdev = ini->ib_dev;
+ lnk->ibport = ini->ib_port;
+ lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+ INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
+ if (!ini->ib_dev->initialized) {
+ rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+ if (rc)
+ goto out;
+ }
+ get_random_bytes(rndvec, sizeof(rndvec));
+ lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+ (rndvec[2] << 16);
+ rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+ ini->vlan_id, lnk->gid, &lnk->sgid_index);
+ if (rc)
+ goto out;
+ rc = smc_llc_link_init(lnk);
+ if (rc)
+ goto out;
+ rc = smc_wr_alloc_link_mem(lnk);
+ if (rc)
+ goto clear_llc_lnk;
+ rc = smc_ib_create_protection_domain(lnk);
+ if (rc)
+ goto free_link_mem;
+ rc = smc_ib_create_queue_pair(lnk);
+ if (rc)
+ goto dealloc_pd;
+ rc = smc_wr_create_link(lnk);
+ if (rc)
+ goto destroy_qp;
+ return 0;
+
+destroy_qp:
+ smc_ib_destroy_queue_pair(lnk);
+dealloc_pd:
+ smc_ib_dealloc_protection_domain(lnk);
+free_link_mem:
+ smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+ smc_llc_link_clear(lnk);
+out:
+ put_device(&ini->ib_dev->ibdev->dev);
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+ wake_up(&ini->ib_dev->lnks_deleted);
+ return rc;
+}
+
/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
@@ -252,7 +342,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
struct list_head *lgr_list;
struct smc_link *lnk;
spinlock_t *lgr_lock;
- u8 rndvec[3];
+ u8 link_idx;
int rc = 0;
int i;
@@ -274,13 +364,14 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freefast = 0;
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
- rwlock_init(&lgr->sndbufs_lock);
- rwlock_init(&lgr->rmbs_lock);
+ mutex_init(&lgr->sndbufs_lock);
+ mutex_init(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
INIT_LIST_HEAD(&lgr->rmbs[i]);
}
+ lgr->next_link_id = 0;
smc_lgr_list.num += SMC_LGR_NUM_INCR;
memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
@@ -297,48 +388,21 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
atomic_inc(&ini->ism_dev->lgr_cnt);
} else {
/* SMC-R specific settings */
- get_device(&ini->ib_dev->ibdev->dev);
lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
SMC_SYSTEMID_LEN);
+ memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+ SMC_MAX_PNETID_LEN);
+ smc_llc_lgr_init(lgr, smc);
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- /* initialize link */
- lnk->state = SMC_LNK_ACTIVATING;
- lnk->link_id = SMC_SINGLE_LINK;
- lnk->smcibdev = ini->ib_dev;
- lnk->ibport = ini->ib_port;
- lgr_list = &smc_lgr_list.list;
- lgr_lock = &smc_lgr_list.lock;
- lnk->path_mtu =
- ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
- if (!ini->ib_dev->initialized)
- smc_ib_setup_per_ibdev(ini->ib_dev);
- get_random_bytes(rndvec, sizeof(rndvec));
- lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
- (rndvec[2] << 16);
- rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
- ini->vlan_id, lnk->gid,
- &lnk->sgid_index);
- if (rc)
- goto free_lgr;
- rc = smc_llc_link_init(lnk);
+ link_idx = SMC_SINGLE_LINK;
+ lnk = &lgr->lnk[link_idx];
+ rc = smcr_link_init(lgr, lnk, link_idx, ini);
if (rc)
goto free_lgr;
- rc = smc_wr_alloc_link_mem(lnk);
- if (rc)
- goto clear_llc_lnk;
- rc = smc_ib_create_protection_domain(lnk);
- if (rc)
- goto free_link_mem;
- rc = smc_ib_create_queue_pair(lnk);
- if (rc)
- goto dealloc_pd;
- rc = smc_wr_create_link(lnk);
- if (rc)
- goto destroy_qp;
+ lgr_list = &smc_lgr_list.list;
+ lgr_lock = &smc_lgr_list.lock;
atomic_inc(&lgr_cnt);
- atomic_inc(&ini->ib_dev->lnk_cnt);
}
smc->conn.lgr = lgr;
spin_lock_bh(lgr_lock);
@@ -346,14 +410,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
spin_unlock_bh(lgr_lock);
return 0;
-destroy_qp:
- smc_ib_destroy_queue_pair(lnk);
-dealloc_pd:
- smc_ib_dealloc_protection_domain(lnk);
-free_link_mem:
- smc_wr_free_link_mem(lnk);
-clear_llc_lnk:
- smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
ism_put_vlan:
@@ -369,29 +425,45 @@ out:
return rc;
}
+static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+ struct smc_link_group *lgr)
+{
+ int rc;
+
+ if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+ /* unregister rmb with peer */
+ rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+ if (!rc) {
+ /* protect against smc_llc_cli_rkey_exchange() */
+ mutex_lock(&lgr->llc_conf_mutex);
+ smc_llc_do_delete_rkey(lgr, rmb_desc);
+ rmb_desc->is_conf_rkey = false;
+ mutex_unlock(&lgr->llc_conf_mutex);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+ }
+ }
+
+ if (rmb_desc->is_reg_err) {
+ /* buf registration failed, reuse not possible */
+ mutex_lock(&lgr->rmbs_lock);
+ list_del(&rmb_desc->list);
+ mutex_unlock(&lgr->rmbs_lock);
+
+ smc_buf_free(lgr, true, rmb_desc);
+ } else {
+ rmb_desc->used = 0;
+ }
+}
+
static void smc_buf_unuse(struct smc_connection *conn,
struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
- if (conn->rmb_desc) {
- if (!conn->rmb_desc->regerr) {
- if (!lgr->is_smcd && !list_empty(&lgr->list)) {
- /* unregister rmb with peer */
- smc_llc_do_delete_rkey(
- &lgr->lnk[SMC_SINGLE_LINK],
- conn->rmb_desc);
- }
- conn->rmb_desc->used = 0;
- } else {
- /* buf registration failed, reuse not possible */
- write_lock_bh(&lgr->rmbs_lock);
- list_del(&conn->rmb_desc->list);
- write_unlock_bh(&lgr->rmbs_lock);
-
- smc_buf_free(lgr, true, conn->rmb_desc);
- }
- }
+ if (conn->rmb_desc && lgr->is_smcd)
+ conn->rmb_desc->used = 0;
+ else if (conn->rmb_desc)
+ smcr_buf_unuse(conn->rmb_desc, lgr);
}
/* remove a finished connection from its link group */
@@ -417,35 +489,91 @@ void smc_conn_free(struct smc_connection *conn)
smc_lgr_schedule_free_work(lgr);
}
-static void smc_link_clear(struct smc_link *lnk)
+/* unregister a link from a buf_desc */
+static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ if (is_rmb)
+ buf_desc->is_reg_mr[lnk->link_idx] = false;
+ if (!buf_desc->is_map_ib[lnk->link_idx])
+ return;
+ if (is_rmb) {
+ if (buf_desc->mr_rx[lnk->link_idx]) {
+ smc_ib_put_memory_region(
+ buf_desc->mr_rx[lnk->link_idx]);
+ buf_desc->mr_rx[lnk->link_idx] = NULL;
+ }
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+ } else {
+ smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+ }
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ buf_desc->is_map_ib[lnk->link_idx] = false;
+}
+
+/* unmap all buffers of lgr for a deleted link */
+static void smcr_buf_unmap_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ mutex_lock(&lgr->rmbs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
+ smcr_buf_unmap_link(buf_desc, true, lnk);
+ mutex_unlock(&lgr->rmbs_lock);
+ mutex_lock(&lgr->sndbufs_lock);
+ list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
+ list)
+ smcr_buf_unmap_link(buf_desc, false, lnk);
+ mutex_unlock(&lgr->sndbufs_lock);
+ }
+}
+
+static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ lgr->rtokens[i][lnk->link_idx].rkey = 0;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_clear(struct smc_link *lnk)
+{
+ struct smc_ib_device *smcibdev;
+
+ if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
+ return;
lnk->peer_qpn = 0;
smc_llc_link_clear(lnk);
+ smcr_buf_unmap_lgr(lnk);
+ smcr_rtoken_clear_link(lnk);
smc_ib_modify_qp_reset(lnk);
smc_wr_free_link(lnk);
smc_ib_destroy_queue_pair(lnk);
smc_ib_dealloc_protection_domain(lnk);
smc_wr_free_link_mem(lnk);
- if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
- wake_up(&lnk->smcibdev->lnks_deleted);
+ put_device(&lnk->smcibdev->ibdev->dev);
+ smcibdev = lnk->smcibdev;
+ memset(lnk, 0, sizeof(struct smc_link));
+ lnk->state = SMC_LNK_UNUSED;
+ if (!atomic_dec_return(&smcibdev->lnk_cnt))
+ wake_up(&smcibdev->lnks_deleted);
}
static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
struct smc_buf_desc *buf_desc)
{
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
- if (is_rmb) {
- if (buf_desc->mr_rx[SMC_SINGLE_LINK])
- smc_ib_put_memory_region(
- buf_desc->mr_rx[SMC_SINGLE_LINK]);
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_FROM_DEVICE);
- } else {
- smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
- DMA_TO_DEVICE);
- }
- sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
if (buf_desc->pages)
__free_pages(buf_desc->pages, buf_desc->order);
kfree(buf_desc);
@@ -503,6 +631,8 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
+ int i;
+
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd) {
if (!lgr->terminating) {
@@ -512,8 +642,11 @@ static void smc_lgr_free(struct smc_link_group *lgr)
if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
wake_up(&lgr->smcd->lgrs_deleted);
} else {
- smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
- put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+ smcr_link_clear(&lgr->lnk[i]);
+ }
+ smc_llc_lgr_clear(lgr);
if (!atomic_dec_return(&lgr_cnt))
wake_up(&lgrs_deleted);
}
@@ -581,16 +714,21 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
+ int i;
+
if (lgr->is_smcd) {
smc_ism_signal_shutdown(lgr);
smcd_unregister_all_dmbs(lgr);
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
put_device(&lgr->smcd->dev);
} else {
- struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
- if (lnk->state != SMC_LNK_INACTIVE)
- smc_llc_link_inactive(lnk);
+ if (smc_link_usable(lnk))
+ lnk->state = SMC_LNK_INACTIVE;
+ }
+ wake_up_interruptible_all(&lgr->llc_waiter);
}
}
@@ -609,8 +747,6 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
if (!soft)
cancel_delayed_work_sync(&lgr->free_work);
lgr->terminating = 1;
- if (!lgr->is_smcd)
- smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
/* kill remaining link group connections */
read_lock_bh(&lgr->conns_lock);
@@ -651,29 +787,6 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
schedule_work(&lgr->terminate_work);
}
-/* Called when IB port is terminated */
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
- struct smc_link_group *lgr, *l;
- LIST_HEAD(lgr_free_list);
-
- spin_lock_bh(&smc_lgr_list.lock);
- list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
- if (!lgr->is_smcd &&
- lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
- lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
- }
- }
- spin_unlock_bh(&smc_lgr_list.lock);
-
- list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
- list_del_init(&lgr->list);
- __smc_lgr_terminate(lgr, false);
- }
-}
-
/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
@@ -728,6 +841,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
struct smc_link_group *lgr, *lg;
LIST_HEAD(lgr_free_list);
+ int i;
spin_lock_bh(&smc_lgr_list.lock);
if (!smcibdev) {
@@ -736,9 +850,9 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
lgr->freeing = 1;
} else {
list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
- if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
- list_move(&lgr->list, &lgr_free_list);
- lgr->freeing = 1;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].smcibdev == smcibdev)
+ smcr_link_down_cond_sched(&lgr->lnk[i]);
}
}
}
@@ -759,6 +873,170 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
}
}
+/* link is up - establish alternate link if applicable */
+static void smcr_link_up(struct smc_link_group *lgr,
+ struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link *link = NULL;
+
+ if (list_empty(&lgr->list) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ return;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local add link processing */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ /* tbd: call smc_llc_srv_add_link_local(link); */
+ } else {
+ /* invite server to start add link processing */
+ u8 gid[SMC_GID_SIZE];
+
+ if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
+ NULL))
+ return;
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* some other llc task is ongoing */
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ }
+ if (list_empty(&lgr->list) ||
+ !smc_ib_port_active(smcibdev, ibport))
+ return; /* lgr or device no longer active */
+ link = smc_llc_usable_link(lgr);
+ if (!link)
+ return;
+ smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
+ NULL, SMC_LLC_REQ);
+ }
+}
+
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_ib_up_work *ib_work;
+ struct smc_link_group *lgr, *n;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN) ||
+ lgr->type == SMC_LGR_SYMMETRIC ||
+ lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+ continue;
+ ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
+ if (!ib_work)
+ continue;
+ INIT_WORK(&ib_work->work, smc_link_up_work);
+ ib_work->lgr = lgr;
+ ib_work->smcibdev = smcibdev;
+ ib_work->ibport = ibport;
+ schedule_work(&ib_work->work);
+ }
+}
+
+/* link is down - switch connections to alternate link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+static void smcr_link_down(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_link *to_lnk;
+ int del_link_id;
+
+ if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+ return;
+
+ smc_ib_modify_qp_reset(lnk);
+ to_lnk = NULL;
+ /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
+ if (!to_lnk) { /* no backup link available */
+ smcr_link_clear(lnk);
+ return;
+ }
+ lgr->type = SMC_LGR_SINGLE;
+ del_link_id = lnk->link_id;
+
+ if (lgr->role == SMC_SERV) {
+ /* trigger local delete link processing */
+ } else {
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* another llc task is ongoing */
+ mutex_unlock(&lgr->llc_conf_mutex);
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+ SMC_LLC_WAIT_TIME);
+ mutex_lock(&lgr->llc_conf_mutex);
+ }
+ smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
+ SMC_LLC_DEL_LOST_PATH);
+ }
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_down_cond(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ smcr_link_down(lnk);
+}
+
+/* will get the lgr->llc_conf_mutex lock */
+void smcr_link_down_cond_sched(struct smc_link *lnk)
+{
+ if (smc_link_downing(&lnk->state))
+ schedule_work(&lnk->link_down_wrk);
+}
+
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *n;
+ int i;
+
+ list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+ if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+ SMC_MAX_PNETID_LEN))
+ continue; /* lgr is not affected */
+ if (list_empty(&lgr->list))
+ continue;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
+
+ if (smc_link_usable(lnk) &&
+ lnk->smcibdev == smcibdev && lnk->ibport == ibport)
+ smcr_link_down_cond_sched(lnk);
+ }
+ }
+}
+
+static void smc_link_up_work(struct work_struct *work)
+{
+ struct smc_ib_up_work *ib_work = container_of(work,
+ struct smc_ib_up_work,
+ work);
+ struct smc_link_group *lgr = ib_work->lgr;
+
+ if (list_empty(&lgr->list))
+ goto out;
+ smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
+out:
+ kfree(ib_work);
+}
+
+static void smc_link_down_work(struct work_struct *work)
+{
+ struct smc_link *link = container_of(work, struct smc_link,
+ link_down_wrk);
+ struct smc_link_group *lgr = link->lgr;
+
+ if (list_empty(&lgr->list))
+ return;
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ mutex_lock(&lgr->llc_conf_mutex);
+ smcr_link_down(link);
+ mutex_unlock(&lgr->llc_conf_mutex);
+}
+
/* Determine vlan of internal TCP socket.
* @vlan_id: address to store the determined vlan id into
*/
@@ -810,15 +1088,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
enum smc_lgr_role role, u32 clcqpn)
{
- return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
- SMC_SYSTEMID_LEN) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
- SMC_GID_SIZE) &&
- !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
- sizeof(lcl->mac)) &&
- lgr->role == role &&
- (lgr->role == SMC_SERV ||
- lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
+ int i;
+
+ if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+ lgr->role != role)
+ return false;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+ continue;
+ if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+ !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+ !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+ return true;
+ }
+ return false;
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -859,15 +1143,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
/* link group found */
ini->cln_first_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
- if (delayed_work_pending(&lgr->free_work))
- cancel_delayed_work(&lgr->free_work);
+ rc = smc_lgr_register_conn(conn); /* add conn to lgr */
write_unlock_bh(&lgr->conns_lock);
+ if (!rc && delayed_work_pending(&lgr->free_work))
+ cancel_delayed_work(&lgr->free_work);
break;
}
write_unlock_bh(&lgr->conns_lock);
}
spin_unlock_bh(lgr_lock);
+ if (rc)
+ return rc;
if (role == SMC_CLNT && !ini->srv_first_contact &&
ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -885,8 +1171,10 @@ create:
goto out;
lgr = conn->lgr;
write_lock_bh(&lgr->conns_lock);
- smc_lgr_register_conn(conn); /* add smc conn to lgr */
+ rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
write_unlock_bh(&lgr->conns_lock);
+ if (rc)
+ goto out;
}
conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
@@ -934,19 +1222,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- rwlock_t *lock,
+ struct mutex *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- read_lock_bh(lock);
+ mutex_lock(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return buf_slot;
}
}
- read_unlock_bh(lock);
+ mutex_unlock(lock);
return NULL;
}
@@ -959,12 +1247,135 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
+/* map an rmb buf to a link */
+static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+ struct smc_link *lnk)
+{
+ int rc;
+
+ if (buf_desc->is_map_ib[lnk->link_idx])
+ return 0;
+
+ rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+ if (rc)
+ return rc;
+ sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+ buf_desc->cpu_addr, buf_desc->len);
+
+ /* map sg table to DMA address */
+ rc = smc_ib_buf_map_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+ /* SMC protocol depends on mapping to one DMA address only */
+ if (rc != 1) {
+ rc = -EAGAIN;
+ goto free_table;
+ }
+
+ /* create a new memory region for the RMB */
+ if (is_rmb) {
+ rc = smc_ib_get_memory_region(lnk->roce_pd,
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_LOCAL_WRITE,
+ buf_desc, lnk->link_idx);
+ if (rc)
+ goto buf_unmap;
+ smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+ }
+ buf_desc->is_map_ib[lnk->link_idx] = true;
+ return 0;
+
+buf_unmap:
+ smc_ib_buf_unmap_sg(lnk, buf_desc,
+ is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+free_table:
+ sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+ return rc;
+}
+
+/* register a new rmb on IB device,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+ if (list_empty(&link->lgr->list))
+ return -ENOLINK;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
+ rmb_desc->is_reg_err = true;
+ return -EFAULT;
+ }
+ rmb_desc->is_reg_mr[link->link_idx] = true;
+ }
+ return 0;
+}
+
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+ struct list_head *lst, bool is_rmb)
+{
+ struct smc_buf_desc *buf_desc, *bf;
+ int rc = 0;
+
+ mutex_lock(lock);
+ list_for_each_entry_safe(buf_desc, bf, lst, list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
+ if (rc)
+ goto out;
+ }
+out:
+ mutex_unlock(lock);
+ return rc;
+}
+
+/* map all used buffers of lgr for a new link */
+int smcr_buf_map_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ int i, rc = 0;
+
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
+ &lgr->rmbs[i], true);
+ if (rc)
+ return rc;
+ rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
+ &lgr->sndbufs[i], false);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+/* register all used buffers of lgr for a new link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+int smcr_buf_reg_lgr(struct smc_link *lnk)
+{
+ struct smc_link_group *lgr = lnk->lgr;
+ struct smc_buf_desc *buf_desc, *bf;
+ int i, rc = 0;
+
+ mutex_lock(&lgr->rmbs_lock);
+ for (i = 0; i < SMC_RMBE_SIZES; i++) {
+ list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
+ if (!buf_desc->used)
+ continue;
+ rc = smcr_link_reg_rmb(lnk, buf_desc);
+ if (rc)
+ goto out;
+ }
+ }
+out:
+ mutex_unlock(&lgr->rmbs_lock);
+ return rc;
+}
+
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
bool is_rmb, int bufsize)
{
struct smc_buf_desc *buf_desc;
- struct smc_link *lnk;
- int rc;
/* try to alloc a new buffer */
buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
@@ -981,41 +1392,33 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
return ERR_PTR(-EAGAIN);
}
buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
+ buf_desc->len = bufsize;
+ return buf_desc;
+}
- /* build the sg table from the pages */
- lnk = &lgr->lnk[SMC_SINGLE_LINK];
- rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
- GFP_KERNEL);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
- }
- sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
- buf_desc->cpu_addr, bufsize);
+/* map buf_desc on all usable links,
+ * unused buffers stay mapped as long as the link is up
+ */
+static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+ struct smc_buf_desc *buf_desc, bool is_rmb)
+{
+ int i, rc = 0;
- /* map sg table to DMA address */
- rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
- is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
- /* SMC protocol depends on mapping to one DMA address only */
- if (rc != 1) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(-EAGAIN);
- }
+ /* protect against parallel link reconfiguration */
+ mutex_lock(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ struct smc_link *lnk = &lgr->lnk[i];
- /* create a new memory region for the RMB */
- if (is_rmb) {
- rc = smc_ib_get_memory_region(lnk->roce_pd,
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_LOCAL_WRITE,
- buf_desc);
- if (rc) {
- smc_buf_free(lgr, is_rmb, buf_desc);
- return ERR_PTR(rc);
+ if (!smc_link_usable(lnk))
+ continue;
+ if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
+ rc = -ENOMEM;
+ goto out;
}
}
-
- buf_desc->len = bufsize;
- return buf_desc;
+out:
+ mutex_unlock(&lgr->llc_conf_mutex);
+ return rc;
}
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
@@ -1062,8 +1465,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct mutex *lock; /* lock buffer list */
int sk_buf_size;
- rwlock_t *lock;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
@@ -1104,15 +1507,22 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
continue;
buf_desc->used = 1;
- write_lock_bh(lock);
+ mutex_lock(lock);
list_add(&buf_desc->list, buf_list);
- write_unlock_bh(lock);
+ mutex_unlock(lock);
break; /* found */
}
if (IS_ERR(buf_desc))
return -ENOMEM;
+ if (!is_smcd) {
+ if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+ smcr_buf_unuse(buf_desc, lgr);
+ return -ENOMEM;
+ }
+ }
+
if (is_rmb) {
conn->rmb_desc = buf_desc;
conn->rmbe_size_short = bufsize_short;
@@ -1132,42 +1542,44 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!conn->lgr || conn->lgr->is_smcd)
+ if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->sndbuf_desc, DMA_TO_DEVICE);
+ smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
+ int i;
if (!conn->lgr || conn->lgr->is_smcd)
return;
- smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
- conn->rmb_desc, DMA_FROM_DEVICE);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_usable(&conn->lgr->lnk[i]))
+ continue;
+ smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
+ DMA_FROM_DEVICE);
+ }
}
/* create the send and receive buffer for an SMC socket;
@@ -1202,16 +1614,64 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
return -ENOSPC;
}
+static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
+ u32 rkey)
+{
+ int i;
+
+ for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+ if (test_bit(i, lgr->rtokens_used_mask) &&
+ lgr->rtokens[i][lnk_idx].rkey == rkey)
+ return i;
+ }
+ return -ENOENT;
+}
+
+/* set rtoken for a new link to an existing rmb */
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
+{
+ int rtok_idx;
+
+ rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
+ if (rtok_idx == -ENOENT)
+ return;
+ lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
+ lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
+}
+
+/* set rtoken for a new link whose link_id is given */
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+ __be64 nw_vaddr, __be32 nw_rkey)
+{
+ u64 dma_addr = be64_to_cpu(nw_vaddr);
+ u32 rkey = ntohl(nw_rkey);
+ bool found = false;
+ int link_idx;
+
+ for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
+ if (lgr->lnk[link_idx].link_id == link_id) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return;
+ lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
+ lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
+}
+
/* add a new rtoken from peer */
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u64 dma_addr = be64_to_cpu(nw_vaddr);
u32 rkey = ntohl(nw_rkey);
int i;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
- (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
+ lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
test_bit(i, lgr->rtokens_used_mask)) {
/* already in list */
return i;
@@ -1220,23 +1680,25 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
i = smc_rmb_reserve_rtoken_idx(lgr);
if (i < 0)
return i;
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+ lgr->rtokens[i][lnk->link_idx].rkey = rkey;
+ lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
return i;
}
-/* delete an rtoken */
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+/* delete an rtoken from all links */
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
+ struct smc_link_group *lgr = smc_get_lgr(lnk);
u32 rkey = ntohl(nw_rkey);
- int i;
+ int i, j;
for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
- if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+ if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
test_bit(i, lgr->rtokens_used_mask)) {
- lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
- lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
-
+ for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
+ lgr->rtokens[i][j].rkey = 0;
+ lgr->rtokens[i][j].dma_addr = 0;
+ }
clear_bit(i, lgr->rtokens_used_mask);
return 0;
}
@@ -1246,9 +1708,10 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
+ struct smc_link *lnk,
struct smc_clc_msg_accept_confirm *clc)
{
- conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+ conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
clc->rmb_rkey);
if (conn->rtoken_idx < 0)
return conn->rtoken_idx;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 8041db20c753..555ada9d2423 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,10 +32,10 @@ enum smc_lgr_role { /* possible roles of a link group */
};
enum smc_link_state { /* possible states of a link */
+ SMC_LNK_UNUSED, /* link is unused */
SMC_LNK_INACTIVE, /* link is inactive */
SMC_LNK_ACTIVATING, /* link is being activated */
SMC_LNK_ACTIVE, /* link is active */
- SMC_LNK_DELETING, /* link is being deleted */
};
#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
@@ -115,29 +115,20 @@ struct smc_link {
u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
u8 link_id; /* unique # within link group */
+ u8 link_idx; /* index in lgr link array */
+ struct smc_link_group *lgr; /* parent link group */
+ struct work_struct link_down_wrk; /* wrk to bring link down */
enum smc_link_state state; /* state of link */
- struct workqueue_struct *llc_wq; /* single thread work queue */
- struct completion llc_confirm; /* wait for rx of conf link */
- struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
- int llc_confirm_rc; /* rc from confirm link msg */
- int llc_confirm_resp_rc; /* rc from conf_resp msg */
- struct completion llc_add; /* wait for rx of add link */
- struct completion llc_add_resp; /* wait for rx of add link rsp*/
struct delayed_work llc_testlink_wrk; /* testlink worker */
struct completion llc_testlink_resp; /* wait for rx of testlink */
int llc_testlink_time; /* testlink interval */
- struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
- int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
- struct completion llc_delete_rkey; /* wait 4 rx of del rkey */
- int llc_delete_rkey_rc; /* rc from del rkey msg */
- struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
/* For now we just allow one parallel link per link group. The SMC protocol
* allows more (up to 8).
*/
-#define SMC_LINKS_PER_LGR_MAX 1
+#define SMC_LINKS_PER_LGR_MAX 3
#define SMC_SINGLE_LINK 0
#define SMC_FIRST_CONTACT 1 /* first contact to a peer */
@@ -150,25 +141,32 @@ struct smc_buf_desc {
struct page *pages;
int len; /* length of buffer */
u32 used; /* currently used / unused */
- u8 wr_reg : 1; /* mem region registered */
- u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
- struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
- /* virtual buffer */
- struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
- /* for rmb only: memory region
- * incl. rkey provided to peer
- */
- u32 order; /* allocation order */
+ struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
+ /* virtual buffer */
+ struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
+ /* for rmb only: memory region
+ * incl. rkey provided to peer
+ */
+ u32 order; /* allocation order */
+
+ u8 is_conf_rkey;
+ /* confirm_rkey done */
+ u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX];
+ /* mem region registered */
+ u8 is_map_ib[SMC_LINKS_PER_LGR_MAX];
+ /* mem region mapped to lnk */
+ u8 is_reg_err;
+ /* buffer registration err */
};
struct { /* SMC-D */
- unsigned short sba_idx;
- /* SBA index number */
- u64 token;
- /* DMB token number */
- dma_addr_t dma_addr;
- /* DMA address */
+ unsigned short sba_idx;
+ /* SBA index number */
+ u64 token;
+ /* DMB token number */
+ dma_addr_t dma_addr;
+ /* DMA address */
};
};
};
@@ -188,6 +186,28 @@ struct smc_rtoken { /* address/key of remote RMB */
struct smcd_dev;
+enum smc_lgr_type { /* redundancy state of lgr */
+ SMC_LGR_NONE, /* no active links, lgr to be deleted */
+ SMC_LGR_SINGLE, /* 1 active RNIC on each peer */
+ SMC_LGR_SYMMETRIC, /* 2 active RNICs on each peer */
+ SMC_LGR_ASYMMETRIC_PEER, /* local has 2, peer 1 active RNICs */
+ SMC_LGR_ASYMMETRIC_LOCAL, /* local has 1, peer 2 active RNICs */
+};
+
+enum smc_llc_flowtype {
+ SMC_LLC_FLOW_NONE = 0,
+ SMC_LLC_FLOW_ADD_LINK = 2,
+ SMC_LLC_FLOW_DEL_LINK = 4,
+ SMC_LLC_FLOW_RKEY = 6,
+};
+
+struct smc_llc_qentry;
+
+struct smc_llc_flow {
+ enum smc_llc_flowtype type;
+ struct smc_llc_qentry *qentry;
+};
+
struct smc_link_group {
struct list_head list;
struct rb_root conns_all; /* connection tree */
@@ -196,9 +216,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- rwlock_t sndbufs_lock; /* protects tx buffers */
+ struct mutex sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- rwlock_t rmbs_lock; /* protects rx buffers */
+ struct mutex rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -222,6 +242,32 @@ struct smc_link_group {
/* remote addr/key pairs */
DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
/* used rtoken elements */
+ u8 next_link_id;
+ enum smc_lgr_type type;
+ /* redundancy state */
+ u8 pnet_id[SMC_MAX_PNETID_LEN + 1];
+ /* pnet id of this lgr */
+ struct list_head llc_event_q;
+ /* queue for llc events */
+ spinlock_t llc_event_q_lock;
+ /* protects llc_event_q */
+ struct mutex llc_conf_mutex;
+ /* protects lgr reconfig. */
+ struct work_struct llc_add_link_work;
+ struct work_struct llc_event_work;
+ /* llc event worker */
+ wait_queue_head_t llc_waiter;
+ /* w4 next llc event */
+ struct smc_llc_flow llc_flow_lcl;
+ /* llc local control field */
+ struct smc_llc_flow llc_flow_rmt;
+ /* llc remote control field */
+ struct smc_llc_qentry *delayed_event;
+ /* arrived when flow active */
+ spinlock_t llc_flow_lock;
+ /* protects llc flow */
+ int llc_testlink_time;
+ /* link keep alive time */
};
struct { /* SMC-D */
u64 peer_gid;
@@ -285,6 +331,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
return res;
}
+/* returns true if the specified link is usable */
+static inline bool smc_link_usable(struct smc_link *lnk)
+{
+ if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
+ return false;
+ return true;
+}
+
struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
@@ -292,17 +346,22 @@ struct smc_clc_msg_local;
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_cleanup_early(struct smc_connection *conn);
void smc_lgr_terminate_sched(struct smc_link_group *lgr);
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
unsigned short vlan);
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
+int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
struct smc_clc_msg_accept_confirm *clc);
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+ __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+ __be64 nw_vaddr, __be32 nw_rkey);
void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
@@ -315,8 +374,15 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
int smc_core_init(void);
void smc_core_exit(void);
+void smcr_link_clear(struct smc_link *lnk);
+int smcr_buf_map_lgr(struct smc_link *lnk);
+int smcr_buf_reg_lgr(struct smc_link *lnk);
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+void smcr_link_down_cond(struct smc_link *lnk);
+void smcr_link_down_cond_sched(struct smc_link *lnk);
+
static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
{
- return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+ return link->lgr;
}
#endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 04b6fefb8bce..2c743caad69a 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -249,9 +249,10 @@ static void smc_ib_port_event_work(struct work_struct *work)
clear_bit(port_idx, &smcibdev->port_event_mask);
if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
set_bit(port_idx, smcibdev->ports_going_away);
- smc_port_terminate(smcibdev, port_idx + 1);
+ smcr_port_err(smcibdev, port_idx + 1);
} else {
clear_bit(port_idx, smcibdev->ports_going_away);
+ smcr_port_add(smcibdev, port_idx + 1);
}
}
}
@@ -389,15 +390,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr)
ib_dereg_mr(mr);
}
-static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
{
unsigned int offset = 0;
int sg_num;
/* map the largest prefix of a dma mapped SG list */
- sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
+ buf_slot->sgt[link_idx].sgl,
+ buf_slot->sgt[link_idx].orig_nents,
&offset, PAGE_SIZE);
return sg_num;
@@ -405,29 +406,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
/* Allocate a memory region and map the dma mapped SG list of buf_slot */
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct smc_buf_desc *buf_slot)
+ struct smc_buf_desc *buf_slot, u8 link_idx)
{
- if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+ if (buf_slot->mr_rx[link_idx])
return 0; /* already done */
- buf_slot->mr_rx[SMC_SINGLE_LINK] =
+ buf_slot->mr_rx[link_idx] =
ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
- if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+ if (IS_ERR(buf_slot->mr_rx[link_idx])) {
int rc;
- rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
- buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+ rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
+ buf_slot->mr_rx[link_idx] = NULL;
return rc;
}
- if (smc_ib_map_mr_sg(buf_slot) != 1)
+ if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
return -EINVAL;
return 0;
}
/* synchronize buffer usage for cpu access */
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
@@ -435,11 +436,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
- for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
- buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+ buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
- ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+ ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
@@ -447,7 +448,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
}
/* synchronize buffer usage for device access */
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
@@ -455,11 +456,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
unsigned int i;
/* for now there is just one DMA address */
- for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
- buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+ for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+ buf_slot->sgt[lnk->link_idx].nents, i) {
if (!sg_dma_len(sg))
break;
- ib_dma_sync_single_for_device(smcibdev->ibdev,
+ ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
sg_dma_address(sg),
sg_dma_len(sg),
data_direction);
@@ -467,15 +468,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
}
/* Map a new TX or RX buffer SG-table to DMA */
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
int mapped_nents;
- mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
+ buf_slot->sgt[lnk->link_idx].sgl,
+ buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
if (!mapped_nents)
return -ENOMEM;
@@ -483,18 +484,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
return mapped_nents;
}
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction)
{
- if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
+ if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
return; /* already unmapped */
- ib_dma_unmap_sg(smcibdev->ibdev,
- buf_slot->sgt[SMC_SINGLE_LINK].sgl,
- buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+ ib_dma_unmap_sg(lnk->smcibdev->ibdev,
+ buf_slot->sgt[lnk->link_idx].sgl,
+ buf_slot->sgt[lnk->link_idx].orig_nents,
data_direction);
- buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
+ buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
}
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
@@ -579,8 +580,9 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
i++) {
set_bit(i, &smcibdev->port_event_mask);
/* determine pnetids of the port */
- smc_pnetid_by_dev_port(ibdev->dev.parent, i,
- smcibdev->pnetid[i]);
+ if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+ smcibdev->pnetid[i]))
+ smc_pnetid_by_table_ib(smcibdev, i + 1);
}
schedule_work(&smcibdev->port_event_work);
}
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5c2b115d36da..e6a696ae15f3 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,10 +59,10 @@ struct smc_link;
int smc_ib_register_client(void) __init;
void smc_ib_unregister_client(void);
bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
@@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk);
int smc_ib_modify_qp_reset(struct smc_link *lnk);
long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
- struct smc_buf_desc *buf_slot);
+ struct smc_buf_desc *buf_slot, u8 link_idx);
void smc_ib_put_memory_region(struct ib_mr *mr);
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
struct smc_buf_desc *buf_slot,
enum dma_data_direction data_direction);
int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 5c4727d5066e..32be2da2cb85 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
device_initialize(&smcd->dev);
dev_set_name(&smcd->dev, name);
smcd->ops = ops;
- smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
+ if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
+ smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 0e52aab53d97..50f59746bdf9 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -58,7 +58,13 @@ struct smc_llc_msg_add_link { /* type 0x02 */
u8 sender_gid[SMC_GID_SIZE];
u8 sender_qp_num[3];
u8 link_num;
- u8 flags2; /* QP mtu */
+#if defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved3 : 4,
+ qp_mtu : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 qp_mtu : 4,
+ reserved3 : 4;
+#endif
u8 initial_psn[3];
u8 reserved[8];
};
@@ -98,13 +104,8 @@ struct smc_llc_msg_confirm_rkey { /* type 0x06 */
u8 reserved;
};
-struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
- struct smc_llc_hdr hd;
- u8 num_rkeys;
- struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
-};
-
#define SMC_LLC_DEL_RKEY_MAX 8
+#define SMC_LLC_FLAG_RKEY_RETRY 0x10
#define SMC_LLC_FLAG_RKEY_NEG 0x20
struct smc_llc_msg_delete_rkey { /* type 0x09 */
@@ -122,7 +123,6 @@ union smc_llc_msg {
struct smc_llc_msg_del_link delete_link;
struct smc_llc_msg_confirm_rkey confirm_rkey;
- struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
struct smc_llc_msg_delete_rkey delete_rkey;
struct smc_llc_msg_test_link test_link;
@@ -134,6 +134,160 @@ union smc_llc_msg {
#define SMC_LLC_FLAG_RESP 0x80
+struct smc_llc_qentry {
+ struct list_head list;
+ struct smc_link *link;
+ union smc_llc_msg msg;
+};
+
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
+{
+ struct smc_llc_qentry *qentry = flow->qentry;
+
+ flow->qentry = NULL;
+ return qentry;
+}
+
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
+{
+ struct smc_llc_qentry *qentry;
+
+ if (flow->qentry) {
+ qentry = flow->qentry;
+ flow->qentry = NULL;
+ kfree(qentry);
+ }
+}
+
+static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
+ struct smc_llc_qentry *qentry)
+{
+ flow->qentry = qentry;
+}
+
+/* try to start a new llc flow, initiated by an incoming llc msg */
+static bool smc_llc_flow_start(struct smc_llc_flow *flow,
+ struct smc_llc_qentry *qentry)
+{
+ struct smc_link_group *lgr = qentry->link->lgr;
+
+ spin_lock_bh(&lgr->llc_flow_lock);
+ if (flow->type) {
+ /* a flow is already active */
+ if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
+ qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
+ !lgr->delayed_event) {
+ lgr->delayed_event = qentry;
+ } else {
+ /* forget this llc request */
+ kfree(qentry);
+ }
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ return false;
+ }
+ switch (qentry->msg.raw.hdr.common.type) {
+ case SMC_LLC_ADD_LINK:
+ flow->type = SMC_LLC_FLOW_ADD_LINK;
+ break;
+ case SMC_LLC_DELETE_LINK:
+ flow->type = SMC_LLC_FLOW_DEL_LINK;
+ break;
+ case SMC_LLC_CONFIRM_RKEY:
+ case SMC_LLC_DELETE_RKEY:
+ flow->type = SMC_LLC_FLOW_RKEY;
+ break;
+ default:
+ flow->type = SMC_LLC_FLOW_NONE;
+ }
+ if (qentry == lgr->delayed_event)
+ lgr->delayed_event = NULL;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ smc_llc_flow_qentry_set(flow, qentry);
+ return true;
+}
+
+/* start a new local llc flow, wait till current flow finished */
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+ enum smc_llc_flowtype type)
+{
+ enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
+ int rc;
+
+ /* all flows except confirm_rkey and delete_rkey are exclusive,
+ * confirm/delete rkey flows can run concurrently (local and remote)
+ */
+ if (type == SMC_LLC_FLOW_RKEY)
+ allowed_remote = SMC_LLC_FLOW_RKEY;
+again:
+ if (list_empty(&lgr->list))
+ return -ENODEV;
+ spin_lock_bh(&lgr->llc_flow_lock);
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+ (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+ lgr->llc_flow_rmt.type == allowed_remote)) {
+ lgr->llc_flow_lcl.type = type;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ return 0;
+ }
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ rc = wait_event_interruptible_timeout(lgr->llc_waiter,
+ (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+ (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+ lgr->llc_flow_rmt.type == allowed_remote)),
+ SMC_LLC_WAIT_TIME);
+ if (!rc)
+ return -ETIMEDOUT;
+ goto again;
+}
+
+/* finish the current llc flow */
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
+{
+ spin_lock_bh(&lgr->llc_flow_lock);
+ memset(flow, 0, sizeof(*flow));
+ flow->type = SMC_LLC_FLOW_NONE;
+ spin_unlock_bh(&lgr->llc_flow_lock);
+ if (!list_empty(&lgr->list) && lgr->delayed_event &&
+ flow == &lgr->llc_flow_lcl)
+ schedule_work(&lgr->llc_event_work);
+ else
+ wake_up_interruptible(&lgr->llc_waiter);
+}
+
+/* lnk is optional and used for early wakeup when link goes down, useful in
+ * cases where we wait for a response on the link after we sent a request
+ */
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+ struct smc_link *lnk,
+ int time_out, u8 exp_msg)
+{
+ struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
+
+ wait_event_interruptible_timeout(lgr->llc_waiter,
+ (flow->qentry ||
+ (lnk && !smc_link_usable(lnk)) ||
+ list_empty(&lgr->list)),
+ time_out);
+ if (!flow->qentry ||
+ (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
+ smc_llc_flow_qentry_del(flow);
+ goto out;
+ }
+ if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
+ if (exp_msg == SMC_LLC_ADD_LINK &&
+ flow->qentry->msg.raw.hdr.common.type ==
+ SMC_LLC_DELETE_LINK) {
+ /* flow_start will delay the unexpected msg */
+ smc_llc_flow_start(&lgr->llc_flow_lcl,
+ smc_llc_flow_qentry_clr(flow));
+ return NULL;
+ }
+ smc_llc_flow_qentry_del(flow);
+ }
+out:
+ return flow->qentry;
+}
+
/********************************** send *************************************/
struct smc_llc_tx_pend {
@@ -215,27 +369,44 @@ int smc_llc_send_confirm_link(struct smc_link *link,
}
/* send LLC confirm rkey request */
-static int smc_llc_send_confirm_rkey(struct smc_link *link,
+static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc)
{
struct smc_llc_msg_confirm_rkey *rkeyllc;
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
- int rc;
+ struct smc_link *link;
+ int i, rc, rtok_ix;
- rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
return rc;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+
+ rtok_ix = 1;
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ link = &send_link->lgr->lnk[i];
+ if (link->state == SMC_LNK_ACTIVE && link != send_link) {
+ rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
+ rkeyllc->rtoken[rtok_ix].rmb_key =
+ htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+ rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
+ (u64)sg_dma_address(
+ rmb_desc->sgt[link->link_idx].sgl));
+ rtok_ix++;
+ }
+ }
+ /* rkey of send_link is in rtoken[0] */
+ rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
rkeyllc->rtoken[0].rmb_key =
- htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
- (u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+ (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
- rc = smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(send_link, pend);
return rc;
}
@@ -256,32 +427,15 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
rkeyllc->num_rkeys = 1;
- rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
-/* prepare an add link message */
-static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
- struct smc_link *link, u8 mac[], u8 gid[],
- enum smc_llc_reqresp reqresp)
-{
- memset(addllc, 0, sizeof(*addllc));
- addllc->hd.common.type = SMC_LLC_ADD_LINK;
- addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
- if (reqresp == SMC_LLC_RESP) {
- addllc->hd.flags |= SMC_LLC_FLAG_RESP;
- /* always reject more links for now */
- addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
- addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
- }
- memcpy(addllc->sender_mac, mac, ETH_ALEN);
- memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
-}
-
/* send ADD LINK request or response */
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+ struct smc_link *link_new,
enum smc_llc_reqresp reqresp)
{
struct smc_llc_msg_add_link *addllc;
@@ -293,32 +447,33 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
if (rc)
return rc;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
- smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
+
+ memset(addllc, 0, sizeof(*addllc));
+ addllc->hd.common.type = SMC_LLC_ADD_LINK;
+ addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+ if (reqresp == SMC_LLC_RESP)
+ addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ memcpy(addllc->sender_mac, mac, ETH_ALEN);
+ memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+ if (link_new) {
+ addllc->link_num = link_new->link_id;
+ hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
+ hton24(addllc->initial_psn, link_new->psn_initial);
+ if (reqresp == SMC_LLC_REQ)
+ addllc->qp_mtu = link_new->path_mtu;
+ else
+ addllc->qp_mtu = min(link_new->path_mtu,
+ link_new->peer_mtu);
+ }
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
}
-/* prepare a delete link message */
-static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
- struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly)
-{
- memset(delllc, 0, sizeof(*delllc));
- delllc->hd.common.type = SMC_LLC_DELETE_LINK;
- delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
- if (reqresp == SMC_LLC_RESP)
- delllc->hd.flags |= SMC_LLC_FLAG_RESP;
- /* DEL_LINK_ALL because only 1 link supported */
- delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
- if (orderly)
- delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
- delllc->link_num = link->link_id;
-}
-
/* send DELETE LINK request or response */
-int smc_llc_send_delete_link(struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly)
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+ enum smc_llc_reqresp reqresp, bool orderly,
+ u32 reason)
{
struct smc_llc_msg_del_link *delllc;
struct smc_wr_tx_pend_priv *pend;
@@ -329,7 +484,19 @@ int smc_llc_send_delete_link(struct smc_link *link,
if (rc)
return rc;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
- smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
+
+ memset(delllc, 0, sizeof(*delllc));
+ delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+ delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+ if (reqresp == SMC_LLC_RESP)
+ delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+ if (orderly)
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+ if (link_del_id)
+ delllc->link_num = link_del_id;
+ else
+ delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+ delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
return rc;
@@ -356,104 +523,64 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
return rc;
}
-struct smc_llc_send_work {
- struct work_struct work;
- struct smc_link *link;
- int llclen;
- union smc_llc_msg llcbuf;
-};
-
-/* worker that sends a prepared message */
-static void smc_llc_send_message_work(struct work_struct *work)
+/* schedule an llc send on link, may wait for buffers */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
{
- struct smc_llc_send_work *llcwrk = container_of(work,
- struct smc_llc_send_work, work);
struct smc_wr_tx_pend_priv *pend;
struct smc_wr_buf *wr_buf;
int rc;
- if (llcwrk->link->state == SMC_LNK_INACTIVE)
- goto out;
- rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
+ if (!smc_link_usable(link))
+ return -ENOLINK;
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- goto out;
- memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
- smc_wr_tx_send(llcwrk->link, pend);
-out:
- kfree(llcwrk);
-}
-
-/* copy llcbuf and schedule an llc send on link */
-static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
-{
- struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
-
- if (!wrk)
- return -ENOMEM;
- INIT_WORK(&wrk->work, smc_llc_send_message_work);
- wrk->link = link;
- wrk->llclen = llclen;
- memcpy(&wrk->llcbuf, llcbuf, llclen);
- queue_work(link->llc_wq, &wrk->work);
- return 0;
+ return rc;
+ memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+ return smc_wr_tx_send(link, pend);
}
/********************************* receive ***********************************/
-static void smc_llc_rx_confirm_link(struct smc_link *link,
- struct smc_llc_msg_confirm_link *llc)
+static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
+ enum smc_lgr_type lgr_new_t)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
- int conf_rc;
-
- /* RMBE eyecatchers are not supported */
- if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
- conf_rc = 0;
- else
- conf_rc = ENOTSUPP;
-
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV &&
- link->state == SMC_LNK_ACTIVATING) {
- link->llc_confirm_resp_rc = conf_rc;
- complete(&link->llc_confirm_resp);
- }
+ int i;
+
+ if (lgr->type == SMC_LGR_SYMMETRIC ||
+ (lgr->type != SMC_LGR_SINGLE &&
+ (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
+ return -EMLINK;
+
+ if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+ lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
+ for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+ return i;
} else {
- if (lgr->role == SMC_CLNT &&
- link->state == SMC_LNK_ACTIVATING) {
- link->llc_confirm_rc = conf_rc;
- link->link_id = llc->link_num;
- complete(&link->llc_confirm);
- }
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+ return i;
}
+ return -EMLINK;
}
-static void smc_llc_rx_add_link(struct smc_link *link,
- struct smc_llc_msg_add_link *llc)
+/* worker to process an add link message */
+static void smc_llc_add_link_work(struct work_struct *work)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
-
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (link->state == SMC_LNK_ACTIVATING)
- complete(&link->llc_add_resp);
- } else {
- if (link->state == SMC_LNK_ACTIVATING) {
- complete(&link->llc_add);
- return;
- }
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ llc_add_link_work);
- if (lgr->role == SMC_SERV) {
- smc_llc_prep_add_link(llc, link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_REQ);
-
- } else {
- smc_llc_prep_add_link(llc, link,
- link->smcibdev->mac[link->ibport - 1],
- link->gid, SMC_LLC_RESP);
- }
- smc_llc_send_message(link, llc, sizeof(*llc));
+ if (list_empty(&lgr->list)) {
+ /* link group is terminating */
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ goto out;
}
+
+ /* tbd: call smc_llc_process_cli_add_link(lgr); */
+ /* tbd: call smc_llc_process_srv_add_link(lgr); */
+out:
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
static void smc_llc_rx_delete_link(struct smc_link *link,
@@ -461,133 +588,263 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
{
struct smc_link_group *lgr = smc_get_lgr(link);
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (lgr->role == SMC_SERV)
- smc_lgr_schedule_free_work_fast(lgr);
+ smc_lgr_forget(lgr);
+ if (lgr->role == SMC_SERV) {
+ /* client asks to delete this link, send request */
+ smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true,
+ SMC_LLC_DEL_PROG_INIT_TERM);
} else {
- smc_lgr_forget(lgr);
- smc_llc_link_deleting(link);
- if (lgr->role == SMC_SERV) {
- /* client asks to delete this link, send request */
- smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
- } else {
- /* server requests to delete this link, send response */
- smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
- }
- smc_llc_send_message(link, llc, sizeof(*llc));
- smc_lgr_terminate_sched(lgr);
+ /* server requests to delete this link, send response */
+ smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
+ SMC_LLC_DEL_PROG_INIT_TERM);
}
+ smcr_link_down_cond(link);
}
-static void smc_llc_rx_test_link(struct smc_link *link,
- struct smc_llc_msg_test_link *llc)
+/* process a confirm_rkey request from peer, remote flow */
+static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
{
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- if (link->state == SMC_LNK_ACTIVE)
- complete(&link->llc_testlink_resp);
- } else {
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
- }
+ struct smc_llc_msg_confirm_rkey *llc;
+ struct smc_llc_qentry *qentry;
+ struct smc_link *link;
+ int num_entries;
+ int rk_idx;
+ int i;
+
+ qentry = lgr->llc_flow_rmt.qentry;
+ llc = &qentry->msg.confirm_rkey;
+ link = qentry->link;
+
+ num_entries = llc->rtoken[0].num_rkeys;
+ /* first rkey entry is for receiving link */
+ rk_idx = smc_rtoken_add(link,
+ llc->rtoken[0].rmb_vaddr,
+ llc->rtoken[0].rmb_key);
+ if (rk_idx < 0)
+ goto out_err;
+
+ for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
+ smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
+ llc->rtoken[i].rmb_vaddr,
+ llc->rtoken[i].rmb_key);
+ /* max links is 3 so there is no need to support conf_rkey_cont msgs */
+ goto out;
+out_err:
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
+out:
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, &qentry->msg);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
-static void smc_llc_rx_confirm_rkey(struct smc_link *link,
- struct smc_llc_msg_confirm_rkey *llc)
+/* process a delete_rkey request from peer, remote flow */
+static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
{
- int rc;
-
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- link->llc_confirm_rkey_rc = llc->hd.flags &
- SMC_LLC_FLAG_RKEY_NEG;
- complete(&link->llc_confirm_rkey);
- } else {
- rc = smc_rtoken_add(smc_get_lgr(link),
- llc->rtoken[0].rmb_vaddr,
- llc->rtoken[0].rmb_key);
+ struct smc_llc_msg_delete_rkey *llc;
+ struct smc_llc_qentry *qentry;
+ struct smc_link *link;
+ u8 err_mask = 0;
+ int i, max;
- /* ignore rtokens for other links, we have only one link */
+ qentry = lgr->llc_flow_rmt.qentry;
+ llc = &qentry->msg.delete_rkey;
+ link = qentry->link;
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- if (rc < 0)
- llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+ for (i = 0; i < max; i++) {
+ if (smc_rtoken_delete(link, llc->rkey[i]))
+ err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
}
-}
-
-static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
- struct smc_llc_msg_confirm_rkey_cont *llc)
-{
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- /* unused as long as we don't send this type of msg */
- } else {
- /* ignore rtokens for other links, we have only one link */
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ if (err_mask) {
+ llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+ llc->err_mask = err_mask;
}
+ llc->hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, &qentry->msg);
+ smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
}
-static void smc_llc_rx_delete_rkey(struct smc_link *link,
- struct smc_llc_msg_delete_rkey *llc)
+/* flush the llc event queue */
+static void smc_llc_event_flush(struct smc_link_group *lgr)
{
- u8 err_mask = 0;
- int i, max;
-
- if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- link->llc_delete_rkey_rc = llc->hd.flags &
- SMC_LLC_FLAG_RKEY_NEG;
- complete(&link->llc_delete_rkey);
- } else {
- max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
- for (i = 0; i < max; i++) {
- if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
- err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
- }
-
- if (err_mask) {
- llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
- llc->err_mask = err_mask;
- }
+ struct smc_llc_qentry *qentry, *q;
- llc->hd.flags |= SMC_LLC_FLAG_RESP;
- smc_llc_send_message(link, llc, sizeof(*llc));
+ spin_lock_bh(&lgr->llc_event_q_lock);
+ list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
+ list_del_init(&qentry->list);
+ kfree(qentry);
}
+ spin_unlock_bh(&lgr->llc_event_q_lock);
}
-static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
{
- struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
- union smc_llc_msg *llc = buf;
+ union smc_llc_msg *llc = &qentry->msg;
+ struct smc_link *link = qentry->link;
+ struct smc_link_group *lgr = link->lgr;
- if (wc->byte_len < sizeof(*llc))
- return; /* short message */
- if (llc->raw.hdr.length != sizeof(*llc))
- return; /* invalid message */
- if (link->state == SMC_LNK_INACTIVE)
- return; /* link not active, drop msg */
+ if (!smc_link_usable(link))
+ goto out;
switch (llc->raw.hdr.common.type) {
case SMC_LLC_TEST_LINK:
- smc_llc_rx_test_link(link, &llc->test_link);
- break;
- case SMC_LLC_CONFIRM_LINK:
- smc_llc_rx_confirm_link(link, &llc->confirm_link);
+ llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
+ smc_llc_send_message(link, llc);
break;
case SMC_LLC_ADD_LINK:
- smc_llc_rx_add_link(link, &llc->add_link);
+ if (list_empty(&lgr->list))
+ goto out; /* lgr is terminating */
+ if (lgr->role == SMC_CLNT) {
+ if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
+ /* a flow is waiting for this message */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+ qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+ qentry)) {
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+ /* as smc server, handle client suggestion */
+ schedule_work(&lgr->llc_add_link_work);
+ }
+ return;
+ case SMC_LLC_CONFIRM_LINK:
+ if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+ /* a flow is waiting for this message */
+ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
+ wake_up_interruptible(&lgr->llc_waiter);
+ return;
+ }
break;
case SMC_LLC_DELETE_LINK:
smc_llc_rx_delete_link(link, &llc->delete_link);
break;
case SMC_LLC_CONFIRM_RKEY:
- smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
- break;
+ /* new request from remote, assign to remote flow */
+ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+ /* process here, does not wait for more llc msgs */
+ smc_llc_rmt_conf_rkey(lgr);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+ }
+ return;
case SMC_LLC_CONFIRM_RKEY_CONT:
- smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+ /* not used because max links is 3, and 3 rkeys fit into
+ * one CONFIRM_RKEY message
+ */
break;
case SMC_LLC_DELETE_RKEY:
- smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
+ /* new request from remote, assign to remote flow */
+ if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+ /* process here, does not wait for more llc msgs */
+ smc_llc_rmt_delete_rkey(lgr);
+ smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+ }
+ return;
+ }
+out:
+ kfree(qentry);
+}
+
+/* worker to process llc messages on the event queue */
+static void smc_llc_event_work(struct work_struct *work)
+{
+ struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+ llc_event_work);
+ struct smc_llc_qentry *qentry;
+
+ if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
+ if (smc_link_usable(lgr->delayed_event->link)) {
+ smc_llc_event_handler(lgr->delayed_event);
+ } else {
+ qentry = lgr->delayed_event;
+ lgr->delayed_event = NULL;
+ kfree(qentry);
+ }
+ }
+
+again:
+ spin_lock_bh(&lgr->llc_event_q_lock);
+ if (!list_empty(&lgr->llc_event_q)) {
+ qentry = list_first_entry(&lgr->llc_event_q,
+ struct smc_llc_qentry, list);
+ list_del_init(&qentry->list);
+ spin_unlock_bh(&lgr->llc_event_q_lock);
+ smc_llc_event_handler(qentry);
+ goto again;
+ }
+ spin_unlock_bh(&lgr->llc_event_q_lock);
+}
+
+/* process llc responses in tasklet context */
+static void smc_llc_rx_response(struct smc_link *link,
+ struct smc_llc_qentry *qentry)
+{
+ u8 llc_type = qentry->msg.raw.hdr.common.type;
+
+ switch (llc_type) {
+ case SMC_LLC_TEST_LINK:
+ if (link->state == SMC_LNK_ACTIVE)
+ complete(&link->llc_testlink_resp);
break;
+ case SMC_LLC_ADD_LINK:
+ case SMC_LLC_CONFIRM_LINK:
+ case SMC_LLC_CONFIRM_RKEY:
+ case SMC_LLC_DELETE_RKEY:
+ /* assign responses to the local flow, we requested them */
+ smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
+ wake_up_interruptible(&link->lgr->llc_waiter);
+ return;
+ case SMC_LLC_DELETE_LINK:
+ if (link->lgr->role == SMC_SERV)
+ smc_lgr_schedule_free_work_fast(link->lgr);
+ break;
+ case SMC_LLC_CONFIRM_RKEY_CONT:
+ /* not used because max links is 3 */
+ break;
+ }
+ kfree(qentry);
+}
+
+static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
+{
+ struct smc_link_group *lgr = link->lgr;
+ struct smc_llc_qentry *qentry;
+ unsigned long flags;
+
+ qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
+ if (!qentry)
+ return;
+ qentry->link = link;
+ INIT_LIST_HEAD(&qentry->list);
+ memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
+
+ /* process responses immediately */
+ if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+ smc_llc_rx_response(link, qentry);
+ return;
}
+
+ /* add requests to event queue */
+ spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
+ list_add_tail(&qentry->list, &lgr->llc_event_q);
+ spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
+ schedule_work(&link->lgr->llc_event_work);
+}
+
+/* copy received msg and add it to the event queue */
+static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+{
+ struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+ union smc_llc_msg *llc = buf;
+
+ if (wc->byte_len < sizeof(*llc))
+ return; /* short message */
+ if (llc->raw.hdr.length != sizeof(*llc))
+ return; /* invalid message */
+
+ smc_llc_enqueue(link, llc);
}
/***************************** worker, utils *********************************/
@@ -613,112 +870,130 @@ static void smc_llc_testlink_work(struct work_struct *work)
/* receive TEST LINK response over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
SMC_LLC_WAIT_TIME);
+ if (link->state != SMC_LNK_ACTIVE)
+ return; /* link state changed */
if (rc <= 0) {
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
return;
}
next_interval = link->llc_testlink_time;
out:
- queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
- next_interval);
+ schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
}
-int smc_llc_link_init(struct smc_link *link)
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
{
- struct smc_link_group *lgr = smc_get_lgr(link);
- link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
- *((u32 *)lgr->id),
- link->link_id);
- if (!link->llc_wq)
- return -ENOMEM;
- init_completion(&link->llc_confirm);
- init_completion(&link->llc_confirm_resp);
- init_completion(&link->llc_add);
- init_completion(&link->llc_add_resp);
- init_completion(&link->llc_confirm_rkey);
- init_completion(&link->llc_delete_rkey);
- mutex_init(&link->llc_delete_rkey_mutex);
- init_completion(&link->llc_testlink_resp);
- INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
- return 0;
+ struct net *net = sock_net(smc->clcsock->sk);
+
+ INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
+ INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
+ INIT_LIST_HEAD(&lgr->llc_event_q);
+ spin_lock_init(&lgr->llc_event_q_lock);
+ spin_lock_init(&lgr->llc_flow_lock);
+ init_waitqueue_head(&lgr->llc_waiter);
+ mutex_init(&lgr->llc_conf_mutex);
+ lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
}
-void smc_llc_link_active(struct smc_link *link, int testlink_time)
+/* called after lgr was removed from lgr_list */
+void smc_llc_lgr_clear(struct smc_link_group *lgr)
{
- link->state = SMC_LNK_ACTIVE;
- if (testlink_time) {
- link->llc_testlink_time = testlink_time * HZ;
- queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
- link->llc_testlink_time);
+ smc_llc_event_flush(lgr);
+ wake_up_interruptible_all(&lgr->llc_waiter);
+ cancel_work_sync(&lgr->llc_event_work);
+ cancel_work_sync(&lgr->llc_add_link_work);
+ if (lgr->delayed_event) {
+ kfree(lgr->delayed_event);
+ lgr->delayed_event = NULL;
}
}
-void smc_llc_link_deleting(struct smc_link *link)
+int smc_llc_link_init(struct smc_link *link)
{
- link->state = SMC_LNK_DELETING;
- smc_wr_wakeup_tx_wait(link);
+ init_completion(&link->llc_testlink_resp);
+ INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
+ return 0;
}
-/* called in tasklet context */
-void smc_llc_link_inactive(struct smc_link *link)
+void smc_llc_link_active(struct smc_link *link)
{
- link->state = SMC_LNK_INACTIVE;
- cancel_delayed_work(&link->llc_testlink_wrk);
- smc_wr_wakeup_reg_wait(link);
- smc_wr_wakeup_tx_wait(link);
+ link->state = SMC_LNK_ACTIVE;
+ if (link->lgr->llc_testlink_time) {
+ link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
+ schedule_delayed_work(&link->llc_testlink_wrk,
+ link->llc_testlink_time);
+ }
}
/* called in worker context */
void smc_llc_link_clear(struct smc_link *link)
{
- flush_workqueue(link->llc_wq);
- destroy_workqueue(link->llc_wq);
+ complete(&link->llc_testlink_resp);
+ cancel_delayed_work_sync(&link->llc_testlink_wrk);
+ smc_wr_wakeup_reg_wait(link);
+ smc_wr_wakeup_tx_wait(link);
}
-/* register a new rtoken at the remote peer */
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+/* register a new rtoken at the remote peer (for all links) */
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc)
{
- int rc;
+ struct smc_link_group *lgr = send_link->lgr;
+ struct smc_llc_qentry *qentry = NULL;
+ int rc = 0;
- /* protected by mutex smc_create_lgr_pending */
- reinit_completion(&link->llc_confirm_rkey);
- rc = smc_llc_send_confirm_rkey(link, rmb_desc);
+ rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
if (rc)
- return rc;
+ goto out;
/* receive CONFIRM RKEY response from server over RoCE fabric */
- rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
- SMC_LLC_WAIT_TIME);
- if (rc <= 0 || link->llc_confirm_rkey_rc)
- return -EFAULT;
- return 0;
+ qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_CONFIRM_RKEY);
+ if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
+ rc = -EFAULT;
+out:
+ if (qentry)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+ return rc;
}
/* unregister an rtoken at the remote peer */
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc)
{
+ struct smc_llc_qentry *qentry = NULL;
+ struct smc_link *send_link;
int rc = 0;
- mutex_lock(&link->llc_delete_rkey_mutex);
- if (link->state != SMC_LNK_ACTIVE)
- goto out;
- reinit_completion(&link->llc_delete_rkey);
- rc = smc_llc_send_delete_rkey(link, rmb_desc);
+ send_link = smc_llc_usable_link(lgr);
+ if (!send_link)
+ return -ENOLINK;
+
+ /* protected by llc_flow control */
+ rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
if (rc)
goto out;
/* receive DELETE RKEY response from server over RoCE fabric */
- rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
- SMC_LLC_WAIT_TIME);
- if (rc <= 0 || link->llc_delete_rkey_rc)
+ qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+ SMC_LLC_DELETE_RKEY);
+ if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
rc = -EFAULT;
- else
- rc = 0;
out:
- mutex_unlock(&link->llc_delete_rkey_mutex);
+ if (qentry)
+ smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
return rc;
}
+/* evaluate confirm link request or response */
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+ enum smc_llc_reqresp type)
+{
+ if (type == SMC_LLC_REQ) /* SMC server assigns link_id */
+ qentry->link->link_id = qentry->msg.confirm_link.link_num;
+ if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
+ return -ENOTSUPP;
+ return 0;
+}
+
/***************************** init, exit, misc ******************************/
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 461c0c3ef76e..4ed4486e5082 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -35,22 +35,58 @@ enum smc_llc_msg_type {
SMC_LLC_DELETE_RKEY = 0x09,
};
+#define smc_link_downing(state) \
+ (cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE)
+
+/* LLC DELETE LINK Request Reason Codes */
+#define SMC_LLC_DEL_LOST_PATH 0x00010000
+#define SMC_LLC_DEL_OP_INIT_TERM 0x00020000
+#define SMC_LLC_DEL_PROG_INIT_TERM 0x00030000
+#define SMC_LLC_DEL_PROT_VIOL 0x00040000
+#define SMC_LLC_DEL_NO_ASYM_NEEDED 0x00050000
+/* LLC DELETE LINK Response Reason Codes */
+#define SMC_LLC_DEL_NOLNK 0x00100000 /* Unknown Link ID (no link) */
+#define SMC_LLC_DEL_NOLGR 0x00200000 /* Unknown Link Group */
+
+/* returns a usable link of the link group, or NULL */
+static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
+{
+ int i;
+
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+ if (smc_link_usable(&lgr->lnk[i]))
+ return &lgr->lnk[i];
+ return NULL;
+}
+
/* transmit */
int smc_llc_send_confirm_link(struct smc_link *lnk,
enum smc_llc_reqresp reqresp);
int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+ struct smc_link *link_new,
enum smc_llc_reqresp reqresp);
-int smc_llc_send_delete_link(struct smc_link *link,
- enum smc_llc_reqresp reqresp, bool orderly);
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+ enum smc_llc_reqresp reqresp, bool orderly,
+ u32 reason);
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
+void smc_llc_lgr_clear(struct smc_link_group *lgr);
int smc_llc_link_init(struct smc_link *link);
-void smc_llc_link_active(struct smc_link *link, int testlink_time);
-void smc_llc_link_deleting(struct smc_link *link);
-void smc_llc_link_inactive(struct smc_link *link);
+void smc_llc_link_active(struct smc_link *link);
void smc_llc_link_clear(struct smc_link *link);
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
struct smc_buf_desc *rmb_desc);
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
struct smc_buf_desc *rmb_desc);
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+ enum smc_llc_flowtype type);
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+ enum smc_llc_reqresp type);
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+ struct smc_link *lnk,
+ int time_out, u8 exp_msg);
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2a5ed47c3e08..50c96e843fab 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -50,29 +50,26 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
static struct genl_family smc_pnet_nl_family;
-/**
- * struct smc_user_pnetentry - pnet identifier name entry for/from user
- * @list: List node.
- * @pnet_name: Pnet identifier name
- * @ndev: pointer to network device.
- * @smcibdev: Pointer to IB device.
- * @ib_port: Port of IB device.
- * @smcd_dev: Pointer to smcd device.
- */
-struct smc_user_pnetentry {
- struct list_head list;
- char pnet_name[SMC_MAX_PNETID_LEN + 1];
- struct net_device *ndev;
- struct smc_ib_device *smcibdev;
- u8 ib_port;
- struct smcd_dev *smcd_dev;
+enum smc_pnet_nametype {
+ SMC_PNET_ETH = 1,
+ SMC_PNET_IB = 2,
};
/* pnet entry stored in pnet table */
struct smc_pnetentry {
struct list_head list;
char pnet_name[SMC_MAX_PNETID_LEN + 1];
- struct net_device *ndev;
+ enum smc_pnet_nametype type;
+ union {
+ struct {
+ char eth_name[IFNAMSIZ + 1];
+ struct net_device *ndev;
+ };
+ struct {
+ char ib_name[IB_DEVICE_NAME_MAX + 1];
+ u8 ib_port;
+ };
+ };
};
/* Check if two given pnetids match */
@@ -106,14 +103,15 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- /* remove netdevices */
+ /* remove table entry */
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
list) {
if (!pnet_name ||
smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev)
+ dev_put(pnetelem->ndev);
kfree(pnetelem);
rc = 0;
}
@@ -155,9 +153,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
return rc;
}
-/* Remove a pnet entry mentioning a given network device from the pnet table.
+/* Add the reference to a given network device to the pnet table.
*/
-static int smc_pnet_remove_by_ndev(struct net_device *ndev)
+static int smc_pnet_add_by_ndev(struct net_device *ndev)
{
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
@@ -171,10 +169,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
write_lock(&pnettable->lock);
list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
- if (pnetelem->ndev == ndev) {
- list_del(&pnetelem->list);
- dev_put(pnetelem->ndev);
- kfree(pnetelem);
+ if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
+ !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
+ dev_hold(ndev);
+ pnetelem->ndev = ndev;
rc = 0;
break;
}
@@ -183,80 +181,67 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
return rc;
}
-/* Append a pnetid to the end of the pnet table if not already on this list.
+/* Remove the reference to a given network device from the pnet table.
*/
-static int smc_pnet_enter(struct smc_pnettable *pnettable,
- struct smc_user_pnetentry *new_pnetelem)
+static int smc_pnet_remove_by_ndev(struct net_device *ndev)
{
- u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
- u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
- struct smc_pnetentry *tmp_pnetelem;
- struct smc_pnetentry *pnetelem;
- bool new_smcddev = false;
- struct net_device *ndev;
- bool new_netdev = true;
- bool new_ibdev = false;
-
- if (new_pnetelem->smcibdev) {
- struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
- int ib_port = new_pnetelem->ib_port;
+ struct smc_pnetentry *pnetelem, *tmp_pe;
+ struct smc_pnettable *pnettable;
+ struct net *net = dev_net(ndev);
+ struct smc_net *sn;
+ int rc = -ENOENT;
- spin_lock(&smc_ib_devices.lock);
- if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
- memcpy(ib_dev->pnetid[ib_port - 1],
- new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
- ib_dev->pnetid_by_user[ib_port - 1] = true;
- new_ibdev = true;
- }
- spin_unlock(&smc_ib_devices.lock);
- }
- if (new_pnetelem->smcd_dev) {
- struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
- spin_lock(&smcd_dev_list.lock);
- if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
- memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- smcd_dev->pnetid_by_user = true;
- new_smcddev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+ if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
+ dev_put(pnetelem->ndev);
+ pnetelem->ndev = NULL;
+ rc = 0;
+ break;
}
- spin_unlock(&smcd_dev_list.lock);
}
+ write_unlock(&pnettable->lock);
+ return rc;
+}
- if (!new_pnetelem->ndev)
- return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+/* Apply pnetid to ib device when no pnetid is set.
+ */
+static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
+ char *pnet_name)
+{
+ u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+ bool applied = false;
- /* check if (base) netdev already has a pnetid. If there is one, we do
- * not want to add a pnet table entry
- */
- ndev = pnet_find_base_ndev(new_pnetelem->ndev);
- if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
- ndev_pnetid))
- return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+ spin_lock(&smc_ib_devices.lock);
+ if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+ memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
+ SMC_MAX_PNETID_LEN);
+ ib_dev->pnetid_by_user[ib_port - 1] = true;
+ applied = true;
+ }
+ spin_unlock(&smc_ib_devices.lock);
+ return applied;
+}
- /* add a new netdev entry to the pnet table if there isn't one */
- tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
- if (!tmp_pnetelem)
- return -ENOMEM;
- memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- tmp_pnetelem->ndev = new_pnetelem->ndev;
+/* Apply pnetid to smcd device when no pnetid is set.
+ */
+static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
+{
+ u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+ bool applied = false;
- write_lock(&pnettable->lock);
- list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
- if (pnetelem->ndev == new_pnetelem->ndev)
- new_netdev = false;
- }
- if (new_netdev) {
- dev_hold(tmp_pnetelem->ndev);
- list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
- write_unlock(&pnettable->lock);
- } else {
- write_unlock(&pnettable->lock);
- kfree(tmp_pnetelem);
+ spin_lock(&smcd_dev_list.lock);
+ if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+ memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
+ smcd_dev->pnetid_by_user = true;
+ applied = true;
}
-
- return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
+ spin_unlock(&smcd_dev_list.lock);
+ return applied;
}
/* The limit for pnetid is 16 characters.
@@ -323,57 +308,167 @@ out:
return smcd_dev;
}
-/* Parse the supplied netlink attributes and fill a pnetentry structure.
- * For ethernet and infiniband device names verify that the devices exist.
+static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
+ char *eth_name, char *pnet_name)
+{
+ struct smc_pnetentry *tmp_pe, *new_pe;
+ struct net_device *ndev, *base_ndev;
+ u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+ bool new_netdev;
+ int rc;
+
+ /* check if (base) netdev already has a pnetid. If there is one, we do
+ * not want to add a pnet table entry
+ */
+ rc = -EEXIST;
+ ndev = dev_get_by_name(net, eth_name); /* dev_hold() */
+ if (ndev) {
+ base_ndev = pnet_find_base_ndev(ndev);
+ if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
+ base_ndev->dev_port, ndev_pnetid))
+ goto out_put;
+ }
+
+ /* add a new netdev entry to the pnet table if there isn't one */
+ rc = -ENOMEM;
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+ if (!new_pe)
+ goto out_put;
+ new_pe->type = SMC_PNET_ETH;
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+ strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
+ new_pe->ndev = ndev;
+
+ rc = -EEXIST;
+ new_netdev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_ETH &&
+ !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
+ new_netdev = false;
+ break;
+ }
+ }
+ if (new_netdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+ write_unlock(&pnettable->lock);
+ } else {
+ write_unlock(&pnettable->lock);
+ kfree(new_pe);
+ goto out_put;
+ }
+ return 0;
+
+out_put:
+ if (ndev)
+ dev_put(ndev);
+ return rc;
+}
+
+static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
+ u8 ib_port, char *pnet_name)
+{
+ struct smc_pnetentry *tmp_pe, *new_pe;
+ struct smc_ib_device *ib_dev;
+ bool smcddev_applied = true;
+ bool ibdev_applied = true;
+ struct smcd_dev *smcd_dev;
+ bool new_ibdev;
+
+ /* try to apply the pnetid to active devices */
+ ib_dev = smc_pnet_find_ib(ib_name);
+ if (ib_dev)
+ ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
+ smcd_dev = smc_pnet_find_smcd(ib_name);
+ if (smcd_dev)
+ smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
+ /* Apply fails when a device has a hardware-defined pnetid set, do not
+ * add a pnet table entry in that case.
+ */
+ if (!ibdev_applied || !smcddev_applied)
+ return -EEXIST;
+
+ /* add a new ib entry to the pnet table if there isn't one */
+ new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+ if (!new_pe)
+ return -ENOMEM;
+ new_pe->type = SMC_PNET_IB;
+ memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+ strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
+ new_pe->ib_port = ib_port;
+
+ new_ibdev = true;
+ write_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+ new_ibdev = false;
+ break;
+ }
+ }
+ if (new_ibdev) {
+ list_add_tail(&new_pe->list, &pnettable->pnetlist);
+ write_unlock(&pnettable->lock);
+ } else {
+ write_unlock(&pnettable->lock);
+ kfree(new_pe);
+ }
+ return (new_ibdev) ? 0 : -EEXIST;
+}
+
+/* Append a pnetid to the end of the pnet table if not already on this list.
*/
-static int smc_pnet_fill_entry(struct net *net,
- struct smc_user_pnetentry *pnetelem,
- struct nlattr *tb[])
+static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
{
- char *string, *ibname;
+ char pnet_name[SMC_MAX_PNETID_LEN + 1];
+ struct smc_pnettable *pnettable;
+ bool new_netdev = false;
+ bool new_ibdev = false;
+ struct smc_net *sn;
+ u8 ibport = 1;
+ char *string;
int rc;
- memset(pnetelem, 0, sizeof(*pnetelem));
- INIT_LIST_HEAD(&pnetelem->list);
+ /* get pnettable for namespace */
+ sn = net_generic(net, smc_net_id);
+ pnettable = &sn->pnettable;
rc = -EINVAL;
if (!tb[SMC_PNETID_NAME])
goto error;
string = (char *)nla_data(tb[SMC_PNETID_NAME]);
- if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+ if (!smc_pnetid_valid(string, pnet_name))
goto error;
- rc = -EINVAL;
if (tb[SMC_PNETID_ETHNAME]) {
string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
- pnetelem->ndev = dev_get_by_name(net, string);
- if (!pnetelem->ndev)
+ rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
+ if (!rc)
+ new_netdev = true;
+ else if (rc != -EEXIST)
goto error;
}
/* if this is not the initial namespace, stop here */
if (net != &init_net)
- return 0;
+ return new_netdev ? 0 : -EEXIST;
rc = -EINVAL;
if (tb[SMC_PNETID_IBNAME]) {
- ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
- ibname = strim(ibname);
- pnetelem->smcibdev = smc_pnet_find_ib(ibname);
- pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
- if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
- goto error;
- if (pnetelem->smcibdev) {
- if (!tb[SMC_PNETID_IBPORT])
- goto error;
- pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
- if (pnetelem->ib_port < 1 ||
- pnetelem->ib_port > SMC_MAX_PORTS)
+ string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+ string = strim(string);
+ if (tb[SMC_PNETID_IBPORT]) {
+ ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+ if (ibport < 1 || ibport > SMC_MAX_PORTS)
goto error;
}
+ rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
+ if (!rc)
+ new_ibdev = true;
+ else if (rc != -EEXIST)
+ goto error;
}
-
- return 0;
+ return (new_netdev || new_ibdev) ? 0 : -EEXIST;
error:
return rc;
@@ -381,28 +476,22 @@ error:
/* Convert an smc_pnetentry to a netlink attribute sequence */
static int smc_pnet_set_nla(struct sk_buff *msg,
- struct smc_user_pnetentry *pnetelem)
+ struct smc_pnetentry *pnetelem)
{
if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
return -1;
- if (pnetelem->ndev) {
+ if (pnetelem->type == SMC_PNET_ETH) {
if (nla_put_string(msg, SMC_PNETID_ETHNAME,
- pnetelem->ndev->name))
+ pnetelem->eth_name))
return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
return -1;
}
- if (pnetelem->smcibdev) {
- if (nla_put_string(msg, SMC_PNETID_IBNAME,
- dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
+ if (pnetelem->type == SMC_PNET_IB) {
+ if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
return -1;
- } else if (pnetelem->smcd_dev) {
- if (nla_put_string(msg, SMC_PNETID_IBNAME,
- dev_name(&pnetelem->smcd_dev->dev)) ||
- nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
- return -1;
} else {
if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
@@ -415,21 +504,8 @@ static int smc_pnet_set_nla(struct sk_buff *msg,
static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
- struct smc_user_pnetentry pnetelem;
- struct smc_pnettable *pnettable;
- struct smc_net *sn;
- int rc;
-
- /* get pnettable for namespace */
- sn = net_generic(net, smc_net_id);
- pnettable = &sn->pnettable;
- rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
- if (!rc)
- rc = smc_pnet_enter(pnettable, &pnetelem);
- if (pnetelem.ndev)
- dev_put(pnetelem.ndev);
- return rc;
+ return smc_pnet_enter(net, info->attrs);
}
static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
@@ -450,7 +526,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb)
static int smc_pnet_dumpinfo(struct sk_buff *skb,
u32 portid, u32 seq, u32 flags,
- struct smc_user_pnetentry *pnetelem)
+ struct smc_pnetentry *pnetelem)
{
void *hdr;
@@ -469,91 +545,32 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb,
static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
u32 seq, u8 *pnetid, int start_idx)
{
- struct smc_user_pnetentry tmp_entry;
struct smc_pnettable *pnettable;
struct smc_pnetentry *pnetelem;
- struct smc_ib_device *ibdev;
- struct smcd_dev *smcd_dev;
struct smc_net *sn;
int idx = 0;
- int ibport;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
- /* dump netdevices */
+ /* dump pnettable entries */
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
continue;
if (idx++ < start_idx)
continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
- SMC_MAX_PNETID_LEN);
- tmp_entry.ndev = pnetelem->ndev;
+ /* if this is not the initial namespace, dump only netdev */
+ if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
+ continue;
if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
- &tmp_entry)) {
+ pnetelem)) {
--idx;
break;
}
}
read_unlock(&pnettable->lock);
-
- /* if this is not the initial namespace, stop here */
- if (net != &init_net)
- return idx;
-
- /* dump ib devices */
- spin_lock(&smc_ib_devices.lock);
- list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
- for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
- if (ibdev->pnetid_by_user[ibport]) {
- if (pnetid &&
- !smc_pnet_match(ibdev->pnetid[ibport],
- pnetid))
- continue;
- if (idx++ < start_idx)
- continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name,
- ibdev->pnetid[ibport],
- SMC_MAX_PNETID_LEN);
- tmp_entry.smcibdev = ibdev;
- tmp_entry.ib_port = ibport + 1;
- if (smc_pnet_dumpinfo(skb, portid, seq,
- NLM_F_MULTI,
- &tmp_entry)) {
- --idx;
- break;
- }
- }
- }
- }
- spin_unlock(&smc_ib_devices.lock);
-
- /* dump smcd devices */
- spin_lock(&smcd_dev_list.lock);
- list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (smcd_dev->pnetid_by_user) {
- if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
- continue;
- if (idx++ < start_idx)
- continue;
- memset(&tmp_entry, 0, sizeof(tmp_entry));
- memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
- SMC_MAX_PNETID_LEN);
- tmp_entry.smcd_dev = smcd_dev;
- if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
- &tmp_entry)) {
- --idx;
- break;
- }
- }
- }
- spin_unlock(&smcd_dev_list.lock);
-
return idx;
}
@@ -659,6 +676,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
smc_pnet_remove_by_ndev(event_dev);
return NOTIFY_OK;
+ case NETDEV_REGISTER:
+ smc_pnet_add_by_ndev(event_dev);
+ return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
@@ -744,7 +764,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
read_lock(&pnettable->lock);
list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
- if (ndev == pnetelem->ndev) {
+ if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
/* get pnetid of netdev device */
memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
rc = 0;
@@ -755,6 +775,45 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
return rc;
}
+/* find a roce device for the given pnetid */
+static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev)
+{
+ struct smc_ib_device *ibdev;
+ int i;
+
+ ini->ib_dev = NULL;
+ spin_lock(&smc_ib_devices.lock);
+ list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+ if (ibdev == known_dev)
+ continue;
+ for (i = 1; i <= SMC_MAX_PORTS; i++) {
+ if (!rdma_is_port_valid(ibdev->ibdev, i))
+ continue;
+ if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
+ smc_ib_port_active(ibdev, i) &&
+ !test_bit(i - 1, ibdev->ports_going_away) &&
+ !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+ ini->ib_gid, NULL)) {
+ ini->ib_dev = ibdev;
+ ini->ib_port = i;
+ goto out;
+ }
+ }
+ }
+out:
+ spin_unlock(&smc_ib_devices.lock);
+}
+
+/* find alternate roce device with same pnet_id and vlan_id */
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev)
+{
+ _smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
+}
+
/* if handshake network device belongs to a roce device, return its
* IB device and port
*/
@@ -801,8 +860,6 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
struct smc_init_info *ini)
{
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
- struct smc_ib_device *ibdev;
- int i;
ndev = pnet_find_base_ndev(ndev);
if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
@@ -811,25 +868,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
smc_pnet_find_rdma_dev(ndev, ini);
return; /* pnetid could not be determined */
}
-
- spin_lock(&smc_ib_devices.lock);
- list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
- for (i = 1; i <= SMC_MAX_PORTS; i++) {
- if (!rdma_is_port_valid(ibdev->ibdev, i))
- continue;
- if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
- smc_ib_port_active(ibdev, i) &&
- !test_bit(i - 1, ibdev->ports_going_away) &&
- !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
- ini->ib_gid, NULL)) {
- ini->ib_dev = ibdev;
- ini->ib_port = i;
- goto out;
- }
- }
- }
-out:
- spin_unlock(&smc_ib_devices.lock);
+ _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
}
static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
@@ -895,3 +934,60 @@ out_rel:
out:
return;
}
+
+/* Lookup and apply a pnet table entry to the given ib device.
+ */
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
+{
+ char *ib_name = smcibdev->ibdev->name;
+ struct smc_pnettable *pnettable;
+ struct smc_pnetentry *tmp_pe;
+ struct smc_net *sn;
+ int rc = -ENOENT;
+
+ /* get pnettable for init namespace */
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ read_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
+ tmp_pe->ib_port == ib_port) {
+ smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name);
+ rc = 0;
+ break;
+ }
+ }
+ read_unlock(&pnettable->lock);
+
+ return rc;
+}
+
+/* Lookup and apply a pnet table entry to the given smcd device.
+ */
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
+{
+ const char *ib_name = dev_name(&smcddev->dev);
+ struct smc_pnettable *pnettable;
+ struct smc_pnetentry *tmp_pe;
+ struct smc_net *sn;
+ int rc = -ENOENT;
+
+ /* get pnettable for init namespace */
+ sn = net_generic(&init_net, smc_net_id);
+ pnettable = &sn->pnettable;
+
+ read_lock(&pnettable->lock);
+ list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+ if (tmp_pe->type == SMC_PNET_IB &&
+ !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+ smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name);
+ rc = 0;
+ break;
+ }
+ }
+ read_unlock(&pnettable->lock);
+
+ return rc;
+}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 4564e4d69c2e..811a65986691 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -19,6 +19,7 @@
struct smc_ib_device;
struct smcd_dev;
struct smc_init_info;
+struct smc_link_group;
/**
* struct smc_pnettable - SMC PNET table anchor
@@ -46,5 +47,9 @@ void smc_pnet_exit(void);
void smc_pnet_net_exit(struct net *net);
void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
-
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+ struct smc_init_info *ini,
+ struct smc_ib_device *known_dev);
#endif
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 9f1ade86d70e..417204572a69 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -269,22 +269,21 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
int num_sges, struct ib_rdma_wr *rdma_wr)
{
struct smc_link_group *lgr = conn->lgr;
- struct smc_link *link;
+ struct smc_link *link = conn->lnk;
int rc;
- link = &lgr->lnk[SMC_SINGLE_LINK];
rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
rdma_wr->wr.num_sge = num_sges;
rdma_wr->remote_addr =
- lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
+ lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
/* RMBE within RMB */
conn->tx_off +
/* offset within RMBE */
peer_rmbe_offset;
- rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+ rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
if (rc)
- smc_lgr_terminate_sched(lgr);
+ smcr_link_down_cond_sched(link);
return rc;
}
@@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
size_t dst_off, size_t dst_len,
struct smc_rdma_wr *wr_rdma_buf)
{
+ struct smc_link *link = conn->lnk;
+
dma_addr_t dma_addr =
- sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
+ sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
int src_len_sum = src_len, dst_len_sum = dst_len;
int sent_count = src_off;
int srcchunk, dstchunk;
@@ -507,7 +508,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
if (!pflags->urg_data_present) {
rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
if (rc) {
- smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+ smc_wr_tx_put_slot(conn->lnk,
(struct smc_wr_tx_pend_priv *)pend);
goto out_unlock;
}
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 337ee52ad3d3..031e6c9561b1 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -120,8 +120,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
sizeof(link->wr_tx_bufs[i]));
clear_bit(i, link->wr_tx_mask);
}
- /* terminate connections of this link group abnormally */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ /* terminate link */
+ smcr_link_down_cond_sched(link);
}
if (pnd_snd.handler)
pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -207,13 +207,13 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
} else {
rc = wait_event_interruptible_timeout(
link->wr_tx_wait,
- link->state == SMC_LNK_INACTIVE ||
+ !smc_link_usable(link) ||
lgr->terminating ||
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
- /* timeout - terminate connections */
- smc_lgr_terminate_sched(lgr);
+ /* timeout - terminate link */
+ smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (idx == link->wr_tx_cnt)
@@ -270,7 +270,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
if (rc) {
smc_wr_tx_put_slot(link, priv);
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
}
return rc;
}
@@ -294,8 +294,8 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
(link->wr_reg_state != POSTED),
SMC_WR_REG_MR_WAIT_TIME);
if (!rc) {
- /* timeout - terminate connections */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ /* timeout - terminate link */
+ smcr_link_down_cond_sched(link);
return -EPIPE;
}
if (rc == -ERESTARTSYS)
@@ -393,10 +393,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
case IB_WC_RETRY_EXC_ERR:
case IB_WC_RNR_RETRY_EXC_ERR:
case IB_WC_WR_FLUSH_ERR:
- /* terminate connections of this link group
- * abnormally
- */
- smc_lgr_terminate_sched(smc_get_lgr(link));
+ smcr_link_down_cond_sched(link);
break;
default:
smc_wr_rx_post(link); /* refill WR RX */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 692bcd35f809..519414468b5d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -253,6 +253,8 @@ static int validate_ie_attr(const struct nlattr *attr,
}
/* policy for the attributes */
+static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
+
static const struct nla_policy
nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = {
[NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, },
@@ -296,11 +298,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
static const struct nla_policy
nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
- /*
- * we could specify this again to be the top-level policy,
- * but that would open us up to recursion problems ...
- */
- [NL80211_PMSR_PEER_ATTR_CHAN] = { .type = NLA_NESTED },
+ [NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy),
[NL80211_PMSR_PEER_ATTR_REQ] =
NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy),
[NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT },
@@ -347,7 +345,7 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
};
-const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
@@ -378,11 +376,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
- [NL80211_ATTR_MAC] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
- [NL80211_ATTR_PREV_BSSID] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_ATTR_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
+ [NL80211_ATTR_PREV_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_KEY] = { .type = NLA_NESTED, },
[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
@@ -434,10 +429,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
[NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG },
- [NL80211_ATTR_HT_CAPABILITY] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = NL80211_HT_CAPABILITY_LEN
- },
+ [NL80211_ATTR_HT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_HT_CAPABILITY_LEN),
[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
[NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
@@ -468,10 +460,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
- [NL80211_ATTR_PMKID] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = WLAN_PMKID_LEN
- },
+ [NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN),
[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
@@ -535,10 +524,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WDEV] = { .type = NLA_U64 },
[NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
[NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
- [NL80211_ATTR_VHT_CAPABILITY] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = NL80211_VHT_CAPABILITY_LEN
- },
+ [NL80211_ATTR_VHT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_VHT_CAPABILITY_LEN),
[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
[NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127),
[NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1),
@@ -576,10 +562,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
[NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
.len = IEEE80211_QOS_MAP_LEN_MAX },
- [NL80211_ATTR_MAC_HINT] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_ATTR_MAC_HINT] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
[NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG },
@@ -591,10 +574,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
[NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 },
- [NL80211_ATTR_MAC_MASK] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_ATTR_MAC_MASK] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
@@ -606,21 +586,15 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
.len = VHT_MUMIMO_GROUPS_DATA_LEN
},
- [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1),
[NL80211_ATTR_BANDS] = { .type = NLA_U32 },
[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
[NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
.len = FILS_MAX_KEK_LEN },
- [NL80211_ATTR_FILS_NONCES] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = 2 * FILS_NONCE_LEN
- },
+ [NL80211_ATTR_FILS_NONCES] = NLA_POLICY_EXACT_LEN_WARN(2 * FILS_NONCE_LEN),
[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
- [NL80211_ATTR_BSSID] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
+ [NL80211_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 },
[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = {
.len = sizeof(struct nl80211_bss_select_rssi_adjust)
@@ -633,7 +607,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 },
[NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY,
.len = FILS_ERP_MAX_RRK_LEN },
- [NL80211_ATTR_FILS_CACHE_ID] = { .type = NLA_EXACT_LEN_WARN, .len = 2 },
+ [NL80211_ATTR_FILS_CACHE_ID] = NLA_POLICY_EXACT_LEN_WARN(2),
[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
@@ -703,10 +677,7 @@ static const struct nla_policy
nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
- [NL80211_WOWLAN_TCP_DST_MAC] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_WOWLAN_TCP_DST_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
@@ -736,18 +707,9 @@ nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
/* policy for GTK rekey offload attributes */
static const struct nla_policy
nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
- [NL80211_REKEY_DATA_KEK] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = NL80211_KEK_LEN,
- },
- [NL80211_REKEY_DATA_KCK] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = NL80211_KCK_LEN,
- },
- [NL80211_REKEY_DATA_REPLAY_CTR] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = NL80211_REPLAY_CTR_LEN
- },
+ [NL80211_REKEY_DATA_KEK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KEK_LEN),
+ [NL80211_REKEY_DATA_KCK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KCK_LEN),
+ [NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN),
};
static const struct nla_policy
@@ -762,10 +724,7 @@ static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_SSID_LEN },
- [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
@@ -797,10 +756,7 @@ nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
- [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = ETH_ALEN
- },
+ [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
@@ -4406,10 +4362,7 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
.len = NL80211_MAX_SUPP_RATES },
[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
.len = NL80211_MAX_SUPP_HT_RATES },
- [NL80211_TXRATE_VHT] = {
- .type = NLA_EXACT_LEN_WARN,
- .len = sizeof(struct nl80211_txrate_vht),
- },
+ [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
[NL80211_TXRATE_GI] = { .type = NLA_U8 },
};
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a41e94a49a89..d3e8e426c486 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -11,8 +11,6 @@
int nl80211_init(void);
void nl80211_exit(void);
-extern const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
-
void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
int flags, u8 cmd);
bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 63dc8023447f..a95c79d18349 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -187,10 +187,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev,
/* reuse info->attrs */
memset(info->attrs, 0, sizeof(*info->attrs) * (NL80211_ATTR_MAX + 1));
- /* need to validate here, we don't want to have validation recursion */
err = nla_parse_nested_deprecated(info->attrs, NL80211_ATTR_MAX,
tb[NL80211_PMSR_PEER_ATTR_CHAN],
- nl80211_policy, info->extack);
+ NULL, info->extack);
if (err)
return err;
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 6582d155e2fc..d5e28239e030 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -90,7 +90,7 @@ static const struct ieee80211_radiotap_namespace radiotap_ns = {
* iterator.this_arg for type "type" safely on all arches.
*
* Example code:
- * See Documentation/networking/radiotap-headers.txt
+ * See Documentation/networking/radiotap-headers.rst
*/
int ieee80211_radiotap_iterator_init(
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 2ecb2e5e241e..9f0d58b0b90b 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -20,8 +20,8 @@ config X25
You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and
<http://docwiki.cisco.com/wiki/X.25>.
Information about X.25 for Linux is contained in the files
- <file:Documentation/networking/x25.txt> and
- <file:Documentation/networking/x25-iface.txt>.
+ <file:Documentation/networking/x25.rst> and
+ <file:Documentation/networking/x25-iface.rst>.
One connects to an X.25 network either with a dedicated network card
using the X.21 protocol (not yet supported by Linux) or one can do