diff options
Diffstat (limited to 'net/tipc')
-rw-r--r-- | net/tipc/Makefile | 2 | ||||
-rw-r--r-- | net/tipc/addr.h | 6 | ||||
-rw-r--r-- | net/tipc/bearer.c | 147 | ||||
-rw-r--r-- | net/tipc/bearer.h | 19 | ||||
-rw-r--r-- | net/tipc/core.c | 10 | ||||
-rw-r--r-- | net/tipc/core.h | 18 | ||||
-rw-r--r-- | net/tipc/discover.c | 12 | ||||
-rw-r--r-- | net/tipc/discover.h | 2 | ||||
-rw-r--r-- | net/tipc/link.c | 105 | ||||
-rw-r--r-- | net/tipc/link.h | 2 | ||||
-rw-r--r-- | net/tipc/monitor.c | 803 | ||||
-rw-r--r-- | net/tipc/monitor.h | 82 | ||||
-rw-r--r-- | net/tipc/msg.c | 6 | ||||
-rw-r--r-- | net/tipc/msg.h | 40 | ||||
-rw-r--r-- | net/tipc/name_distr.c | 35 | ||||
-rw-r--r-- | net/tipc/netlink.c | 27 | ||||
-rw-r--r-- | net/tipc/netlink.h | 1 | ||||
-rw-r--r-- | net/tipc/netlink_compat.c | 116 | ||||
-rw-r--r-- | net/tipc/node.c | 260 | ||||
-rw-r--r-- | net/tipc/node.h | 11 | ||||
-rw-r--r-- | net/tipc/server.c | 30 | ||||
-rw-r--r-- | net/tipc/server.h | 4 | ||||
-rw-r--r-- | net/tipc/socket.c | 203 | ||||
-rw-r--r-- | net/tipc/socket.h | 17 | ||||
-rw-r--r-- | net/tipc/subscr.c | 7 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 24 |
26 files changed, 1702 insertions, 287 deletions
diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 57e460be4692..31b9f9c52974 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ - name_distr.o subscr.o name_table.o net.o \ + name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 93f7c983be33..bebb347803ce 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -43,9 +43,6 @@ #include <net/netns/generic.h> #include "core.h" -#define TIPC_ZONE_MASK 0xff000000u -#define TIPC_CLUSTER_MASK 0xfffff000u - static inline u32 tipc_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -60,7 +57,7 @@ static inline u32 tipc_zone_mask(u32 addr) static inline u32 tipc_cluster_mask(u32 addr) { - return addr & TIPC_CLUSTER_MASK; + return addr & TIPC_ZONE_CLUSTER_MASK; } u32 tipc_own_addr(struct net *net); @@ -73,4 +70,5 @@ int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); int tipc_addr_scope(u32 domain); char *tipc_addr_string_fill(char *string, u32 addr); + #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 27a5406213c6..65b1bbf133bd 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,7 +1,7 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * @@ -39,6 +39,7 @@ #include "bearer.h" #include "link.h" #include "discover.h" +#include "monitor.h" #include "bcast.h" #include "netlink.h" @@ -170,6 +171,27 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. + */ +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; + + strcpy(name, b->name); + return 0; +} + void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -205,6 +227,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, struct tipc_bearer *b; struct tipc_media *m; struct tipc_bearer_names b_names; + struct sk_buff *skb; char addr_string[16]; u32 bearer_id; u32 with_this_prio; @@ -223,7 +246,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (tipc_addr_domain_valid(disc_domain) && (disc_domain != tn->own_addr)) { if (tipc_in_scope(disc_domain, tn->own_addr)) { - disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; + disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ @@ -301,7 +324,7 @@ restart: b->net_plane = bearer_id + 'A'; b->priority = priority; - res = tipc_disc_create(net, b, &b->bcast_addr); + res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { bearer_disable(net, b); pr_warn("Bearer <%s> rejected, discovery object creation failed\n", @@ -310,6 +333,11 @@ restart: } rcu_assign_pointer(tn->bearer_list[bearer_id], b); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + + if (tipc_mon_create(net, bearer_id)) + return -ENOMEM; pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -328,6 +356,21 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) return 0; } +/* tipc_bearer_reset_all - reset all links on all bearers + */ +void tipc_bearer_reset_all(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + int i; + + for (i = 0; i < MAX_BEARERS; i++) { + b = rcu_dereference_rtnl(tn->bearer_list[i]); + if (b) + tipc_reset_bearer(net, b); + } +} + /** * bearer_disable * @@ -335,24 +378,18 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ static void bearer_disable(struct net *net, struct tipc_bearer *b) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - u32 i; + struct tipc_net *tn = tipc_net(net); + int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); b->media->disable_media(b); - - tipc_node_delete_links(net, b->identity); + tipc_node_delete_links(net, bearer_id); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); - - for (i = 0; i < MAX_BEARERS; i++) { - if (b == rtnl_dereference(tn->bearer_list[i])) { - RCU_INIT_POINTER(tn->bearer_list[i], NULL); - break; - } - } + RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); kfree_rcu(b, rcu); + tipc_mon_delete(net, bearer_id); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -394,7 +431,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) /** * tipc_l2_send_msg - send a TIPC packet out over an L2 interface - * @buf: the packet to be sent + * @skb: the packet to be sent * @b: the bearer through which the packet is to be sent * @dest: peer destination address */ @@ -403,17 +440,21 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; + void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; + /* Send RESET message even if bearer is detached from device */ + tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); + if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) + goto drop; + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - return 0; - } + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) + goto drop; skb_reset_network_header(skb); skb->dev = dev; @@ -422,6 +463,9 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; +drop: + kfree_skb(skb); + return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -450,6 +494,8 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); if (likely(b)) b->media->send_msg(net, skb, b, dest); + else + kfree_skb(skb); rcu_read_unlock(); } @@ -468,11 +514,11 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, dst); } rcu_read_unlock(); } @@ -490,14 +536,14 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, rcu_read_lock(); b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) { - skb_queue_walk_safe(xmitq, skb, tmp) { - hdr = buf_msg(skb); - msg_set_non_seq(hdr, 1); - msg_set_mc_netid(hdr, net_id); - __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, &b->bcast_addr); - } + if (unlikely(!b)) + __skb_queue_purge(xmitq); + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } rcu_read_unlock(); } @@ -513,24 +559,21 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, * ignores packets sent using interface multicast, and traffic sent to other * nodes (which can happen if interface is running in promiscuous mode). */ -static int tipc_l2_rcv_msg(struct sk_buff *buf, struct net_device *dev, +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct tipc_bearer *b; rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b)) { - if (likely(buf->pkt_type <= PACKET_BROADCAST)) { - buf->next = NULL; - tipc_rcv(dev_net(dev), buf, b); - rcu_read_unlock(); - return NET_RX_SUCCESS; - } + if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + skb->next = NULL; + tipc_rcv(dev_net(dev), skb, b); + rcu_read_unlock(); + return NET_RX_SUCCESS; } rcu_read_unlock(); - - kfree_skb(buf); + kfree_skb(skb); return NET_RX_DROP; } @@ -548,9 +591,18 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); + struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; + int i; b = rtnl_dereference(dev->tipc_ptr); + if (!b) { + for (i = 0; i < MAX_BEARERS; b = NULL, i++) { + b = rtnl_dereference(tn->bearer_list[i]); + if (b && (b->media_ptr == dev)) + break; + } + } if (!b) return NOTIFY_DONE; @@ -560,13 +612,20 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, case NETDEV_CHANGE: if (netif_carrier_ok(dev)) break; + case NETDEV_UP: + rcu_assign_pointer(dev->tipc_ptr, b); + break; case NETDEV_GOING_DOWN: + RCU_INIT_POINTER(dev->tipc_ptr, NULL); + synchronize_net(); + tipc_reset_bearer(net, b); + break; case NETDEV_CHANGEMTU: tipc_reset_bearer(net, b); break; case NETDEV_CHANGEADDR: b->media->raw2addr(b, &b->addr, - (char *)dev->dev_addr); + (char *)dev->dev_addr); tipc_reset_bearer(net, b); break; case NETDEV_UNREGISTER: @@ -794,7 +853,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tn->own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index e31820516774..43757f1f9cb3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -42,8 +42,6 @@ #include <net/genetlink.h> #define MAX_MEDIA 3 -#define MAX_NODES 4096 -#define WSIZE 32 /* Identifiers associated with TIPC message header media address info * - address info field is 32 bytes long @@ -62,16 +60,6 @@ #define TIPC_MEDIA_TYPE_UDP 3 /** - * struct tipc_node_map - set of node identifiers - * @count: # of nodes in set - * @map: bitmap of node identifiers that are in the set - */ -struct tipc_node_map { - u32 count; - u32 map[MAX_NODES / WSIZE]; -}; - -/** * struct tipc_media_addr - destination address used by TIPC bearers * @value: address info (format defined by media) * @media_id: TIPC media type identifier @@ -142,7 +130,6 @@ struct tipc_media { * @identity: array index of this bearer within TIPC bearer array * @link_req: ptr to (optional) structure making periodic link setup requests * @net_plane: network plane ('A' through 'H') currently associated with bearer - * @nodes: indicates which nodes in cluster can be reached through bearer * * Note: media-specific code is responsible for initialization of the fields * indicated below when a bearer is enabled; TIPC's generic bearer code takes @@ -163,8 +150,6 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; - int node_cnt; - struct tipc_node_map nodes; }; struct tipc_bearer_names { @@ -212,7 +197,9 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); +void tipc_bearer_reset_all(struct net *net); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); diff --git a/net/tipc/core.c b/net/tipc/core.c index 03a842870c52..236b043a4156 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -57,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net) tn->net_id = 4711; tn->own_addr = 0; + tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; get_random_bytes(&tn->random, sizeof(int)); INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); @@ -69,6 +70,7 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_nametbl; + INIT_LIST_HEAD(&tn->dist_queue); err = tipc_topsrv_start(net); if (err) goto out_subscr; @@ -111,11 +113,9 @@ static int __init tipc_init(void) pr_info("Activated (version " TIPC_MOD_VER ")\n"); - sysctl_tipc_rmem[0] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_LOW_IMPORTANCE; - sysctl_tipc_rmem[1] = TIPC_CONN_OVERLOAD_LIMIT >> 4 << - TIPC_CRITICAL_IMPORTANCE; - sysctl_tipc_rmem[2] = TIPC_CONN_OVERLOAD_LIMIT; + sysctl_tipc_rmem[0] = RCVBUF_MIN; + sysctl_tipc_rmem[1] = RCVBUF_DEF; + sysctl_tipc_rmem[2] = RCVBUF_MAX; err = tipc_netlink_start(); if (err) diff --git a/net/tipc/core.h b/net/tipc/core.h index 5504d63503df..a1845fb27d80 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -66,11 +66,13 @@ struct tipc_bc_base; struct tipc_link; struct tipc_name_table; struct tipc_server; +struct tipc_monitor; #define TIPC_MOD_VER "2.0.0" -#define NODE_HTABLE_SIZE 512 -#define MAX_BEARERS 3 +#define NODE_HTABLE_SIZE 512 +#define MAX_BEARERS 3 +#define TIPC_DEF_MON_THRESHOLD 32 extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; @@ -88,6 +90,10 @@ struct tipc_net { u32 num_nodes; u32 num_links; + /* Neighbor monitoring list */ + struct tipc_monitor *monitors[MAX_BEARERS]; + int mon_threshold; + /* Bearer list */ struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; @@ -103,6 +109,9 @@ struct tipc_net { spinlock_t nametbl_lock; struct name_table *nametbl; + /* Name dist queue */ + struct list_head dist_queue; + /* Topology subscription server */ struct tipc_server *topsrv; atomic_t subscription_count; @@ -123,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index f1e738e80535..6b109a808d4c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -135,9 +135,12 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u16 caps = msg_node_capabilities(hdr); bool respond = false; bool dupl_addr = false; + int err; - bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); + err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); kfree_skb(skb); + if (err) + return; /* Ensure message from node is valid and communication is permitted */ if (net_id != tn->net_id) @@ -268,10 +271,9 @@ exit: * Returns 0 if successful, otherwise -errno. */ int tipc_disc_create(struct net *net, struct tipc_bearer *b, - struct tipc_media_addr *dest) + struct tipc_media_addr *dest, struct sk_buff **skb) { struct tipc_link_req *req; - struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -293,9 +295,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; - skb = skb_clone(req->buf, GFP_ATOMIC); - if (skb) - tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); + *skb = skb_clone(req->buf, GFP_ATOMIC); return 0; } diff --git a/net/tipc/discover.h b/net/tipc/discover.h index c9b12770c5ed..b80a335389c0 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -40,7 +40,7 @@ struct tipc_link_req; int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest); + struct tipc_media_addr *dest, struct sk_buff **skb); void tipc_disc_delete(struct tipc_link_req *req); void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr); void tipc_disc_add_dest(struct tipc_link_req *req); diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d2bb3e70baa..877d94f34814 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -42,6 +42,7 @@ #include "name_distr.h" #include "discover.h" #include "netlink.h" +#include "monitor.h" #include <linux/pkt_sched.h> @@ -87,7 +88,6 @@ struct tipc_stats { * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node @@ -96,6 +96,7 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') + * @mon_state: cookie with information needed by link monitor * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset @@ -131,7 +132,6 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - unsigned long keepalive_intv; u32 abort_limit; u32 state; u16 peer_caps; @@ -140,6 +140,8 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + struct tipc_mon_state mon_state; + u16 rst_cnt; /* Failover/synch */ u16 drop_point; @@ -348,6 +350,8 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l, u16 ack = snd_l->snd_nxt - 1; snd_l->ackers--; + rcv_l->bc_peer_is_up = true; + rcv_l->state = LINK_ESTABLISHED; tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; @@ -701,40 +705,42 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ -/* tipc_link_timeout - perform periodic task as instructed from node timeout - */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { + int mtyp = 0; int rc = 0; - int mtyp = STATE_MSG; - bool xmit = false; - bool prb = false; + bool state = false; + bool probe = false; + bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - bool bc_up = link_is_up(l->bc_rcvlink); - - link_profile_stats(l); + struct tipc_mon_state *mstate = &l->mon_state; switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (!l->silent_intv_cnt) { - if (bc_up && (bc_acked != bc_snt)) - xmit = true; - } else if (l->silent_intv_cnt <= l->abort_limit) { - xmit = true; - prb = true; - } else { - rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - } - l->silent_intv_cnt++; + mtyp = STATE_MSG; + link_profile_stats(l); + tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); + if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + state = bc_acked != bc_snt; + state |= l->bc_rcvlink->rcv_unacked; + state |= l->rcv_unacked; + state |= !skb_queue_empty(&l->transmq); + state |= !skb_queue_empty(&l->deferdq); + probe = mstate->probing; + probe |= l->silent_intv_cnt; + if (probe || mstate->monitoring) + l->silent_intv_cnt++; break; case LINK_RESET: - xmit = true; + setup = l->rst_cnt++ <= 4; + setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: - xmit = true; + setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: @@ -745,8 +751,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) break; } - if (xmit) - tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq); + if (state || probe || setup) + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); return rc; } @@ -833,9 +839,11 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_nxt = 1; l->acked = 0; l->silent_intv_cnt = 0; + l->rst_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; + memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } @@ -1110,12 +1118,12 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } -/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission +/* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; @@ -1140,11 +1148,17 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; + struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + + /* Inform peer that this endpoint is going down if applicable */ + skb = skb_peek_tail(xmitq); + if (skb && (l->state == LINK_RESET)) + msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission @@ -1219,7 +1233,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) - rc |= tipc_link_build_ack_msg(l, xmitq); + rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; } while ((skb = __skb_dequeue(defq))); @@ -1238,6 +1252,9 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct tipc_msg *hdr; struct sk_buff_head *dfq = &l->deferdq; bool node_up = link_is_up(l->bc_rcvlink); + struct tipc_mon_state *mstate = &l->mon_state; + int dlen = 0; + void *data; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1250,12 +1267,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - TIPC_MAX_IF_NAME, l->addr, + tipc_max_domain_size, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; hdr = buf_msg(skb); + data = msg_data(hdr); msg_set_session(hdr, l->session); msg_set_bearer_id(hdr, l->bearer_id); msg_set_net_plane(hdr, l->net_plane); @@ -1271,14 +1289,18 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (mtyp == STATE_MSG) { msg_set_seq_gap(hdr, rcvgap); - msg_set_size(hdr, INT_H_SIZE); msg_set_probe(hdr, probe); + tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + dlen); + skb_trim(skb, INT_H_SIZE + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); - strcpy(msg_data(hdr), l->if_name); + strcpy(data, l->if_name); + msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); + skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); } if (probe) l->stats.sent_probes++; @@ -1371,7 +1393,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; + u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + void *data; char *if_name; int rc = 0; @@ -1381,6 +1405,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); + skb_linearize(skb); + hdr = buf_msg(skb); + data = msg_data(hdr); + switch (mtyp) { case RESET_MSG: @@ -1391,8 +1419,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: - skb_linearize(skb); - hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; @@ -1400,7 +1426,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; - strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME); + strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) @@ -1411,7 +1437,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->priority = peers_prio; /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ - if ((mtyp == RESET_MSG) || !link_is_up(l)) + if (msg_peer_stopping(hdr)) + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + else if ((mtyp == RESET_MSG) || !link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ @@ -1446,6 +1474,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = TIPC_LINK_UP_EVT; break; } + tipc_mon_rcv(l->net, data, dlen, l->addr, + &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) @@ -1554,7 +1584,12 @@ void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, if (!msg_peer_node_is_up(hdr)) return; - l->bc_peer_is_up = true; + /* Open when peer ackowledges our bcast init msg (pkt #1) */ + if (msg_ack(hdr)) + l->bc_peer_is_up = true; + + if (!l->bc_peer_is_up) + return; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) diff --git a/net/tipc/link.h b/net/tipc/link.h index 6a94175ee20a..d7e9d42fcb2d 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -123,7 +123,7 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); -int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c new file mode 100644 index 000000000000..b62caa1c770c --- /dev/null +++ b/net/tipc/monitor.c @@ -0,0 +1,803 @@ +/* + * net/tipc/monitor.c + * + * Copyright (c) 2016, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/genetlink.h> +#include "core.h" +#include "addr.h" +#include "monitor.h" +#include "bearer.h" + +#define MAX_MON_DOMAIN 64 +#define MON_TIMEOUT 120000 +#define MAX_PEER_DOWN_EVENTS 4 + +/* struct tipc_mon_domain: domain record to be transferred between peers + * @len: actual size of domain record + * @gen: current generation of sender's domain + * @ack_gen: most recent generation of self's domain acked by peer + * @member_cnt: number of domain member nodes described in this record + * @up_map: bit map indicating which of the members the sender considers up + * @members: identity of the domain members + */ +struct tipc_mon_domain { + u16 len; + u16 gen; + u16 ack_gen; + u16 member_cnt; + u64 up_map; + u32 members[MAX_MON_DOMAIN]; +}; + +/* struct tipc_peer: state of a peer node and its domain + * @addr: tipc node identity of peer + * @head_map: shows which other nodes currently consider peer 'up' + * @domain: most recent domain record from peer + * @hash: position in hashed lookup list + * @list: position in linked list, in circular ascending order by 'addr' + * @applied: number of reported domain members applied on this monitor list + * @is_up: peer is up as seen from this node + * @is_head: peer is assigned domain head as seen from this node + * @is_local: peer is in local domain and should be continuously monitored + * @down_cnt: - numbers of other peers which have reported this on lost + */ +struct tipc_peer { + u32 addr; + struct tipc_mon_domain *domain; + struct hlist_node hash; + struct list_head list; + u8 applied; + u8 down_cnt; + bool is_up; + bool is_head; + bool is_local; +}; + +struct tipc_monitor { + struct hlist_head peers[NODE_HTABLE_SIZE]; + int peer_cnt; + struct tipc_peer *self; + rwlock_t lock; + struct tipc_mon_domain cache; + u16 list_gen; + u16 dom_gen; + struct net *net; + struct timer_list timer; + unsigned long timer_intv; +}; + +static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) +{ + return tipc_net(net)->monitors[bearer_id]; +} + +const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); + +/* dom_rec_len(): actual length of domain record for transport + */ +static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) +{ + return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); +} + +/* dom_size() : calculate size of own domain based on number of peers + */ +static int dom_size(int peers) +{ + int i = 0; + + while ((i * i) < peers) + i++; + return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN; +} + +static void map_set(u64 *up_map, int i, unsigned int v) +{ + *up_map &= ~(1ULL << i); + *up_map |= ((u64)v << i); +} + +static int map_get(u64 up_map, int i) +{ + return (up_map & (1 << i)) >> i; +} + +static struct tipc_peer *peer_prev(struct tipc_peer *peer) +{ + return list_last_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_nxt(struct tipc_peer *peer) +{ + return list_first_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_head(struct tipc_peer *peer) +{ + while (!peer->is_head) + peer = peer_prev(peer); + return peer; +} + +static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) +{ + struct tipc_peer *peer; + unsigned int thash = tipc_hashfn(addr); + + hlist_for_each_entry(peer, &mon->peers[thash], hash) { + if (peer->addr == addr) + return peer; + } + return NULL; +} + +static struct tipc_peer *get_self(struct net *net, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + + return mon->self; +} + +static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) +{ + struct tipc_net *tn = tipc_net(net); + + return mon->peer_cnt > tn->mon_threshold; +} + +/* mon_identify_lost_members() : - identify amd mark potentially lost members + */ +static void mon_identify_lost_members(struct tipc_peer *peer, + struct tipc_mon_domain *dom_bef, + int applied_bef) +{ + struct tipc_peer *member = peer; + struct tipc_mon_domain *dom_aft = peer->domain; + int applied_aft = peer->applied; + int i; + + for (i = 0; i < applied_bef; i++) { + member = peer_nxt(member); + + /* Do nothing if self or peer already see member as down */ + if (!member->is_up || !map_get(dom_bef->up_map, i)) + continue; + + /* Loss of local node must be detected by active probing */ + if (member->is_local) + continue; + + /* Start probing if member was removed from applied domain */ + if (!applied_aft || (applied_aft < i)) { + member->down_cnt = 1; + continue; + } + + /* Member loss is confirmed if it is still in applied domain */ + if (!map_get(dom_aft->up_map, i)) + member->down_cnt++; + } +} + +/* mon_apply_domain() : match a peer's domain record against monitor list + */ +static void mon_apply_domain(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + struct tipc_mon_domain *dom = peer->domain; + struct tipc_peer *member; + u32 addr; + int i; + + if (!dom || !peer->is_up) + return; + + /* Scan across domain members and match against monitor list */ + peer->applied = 0; + member = peer_nxt(peer); + for (i = 0; i < dom->member_cnt; i++) { + addr = dom->members[i]; + if (addr != member->addr) + return; + peer->applied++; + member = peer_nxt(member); + } +} + +/* mon_update_local_domain() : update after peer addition/removal/up/down + */ +static void mon_update_local_domain(struct tipc_monitor *mon) +{ + struct tipc_peer *self = mon->self; + struct tipc_mon_domain *cache = &mon->cache; + struct tipc_mon_domain *dom = self->domain; + struct tipc_peer *peer = self; + u64 prev_up_map = dom->up_map; + u16 member_cnt, i; + bool diff; + + /* Update local domain size based on current size of cluster */ + member_cnt = dom_size(mon->peer_cnt) - 1; + self->applied = member_cnt; + + /* Update native and cached outgoing local domain records */ + dom->len = dom_rec_len(dom, member_cnt); + diff = dom->member_cnt != member_cnt; + dom->member_cnt = member_cnt; + for (i = 0; i < member_cnt; i++) { + peer = peer_nxt(peer); + diff |= dom->members[i] != peer->addr; + dom->members[i] = peer->addr; + map_set(&dom->up_map, i, peer->is_up); + cache->members[i] = htonl(peer->addr); + } + diff |= dom->up_map != prev_up_map; + if (!diff) + return; + dom->gen = ++mon->dom_gen; + cache->len = htons(dom->len); + cache->gen = htons(dom->gen); + cache->member_cnt = htons(member_cnt); + cache->up_map = cpu_to_be64(dom->up_map); + mon_apply_domain(mon, self); +} + +/* mon_update_neighbors() : update preceding neighbors of added/removed peer + */ +static void mon_update_neighbors(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + int dz, i; + + dz = dom_size(mon->peer_cnt); + for (i = 0; i < dz; i++) { + mon_apply_domain(mon, peer); + peer = peer_prev(peer); + } +} + +/* mon_assign_roles() : reassign peer roles after a network change + * The monitor list is consistent at this stage; i.e., each peer is monitoring + * a set of domain members as matched between domain record and the monitor list + */ +static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) +{ + struct tipc_peer *peer = peer_nxt(head); + struct tipc_peer *self = mon->self; + int i = 0; + + for (; peer != self; peer = peer_nxt(peer)) { + peer->is_local = false; + + /* Update domain member */ + if (i++ < head->applied) { + peer->is_head = false; + if (head == self) + peer->is_local = true; + continue; + } + /* Assign next domain head */ + if (!peer->is_up) + continue; + if (peer->is_head) + break; + head = peer; + head->is_head = true; + i = 0; + } + mon->list_gen++; +} + +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *prev, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) + goto exit; + prev = peer_prev(peer); + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + mon->peer_cnt--; + head = peer_head(prev); + if (head == self) + mon_update_local_domain(mon); + mon_update_neighbors(mon, prev); + + /* Revert to full-mesh monitoring if we reach threshold */ + if (!tipc_mon_is_active(net, mon)) { + list_for_each_entry(peer, &self->list, list) { + kfree(peer->domain); + peer->domain = NULL; + peer->applied = 0; + } + } + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, + struct tipc_peer **peer) +{ + struct tipc_peer *self = mon->self; + struct tipc_peer *cur, *prev, *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + *peer = p; + if (!p) + return false; + p->addr = addr; + + /* Add new peer to lookup list */ + INIT_LIST_HEAD(&p->list); + hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); + + /* Sort new peer into iterator list, in ascending circular order */ + prev = self; + list_for_each_entry(cur, &self->list, list) { + if ((addr > prev->addr) && (addr < cur->addr)) + break; + if (((addr < cur->addr) || (addr > prev->addr)) && + (prev->addr > cur->addr)) + break; + prev = cur; + } + list_add_tail(&p->list, &cur->list); + mon->peer_cnt++; + mon_update_neighbors(mon, p); + return true; +} + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) + goto exit; + peer->is_up = true; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + struct tipc_mon_domain *dom; + int applied; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) { + pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); + goto exit; + } + applied = peer->applied; + peer->applied = 0; + dom = peer->domain; + peer->domain = NULL; + if (peer->is_head) + mon_identify_lost_members(peer, dom, applied); + kfree(dom); + peer->is_up = false; + peer->is_head = false; + peer->is_local = false; + peer->down_cnt = 0; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +/* tipc_mon_rcv - process monitor domain event message + */ +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *arrv_dom = data; + struct tipc_mon_domain dom_bef; + struct tipc_mon_domain *dom; + struct tipc_peer *peer; + u16 new_member_cnt = ntohs(arrv_dom->member_cnt); + int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); + u16 new_gen = ntohs(arrv_dom->gen); + u16 acked_gen = ntohs(arrv_dom->ack_gen); + bool probing = state->probing; + int i, applied_bef; + + state->probing = false; + if (!dlen) + return; + + /* Sanity check received domain record */ + if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) { + pr_warn_ratelimited("Received illegal domain record\n"); + return; + } + + /* Synch generation numbers with peer if link just came up */ + if (!state->synched) { + state->peer_gen = new_gen - 1; + state->acked_gen = acked_gen; + state->synched = true; + } + + if (more(acked_gen, state->acked_gen)) + state->acked_gen = acked_gen; + + /* Drop duplicate unless we are waiting for a probe response */ + if (!more(new_gen, state->peer_gen) && !probing) + return; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer || !peer->is_up) + goto exit; + + /* Peer is confirmed, stop any ongoing probing */ + peer->down_cnt = 0; + + /* Task is done for duplicate record */ + if (!more(new_gen, state->peer_gen)) + goto exit; + + state->peer_gen = new_gen; + + /* Cache current domain record for later use */ + dom_bef.member_cnt = 0; + dom = peer->domain; + if (dom) + memcpy(&dom_bef, dom, dom->len); + + /* Transform and store received domain record */ + if (!dom || (dom->len < new_dlen)) { + kfree(dom); + dom = kmalloc(new_dlen, GFP_ATOMIC); + peer->domain = dom; + if (!dom) + goto exit; + } + dom->len = new_dlen; + dom->gen = new_gen; + dom->member_cnt = new_member_cnt; + dom->up_map = be64_to_cpu(arrv_dom->up_map); + for (i = 0; i < new_member_cnt; i++) + dom->members[i] = ntohl(arrv_dom->members[i]); + + /* Update peers affected by this domain record */ + applied_bef = peer->applied; + mon_apply_domain(mon, peer); + mon_identify_lost_members(peer, &dom_bef, applied_bef); + mon_assign_roles(mon, peer_head(peer)); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *dom = data; + u16 gen = mon->dom_gen; + u16 len; + + if (!tipc_mon_is_active(net, mon)) + return; + + /* Send only a dummy record with ack if peer has acked our last sent */ + if (likely(state->acked_gen == gen)) { + len = dom_rec_len(dom, 0); + *dlen = len; + dom->len = htons(len); + dom->gen = htons(gen); + dom->ack_gen = htons(state->peer_gen); + dom->member_cnt = 0; + return; + } + /* Send the full record */ + read_lock_bh(&mon->lock); + len = ntohs(mon->cache.len); + *dlen = len; + memcpy(data, &mon->cache, len); + read_unlock_bh(&mon->lock); + dom->ack_gen = htons(state->peer_gen); +} + +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + /* Used cached state if table has not changed */ + if (!state->probing && + (state->list_gen == mon->list_gen) && + (state->acked_gen == mon->dom_gen)) + return; + + read_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (peer) { + state->probing = state->acked_gen != mon->dom_gen; + state->probing |= peer->down_cnt; + state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; + state->monitoring = peer->is_local; + state->monitoring |= peer->is_head; + state->list_gen = mon->list_gen; + } + read_unlock_bh(&mon->lock); +} + +static void mon_timeout(unsigned long m) +{ + struct tipc_monitor *mon = (void *)m; + struct tipc_peer *self; + int best_member_cnt = dom_size(mon->peer_cnt) - 1; + + write_lock_bh(&mon->lock); + self = mon->self; + if (self && (best_member_cnt != self->applied)) { + mon_update_local_domain(mon); + mon_assign_roles(mon, self); + } + write_unlock_bh(&mon->lock); + mod_timer(&mon->timer, jiffies + mon->timer_intv); +} + +int tipc_mon_create(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon; + struct tipc_peer *self; + struct tipc_mon_domain *dom; + + if (tn->monitors[bearer_id]) + return 0; + + mon = kzalloc(sizeof(*mon), GFP_ATOMIC); + self = kzalloc(sizeof(*self), GFP_ATOMIC); + dom = kzalloc(sizeof(*dom), GFP_ATOMIC); + if (!mon || !self || !dom) { + kfree(mon); + kfree(self); + kfree(dom); + return -ENOMEM; + } + tn->monitors[bearer_id] = mon; + rwlock_init(&mon->lock); + mon->net = net; + mon->peer_cnt = 1; + mon->self = self; + self->domain = dom; + self->addr = tipc_own_addr(net); + self->is_up = true; + self->is_head = true; + INIT_LIST_HEAD(&self->list); + setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); + mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); + mod_timer(&mon->timer, jiffies + mon->timer_intv); + return 0; +} + +void tipc_mon_delete(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *tmp; + + write_lock_bh(&mon->lock); + tn->monitors[bearer_id] = NULL; + list_for_each_entry_safe(peer, tmp, &self->list, list) { + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + } + mon->self = NULL; + write_unlock_bh(&mon->lock); + del_timer_sync(&mon->timer); + kfree(self->domain); + kfree(self); + kfree(mon); +} + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} + +int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) + goto attr_msg_full; + } + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer = mon->self; + + if (!mon) + return -EINVAL; + + read_lock_bh(&mon->lock); + do { + if (*prev_node) { + if (peer->addr == *prev_node) + *prev_node = 0; + else + continue; + } + if (__tipc_nl_add_monitor_peer(peer, msg)) { + *prev_node = peer->addr; + read_unlock_bh(&mon->lock); + return -EMSGSIZE; + } + } while ((peer = peer_nxt(peer)) != mon->self); + read_unlock_bh(&mon->lock); + + return 0; +} + +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + char bearer_name[TIPC_MAX_BEARER_NAME]; + struct nlattr *attrs; + void *hdr; + int ret; + + ret = tipc_bearer_get_name(net, bearer_name, bearer_id); + if (ret || !mon) + return -EINVAL; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + read_lock_bh(&mon->lock); + if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) + goto attr_msg_full; + if (tipc_mon_is_active(net, mon)) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen)) + goto attr_msg_full; + + read_unlock_bh(&mon->lock); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + read_unlock_bh(&mon->lock); + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h new file mode 100644 index 000000000000..2a21b93e0d04 --- /dev/null +++ b/net/tipc/monitor.h @@ -0,0 +1,82 @@ +/* + * net/tipc/monitor.h + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_MONITOR_H +#define _TIPC_MONITOR_H + +#include "netlink.h" + +/* struct tipc_mon_state: link instance's cache of monitor list and domain state + * @list_gen: current generation of this node's monitor list + * @gen: current generation of this node's local domain + * @peer_gen: most recent domain generation received from peer + * @acked_gen: most recent generation of self's domain acked by peer + * @monitoring: this peer endpoint should continuously monitored + * @probing: peer endpoint should be temporarily probed for potential loss + * @synched: domain record's generation has been synched with peer after reset + */ +struct tipc_mon_state { + u16 list_gen; + u16 peer_gen; + u16 acked_gen; + bool monitoring :1; + bool probing :1; + bool reset :1; + bool synched :1; +}; + +int tipc_mon_create(struct net *net, int bearer_id); +void tipc_mon_delete(struct net *net, int bearer_id); + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id); +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id); +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id); +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id); +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node); + +extern const int tipc_max_domain_size; +#endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8740930f0787..17201aa8423d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,6 +41,8 @@ #include "name_table.h" #define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 static unsigned int align(unsigned int i) { @@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + if (skb_cloned(_skb) && + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + goto exit; + /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_origport(hdr, msg_destport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 55778a0aebf3..7cf52fb39bee 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -94,17 +94,6 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 -/** - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM (LL_MAX_HEADER + 48) - struct tipc_skb_cb { void *handle; struct sk_buff *tail; @@ -715,6 +704,16 @@ static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) msg_set_bits(m, 5, 12, 0x1, r); } +static inline u32 msg_peer_stopping(struct tipc_msg *m) +{ + return msg_bits(m, 5, 13, 0x1); +} + +static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) +{ + msg_set_bits(m, 5, 13, 0x1, s); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; @@ -733,16 +732,26 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_bcast_tag(struct tipc_msg *m) +static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } -static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) +static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) { msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u32 msg_adv_win(struct tipc_msg *m) +{ + return msg_bits(m, 9, 0, 0xffff); +} + +static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 9, 0, 0xffff, n); +} + static inline u32 msg_max_pkt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff) * 4; @@ -779,6 +788,11 @@ static inline bool msg_peer_node_is_up(struct tipc_msg *m) return msg_redundant_link(m); } +static inline bool msg_is_reset(struct tipc_msg *hdr) +{ + return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); +} + struct sk_buff *tipc_buf_acquire(u32 size); bool tipc_msg_validate(struct sk_buff *skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index ebe9d0ff6e9e..6b626a64b517 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -40,11 +40,6 @@ int sysctl_tipc_named_timeout __read_mostly = 2000; -/** - * struct tipc_dist_queue - queue holding deferred name table updates - */ -static struct list_head tipc_dist_queue = LIST_HEAD_INIT(tipc_dist_queue); - struct distr_queue_item { struct distr_item i; u32 dtype; @@ -229,12 +224,31 @@ static void tipc_publ_purge(struct net *net, struct publication *publ, u32 addr) kfree_rcu(p, rcu); } +/** + * tipc_dist_queue_purge - remove deferred updates from a node that went down + */ +static void tipc_dist_queue_purge(struct net *net, u32 addr) +{ + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct distr_queue_item *e, *tmp; + + spin_lock_bh(&tn->nametbl_lock); + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { + if (e->node != addr) + continue; + list_del(&e->next); + kfree(e); + } + spin_unlock_bh(&tn->nametbl_lock); +} + void tipc_publ_notify(struct net *net, struct list_head *nsub_list, u32 addr) { struct publication *publ, *tmp; list_for_each_entry_safe(publ, tmp, nsub_list, nodesub_list) tipc_publ_purge(net, publ, addr); + tipc_dist_queue_purge(net, addr); } /** @@ -279,9 +293,11 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, * tipc_named_add_backlog - add a failed name table update to the backlog * */ -static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) +static void tipc_named_add_backlog(struct net *net, struct distr_item *i, + u32 type, u32 node) { struct distr_queue_item *e; + struct tipc_net *tn = net_generic(net, tipc_net_id); unsigned long now = get_jiffies_64(); e = kzalloc(sizeof(*e), GFP_ATOMIC); @@ -291,7 +307,7 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) e->node = node; e->expires = now + msecs_to_jiffies(sysctl_tipc_named_timeout); memcpy(e, i, sizeof(*i)); - list_add_tail(&e->next, &tipc_dist_queue); + list_add_tail(&e->next, &tn->dist_queue); } /** @@ -301,10 +317,11 @@ static void tipc_named_add_backlog(struct distr_item *i, u32 type, u32 node) void tipc_named_process_backlog(struct net *net) { struct distr_queue_item *e, *tmp; + struct tipc_net *tn = net_generic(net, tipc_net_id); char addr[16]; unsigned long now = get_jiffies_64(); - list_for_each_entry_safe(e, tmp, &tipc_dist_queue, next) { + list_for_each_entry_safe(e, tmp, &tn->dist_queue, next) { if (time_after(e->expires, now)) { if (!tipc_update_nametbl(net, &e->i, e->node, e->dtype)) continue; @@ -344,7 +361,7 @@ void tipc_named_rcv(struct net *net, struct sk_buff_head *inputq) node = msg_orignode(msg); while (count--) { if (!tipc_update_nametbl(net, item, node, mtype)) - tipc_named_add_backlog(item, mtype, node); + tipc_named_add_backlog(net, item, mtype, node); item++; } kfree_skb(skb); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 56935df2167a..a84daec0afe9 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -52,7 +52,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, [TIPC_NLA_NET] = { .type = NLA_NESTED, }, - [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, }; const struct nla_policy @@ -61,6 +62,12 @@ tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } }; +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_REF] = { .type = NLA_U32 }, + [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, @@ -214,7 +221,23 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, .policy = tipc_nl_policy, - } + }, + { + .cmd = TIPC_NL_MON_SET, + .doit = tipc_nl_node_set_monitor, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_GET, + .doit = tipc_nl_node_get_monitor, + .dumpit = tipc_nl_node_dump_monitor, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_PEER_GET, + .dumpit = tipc_nl_node_dump_monitor_peer, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index ed1dbcb4afbd..4ba0ad422110 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -55,6 +55,7 @@ extern const struct nla_policy tipc_nl_prop_policy[]; extern const struct nla_policy tipc_nl_bearer_policy[]; extern const struct nla_policy tipc_nl_media_policy[]; extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; int tipc_netlink_start(void); int tipc_netlink_compat_start(void); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index d7d050f44fc1..1fd464764765 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -346,9 +346,15 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; - nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], - NULL); + err = nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, nla_data(bearer[TIPC_NLA_BEARER_NAME]), @@ -460,14 +466,31 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + int err; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; - nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], - NULL); + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_PROP]) + return -EINVAL; - nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], - NULL); + err = nla_parse_nested(prop, TIPC_NLA_PROP_MAX, + link[TIPC_NLA_LINK_PROP], NULL); + if (err) + return err; + + if (!link[TIPC_NLA_LINK_STATS]) + return -EINVAL; + + err = nla_parse_nested(stats, TIPC_NLA_STATS_MAX, + link[TIPC_NLA_LINK_STATS], NULL); + if (err) + return err; name = (char *)TLV_DATA(msg->req); if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) @@ -569,12 +592,20 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, { struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; struct tipc_link_info link_info; + int err; - nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + if (!attrs[TIPC_NLA_LINK]) + return -EINVAL; + + err = nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], + NULL); + if (err) + return err; link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], + TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); @@ -758,12 +789,23 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, u32 node, depth, type, lowbound, upbound; static const char * const scope_str[] = {"", " zone", " cluster", " node"}; + int err; - nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, - attrs[TIPC_NLA_NAME_TABLE], NULL); + if (!attrs[TIPC_NLA_NAME_TABLE]) + return -EINVAL; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, nt[TIPC_NLA_NAME_TABLE_PUBL], - NULL); + err = nla_parse_nested(nt, TIPC_NLA_NAME_TABLE_MAX, + attrs[TIPC_NLA_NAME_TABLE], NULL); + if (err) + return err; + + if (!nt[TIPC_NLA_NAME_TABLE_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, + nt[TIPC_NLA_NAME_TABLE_PUBL], NULL); + if (err) + return err; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); @@ -802,7 +844,7 @@ static int tipc_nl_compat_name_table_dump(struct tipc_nl_compat_msg *msg, goto out; tipc_tlv_sprintf(msg->rep, "%-10u %s", - nla_get_u32(publ[TIPC_NLA_PUBL_REF]), + nla_get_u32(publ[TIPC_NLA_PUBL_KEY]), scope_str[nla_get_u32(publ[TIPC_NLA_PUBL_SCOPE])]); out: tipc_tlv_sprintf(msg->rep, "\n"); @@ -815,8 +857,15 @@ static int __tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, { u32 type, lower, upper; struct nlattr *publ[TIPC_NLA_PUBL_MAX + 1]; + int err; - nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], NULL); + if (!attrs[TIPC_NLA_PUBL]) + return -EINVAL; + + err = nla_parse_nested(publ, TIPC_NLA_PUBL_MAX, attrs[TIPC_NLA_PUBL], + NULL); + if (err) + return err; type = nla_get_u32(publ[TIPC_NLA_PUBL_TYPE]); lower = nla_get_u32(publ[TIPC_NLA_PUBL_LOWER]); @@ -876,7 +925,13 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 sock_ref; struct nlattr *sock[TIPC_NLA_SOCK_MAX + 1]; - nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], NULL); + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], + NULL); + if (err) + return err; sock_ref = nla_get_u32(sock[TIPC_NLA_SOCK_REF]); tipc_tlv_sprintf(msg->rep, "%u:", sock_ref); @@ -917,9 +972,15 @@ static int tipc_nl_compat_media_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { struct nlattr *media[TIPC_NLA_MEDIA_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_MEDIA]) + return -EINVAL; - nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], - NULL); + err = nla_parse_nested(media, TIPC_NLA_MEDIA_MAX, attrs[TIPC_NLA_MEDIA], + NULL); + if (err) + return err; return tipc_add_tlv(msg->rep, TIPC_TLV_MEDIA_NAME, nla_data(media[TIPC_NLA_MEDIA_NAME]), @@ -931,8 +992,15 @@ static int tipc_nl_compat_node_dump(struct tipc_nl_compat_msg *msg, { struct tipc_node_info node_info; struct nlattr *node[TIPC_NLA_NODE_MAX + 1]; + int err; - nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], NULL); + if (!attrs[TIPC_NLA_NODE]) + return -EINVAL; + + err = nla_parse_nested(node, TIPC_NLA_NODE_MAX, attrs[TIPC_NLA_NODE], + NULL); + if (err) + return err; node_info.addr = htonl(nla_get_u32(node[TIPC_NLA_NODE_ADDR])); node_info.up = htonl(nla_get_flag(node[TIPC_NLA_NODE_UP])); @@ -971,8 +1039,16 @@ static int tipc_nl_compat_net_dump(struct tipc_nl_compat_msg *msg, { __be32 id; struct nlattr *net[TIPC_NLA_NET_MAX + 1]; + int err; + + if (!attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], + NULL); + if (err) + return err; - nla_parse_nested(net, TIPC_NLA_NET_MAX, attrs[TIPC_NLA_NET], NULL); id = htonl(nla_get_u32(net[TIPC_NLA_NET_ID])); return tipc_add_tlv(msg->rep, TIPC_TLV_UNSIGNED, &id, sizeof(id)); diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..21974191e425 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1,7 +1,7 @@ /* * net/tipc/node.c: TIPC node management routines * - * Copyright (c) 2000-2006, 2012-2015, Ericsson AB + * Copyright (c) 2000-2006, 2012-2016, Ericsson AB * Copyright (c) 2005-2006, 2010-2014, Wind River Systems * All rights reserved. * @@ -40,6 +40,7 @@ #include "name_distr.h" #include "socket.h" #include "bcast.h" +#include "monitor.h" #include "discover.h" #include "netlink.h" @@ -191,15 +192,18 @@ int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel) tipc_node_put(n); return mtu; } -/* - * A trivial power-of-two bitmask technique is used for speed, since this - * operation is done for every incoming TIPC packet. The number of hash table - * entries has been chosen so that no hash chain exceeds 8 nodes and will - * usually be much smaller (typically only a single node). - */ -static unsigned int tipc_hashfn(u32 addr) + +u16 tipc_node_get_capabilities(struct net *net, u32 addr) { - return addr & (NODE_HTABLE_SIZE - 1); + struct tipc_node *n; + u16 caps; + + n = tipc_node_find(net, addr); + if (unlikely(!n)) + return TIPC_NODE_CAPABILITIES; + caps = n->capabilities; + tipc_node_put(n); + return caps; } static void tipc_node_kref_release(struct kref *kref) @@ -265,6 +269,7 @@ static void tipc_node_write_unlock(struct tipc_node *n) u32 addr = 0; u32 flags = n->action_flags; u32 link_id = 0; + u32 bearer_id; struct list_head *publ_list; if (likely(!flags)) { @@ -274,6 +279,7 @@ static void tipc_node_write_unlock(struct tipc_node *n) addr = n->addr; link_id = n->link_id; + bearer_id = link_id & 0xffff; publ_list = &n->publ_list; n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@ -287,13 +293,16 @@ static void tipc_node_write_unlock(struct tipc_node *n) if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); - if (flags & TIPC_NOTIFY_LINK_UP) + if (flags & TIPC_NOTIFY_LINK_UP) { + tipc_mon_peer_up(net, addr, bearer_id); tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) + } + if (flags & TIPC_NOTIFY_LINK_DOWN) { + tipc_mon_peer_down(net, addr, bearer_id); tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); + } } struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) @@ -304,8 +313,11 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) spin_lock_bh(&tn->node_list_lock); n = tipc_node_find(net, addr); - if (n) + if (n) { + /* Same node may come back with new capabilities */ + n->capabilities = capabilities; goto exit; + } n = kzalloc(sizeof(*n), GFP_ATOMIC); if (!n) { pr_warn("Node creation failed, no memory\n"); @@ -361,14 +373,13 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; - unsigned long keepalive_intv = msecs_to_jiffies(intv); /* Link with lowest tolerance determines timer interval */ - if (keepalive_intv < n->keepalive_intv) - n->keepalive_intv = keepalive_intv; + if (intv < n->keepalive_intv) + n->keepalive_intv = intv; - /* Ensure link's abort limit corresponds to current interval */ - tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); + /* Ensure link's abort limit corresponds to current tolerance */ + tipc_link_set_abort_limit(l, tol / n->keepalive_intv); } static void tipc_node_delete(struct tipc_node *node) @@ -509,7 +520,7 @@ static void tipc_node_timeout(unsigned long data) if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); } - mod_timer(&n->timer, jiffies + n->keepalive_intv); + mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv)); } /** @@ -525,7 +536,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; - if (!nl) + if (!nl || tipc_link_is_up(nl)) return; tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); @@ -545,12 +556,16 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Established link <%s> on network plane %c\n", tipc_link_name(nl), tipc_link_plane(nl)); + /* Ensure that a STATE message goes first */ + tipc_link_build_state_msg(nl, xmitq); + /* First link? => give it both slots */ if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_link_set_active(nl, true); tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -581,8 +596,12 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, static void tipc_node_link_up(struct tipc_node *n, int bearer_id, struct sk_buff_head *xmitq) { + struct tipc_media_addr *maddr; + tipc_node_write_lock(n); __tipc_node_link_up(n, bearer_id, xmitq); + maddr = &n->links[bearer_id].maddr; + tipc_bearer_xmit(n->net, bearer_id, xmitq, maddr); tipc_node_write_unlock(n); } @@ -667,6 +686,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) struct tipc_link *l = le->link; struct tipc_media_addr *maddr; struct sk_buff_head xmitq; + int old_bearer_id = bearer_id; if (!l) return; @@ -686,6 +706,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_write_unlock(n); + if (delete) + tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } @@ -710,6 +732,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool accept_addr = false; bool reset = true; char *if_name; + unsigned long intv; *dupl_addr = false; *respond = false; @@ -815,9 +838,11 @@ void tipc_node_check_dest(struct net *net, u32 onode, le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); - if (n->link_cnt == 1) - if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) + if (n->link_cnt == 1) { + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) tipc_node_get(n); + } } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: @@ -925,7 +950,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) state = SELF_UP_PEER_UP; break; case SELF_LOST_CONTACT_EVT: - state = SELF_DOWN_PEER_LEAVING; + state = SELF_DOWN_PEER_DOWN; break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: @@ -944,7 +969,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) state = SELF_UP_PEER_UP; break; case PEER_LOST_CONTACT_EVT: - state = SELF_LEAVING_PEER_DOWN; + state = SELF_DOWN_PEER_DOWN; break; case SELF_LOST_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: @@ -1272,14 +1297,10 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id rc = tipc_bcast_rcv(net, be->link, skb); - /* Broadcast link reset may happen at reassembly failure */ - if (rc & TIPC_LINK_DOWN_EVT) - tipc_node_reset_links(n); - /* Broadcast ACKs are sent on a unicast link */ if (rc & TIPC_LINK_SND_BC_ACK) { tipc_node_read_lock(n); - tipc_link_build_ack_msg(le->link, &xmitq); + tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); } @@ -1295,6 +1316,17 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id spin_unlock_bh(&be->inputq2.lock); tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2); } + + if (rc & TIPC_LINK_DOWN_EVT) { + /* Reception reassembly failure => reset all links to peer */ + if (!tipc_link_is_up(be->link)) + tipc_node_reset_links(n); + + /* Retransmission failure => reset all links to all peers */ + if (!tipc_link_is_up(tipc_bc_sndlink(net))) + tipc_bearer_reset_all(net); + } + tipc_node_put(n); } @@ -1444,6 +1476,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int bearer_id = b->identity; struct tipc_link_entry *le; u16 bc_ack = msg_bcast_ack(hdr); + u32 self = tipc_own_addr(net); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1460,6 +1493,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) return tipc_node_bc_rcv(net, skb, bearer_id); } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) @@ -1891,3 +1928,168 @@ out: return skb->len; } + +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1]; + struct net *net = sock_net(skb->sk); + int err; + + if (!info->attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy); + if (err) + return err; + + if (attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]); + err = tipc_nl_monitor_set_threshold(net, val); + if (err) + return err; + } + + return 0; +} + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + val = tipc_nl_monitor_get_threshold(net); + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_bearer = cb->args[0]; + struct tipc_nl_msg msg; + int err; + int i; + + if (prev_bearer == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = prev_bearer; i < MAX_BEARERS; i++) { + prev_bearer = i; + err = __tipc_nl_add_monitor(net, &msg, prev_bearer); + if (err) + goto out; + } + +out: + rtnl_unlock(); + cb->args[0] = prev_bearer; + + return skb->len; +} + +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_node = cb->args[1]; + u32 bearer_id = cb->args[2]; + int done = cb->args[0]; + struct tipc_nl_msg msg; + int err; + + if (!prev_node) { + struct nlattr **attrs; + struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy); + if (err) + return err; + + if (!mon[TIPC_NLA_MON_REF]) + return -EINVAL; + + bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]); + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + } + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node); + if (!err) + done = 1; + + rtnl_unlock(); + cb->args[0] = done; + cb->args[1] = prev_node; + cb->args[2] = bearer_id; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index f39d9d06e8bb..d69fdfcc0ec9 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -45,10 +45,11 @@ /* Optional capabilities supported by this code version */ enum { - TIPC_BCAST_SYNCH = (1 << 1) + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BLOCK_FLOWCTL = (2 << 1) }; -#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); @@ -70,10 +71,16 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb); #endif diff --git a/net/tipc/server.c b/net/tipc/server.c index 2446bfbaa309..215849ce453d 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,6 +86,7 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); +static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -102,6 +103,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + tipc_sock_release(con); sock_release(sock); con->sock = NULL; } @@ -136,28 +138,28 @@ static void sock_data_ready(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->rcv_wq, &con->rwork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void sock_write_space(struct sock *sk) { struct tipc_conn *con; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); if (con && test_bit(CF_CONNECTED, &con->flags)) { conn_get(con); if (!queue_work(con->server->send_wq, &con->swork)) conn_put(con); } - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) @@ -184,26 +186,31 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } +static void tipc_sock_release(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); + + tipc_unregister_callbacks(con); +} + static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); - tipc_unregister_callbacks(con); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this - * connection from server connection list and set - * sk->sk_user_data to 0 before releasing connection object. + * connection from server connection list. */ kernel_sock_shutdown(con->sock, SHUT_RDWR); @@ -411,13 +418,12 @@ static struct outqueue_entry *tipc_alloc_entry(void *data, int len) if (!entry) return NULL; - buf = kmalloc(len, GFP_ATOMIC); + buf = kmemdup(data, len, GFP_ATOMIC); if (!buf) { kfree(entry); return NULL; } - memcpy(buf, data, len); entry->iov.iov_base = buf; entry->iov.iov_len = len; diff --git a/net/tipc/server.h b/net/tipc/server.h index 9015faedb1b0..34f8055afa3b 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -53,7 +53,7 @@ * @send_wq: send workqueue * @max_rcvbuf_size: maximum permitted receive message length * @tipc_conn_new: callback will be called when new connection is incoming - * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_release: callback will be called before releasing the connection * @tipc_conn_recvmsg: callback will be called when message arrives * @saddr: TIPC server address * @name: server name @@ -70,7 +70,7 @@ struct tipc_server { struct workqueue_struct *send_wq; int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); - void (*tipc_conn_shutdown)(int conid, void *usr_data); + void (*tipc_conn_release)(int conid, void *usr_data); void (*tipc_conn_recvmsg)(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3eeb50a27b89..c49b8df438cb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -96,8 +96,11 @@ struct tipc_sock { uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; - uint sent_unacked; - uint rcv_unacked; + u16 snt_unacked; + u16 snd_win; + u16 peer_caps; + u16 rcv_unacked; + u16 rcv_win; struct sockaddr_tipc remote; struct rhash_head node; struct rcu_head rcu; @@ -227,9 +230,29 @@ static struct tipc_sock *tipc_sk(const struct sock *sk) return container_of(sk, struct tipc_sock, sk); } -static int tsk_conn_cong(struct tipc_sock *tsk) +static bool tsk_conn_cong(struct tipc_sock *tsk) { - return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN; + return tsk->snt_unacked >= tsk->snd_win; +} + +/* tsk_blocks(): translate a buffer size in bytes to number of + * advertisable blocks, taking into account the ratio truesize(len)/len + * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ + */ +static u16 tsk_adv_blocks(int len) +{ + return len / FLOWCTL_BLK_SZ / 4; +} + +/* tsk_inc(): increment counter for sent or received data + * - If block based flow control is not supported by peer we + * fall back to message based ditto, incrementing the counter + */ +static u16 tsk_inc(struct tipc_sock *tsk, int msglen) +{ + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return ((msglen / FLOWCTL_BLK_SZ) + 1); + return 1; } /** @@ -366,7 +389,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock->state = state; sock_init_data(sock, sk); if (tipc_sk_insert(tsk)) { - pr_warn("Socket create failed; port numbrer exhausted\n"); + pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } msg_set_origport(msg, tsk->portid); @@ -377,9 +400,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; - tsk->sent_unacked = 0; atomic_set(&tsk->dupl_rcvcnt, 0); + /* Start out with safe limits until we receive an advertised window */ + tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); + tsk->rcv_win = tsk->snd_win; + if (sock->state == SS_READY) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) @@ -770,12 +796,14 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; + u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); - int conn_cong; + bool conn_cong; /* Ignore if connection cannot be validated: */ if (!tsk_peer_msg(tsk, hdr)) @@ -785,11 +813,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); - tipc_sk_respond(sk, skb, TIPC_OK); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); - tsk->sent_unacked -= msg_msgcnt(hdr); + tsk->snt_unacked -= msg_conn_ack(hdr); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + tsk->snd_win = msg_adv_win(hdr); if (conn_cong) sk->sk_write_space(sk); } else if (mtyp != CONN_PROBE_REPLY) { @@ -1020,12 +1051,14 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) u32 dnode; uint mtu, send, sent = 0; struct iov_iter save; + int hlen = MIN_H_SIZE; /* Handle implied connection establishment */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dsz); + hlen = msg_hdr_sz(mhdr); if (dsz && (dsz == rc)) - tsk->sent_unacked = 1; + tsk->snt_unacked = tsk_inc(tsk, dsz + hlen); return rc; } if (dsz > (uint)INT_MAX) @@ -1054,7 +1087,7 @@ next: if (likely(!tsk_conn_cong(tsk))) { rc = tipc_node_xmit(net, &pktchain, dnode, portid); if (likely(!rc)) { - tsk->sent_unacked++; + tsk->snt_unacked += tsk_inc(tsk, send + hlen); sent += send; if (sent == dsz) return dsz; @@ -1118,6 +1151,13 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); + tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) + return; + + /* Fall back to message based flow control */ + tsk->rcv_win = FLOWCTL_MSG_WIN; + tsk->snd_win = FLOWCTL_MSG_WIN; } /** @@ -1214,7 +1254,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } -static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) +static void tipc_sk_send_ack(struct tipc_sock *tsk) { struct net *net = sock_net(&tsk->sk); struct sk_buff *skb = NULL; @@ -1230,7 +1270,14 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack) if (!skb) return; msg = buf_msg(skb); - msg_set_msgcnt(msg, ack); + msg_set_conn_ack(msg, tsk->rcv_unacked); + tsk->rcv_unacked = 0; + + /* Adjust to and advertize the correct window limit */ + if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL) { + tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); + msg_set_adv_win(msg, tsk->rcv_win); + } tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); } @@ -1288,7 +1335,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len, long timeo; unsigned int sz; u32 err; - int res; + int res, hlen; /* Catch invalid receive requests */ if (unlikely(!buf_len)) @@ -1313,6 +1360,7 @@ restart: buf = skb_peek(&sk->sk_receive_queue); msg = buf_msg(buf); sz = msg_data_sz(msg); + hlen = msg_hdr_sz(msg); err = msg_errcode(msg); /* Discard an empty non-errored message & try again */ @@ -1335,7 +1383,7 @@ restart: sz = buf_len; m->msg_flags |= MSG_TRUNC; } - res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz); + res = skb_copy_datagram_msg(buf, hlen, m, sz); if (res) goto exit; res = sz; @@ -1347,15 +1395,15 @@ restart: res = -ECONNRESET; } - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if ((sock->state != SS_READY) && - (++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_sk_send_ack(tsk, tsk->rcv_unacked); - tsk->rcv_unacked = 0; - } - tsk_advance_rx_queue(sk); + if (unlikely(flags & MSG_PEEK)) + goto exit; + + if (likely(sock->state != SS_READY)) { + tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) + tipc_sk_send_ack(tsk); } + tsk_advance_rx_queue(sk); exit: release_sock(sk); return res; @@ -1384,7 +1432,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, int sz_to_copy, target, needed; int sz_copied = 0; u32 err; - int res = 0; + int res = 0, hlen; /* Catch invalid receive attempts */ if (unlikely(!buf_len)) @@ -1410,6 +1458,7 @@ restart: buf = skb_peek(&sk->sk_receive_queue); msg = buf_msg(buf); sz = msg_data_sz(msg); + hlen = msg_hdr_sz(msg); err = msg_errcode(msg); /* Discard an empty non-errored message & try again */ @@ -1434,8 +1483,7 @@ restart: needed = (buf_len - sz_copied); sz_to_copy = (sz <= needed) ? sz : needed; - res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset, - m, sz_to_copy); + res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) goto exit; @@ -1457,20 +1505,18 @@ restart: res = -ECONNRESET; } - /* Consume received message (optional) */ - if (likely(!(flags & MSG_PEEK))) { - if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) { - tipc_sk_send_ack(tsk, tsk->rcv_unacked); - tsk->rcv_unacked = 0; - } - tsk_advance_rx_queue(sk); - } + if (unlikely(flags & MSG_PEEK)) + goto exit; + + tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) + tipc_sk_send_ack(tsk); + tsk_advance_rx_queue(sk); /* Loop around if more data is required */ if ((sz_copied < buf_len) && /* didn't get all requested data */ (!skb_queue_empty(&sk->sk_receive_queue) || (sz_copied < target)) && /* and more is ready or required */ - (!(flags & MSG_PEEK)) && /* and aren't just peeking at data */ (!err)) /* and haven't reached a FIN */ goto restart; @@ -1602,30 +1648,33 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /** * rcvbuf_limit - get proper overload limit of socket receive queue * @sk: socket - * @buf: message + * @skb: message * - * For all connection oriented messages, irrespective of importance, - * the default overload value (i.e. 67MB) is set as limit. + * For connection oriented messages, irrespective of importance, + * default queue limit is 2 MB. * - * For all connectionless messages, by default new queue limits are - * as belows: + * For connectionless messages, queue limits are based on message + * importance as follows: * - * TIPC_LOW_IMPORTANCE (4 MB) - * TIPC_MEDIUM_IMPORTANCE (8 MB) - * TIPC_HIGH_IMPORTANCE (16 MB) - * TIPC_CRITICAL_IMPORTANCE (32 MB) + * TIPC_LOW_IMPORTANCE (2 MB) + * TIPC_MEDIUM_IMPORTANCE (4 MB) + * TIPC_HIGH_IMPORTANCE (8 MB) + * TIPC_CRITICAL_IMPORTANCE (16 MB) * * Returns overload limit according to corresponding message importance */ -static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) +static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) { - struct tipc_msg *msg = buf_msg(buf); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + + if (unlikely(!msg_connected(hdr))) + return sk->sk_rcvbuf << msg_importance(hdr); - if (msg_connected(msg)) - return sysctl_tipc_rmem[2]; + if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL)) + return sk->sk_rcvbuf; - return sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << - msg_importance(msg); + return FLOWCTL_MSG_LIM; } /** @@ -1640,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) * * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); @@ -1650,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) int usr = msg_user(hdr); if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, skb, xmitq); return false; } @@ -1693,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) return true; reject: - tipc_sk_respond(sk, skb, err); + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) + __skb_queue_tail(xmitq, skb); return false; } @@ -1709,9 +1760,24 @@ reject: static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int truesize = skb->truesize; + struct sk_buff_head xmitq; + u32 dnode, selector; - if (likely(filter_rcv(sk, skb))) + __skb_queue_head_init(&xmitq); + + if (likely(filter_rcv(sk, skb, &xmitq))) { atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); + return 0; + } + + if (skb_queue_empty(&xmitq)) + return 0; + + /* Send response/rejected message */ + skb = __skb_dequeue(&xmitq); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1725,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport) + u32 dport, struct sk_buff_head *xmitq) { + unsigned long time_limit = jiffies + 2; + struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; - struct sk_buff *skb; - unsigned long time_limit = jiffies + 2; + u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) @@ -1742,20 +1809,22 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb); + filter_rcv(sk, skb, xmitq); continue; } /* Try backlog, compensating for double-counted bytes */ dcnt = &tipc_sk(sk)->dupl_rcvcnt; - if (sk->sk_backlog.len) + if (!sk->sk_backlog.len) atomic_set(dcnt, 0); lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt); if (likely(!sk_add_backlog(sk, skb, lim))) continue; /* Overload => reject message back to sender */ - tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + onode = tipc_own_addr(sock_net(sk)); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); break; } } @@ -1768,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; struct sk_buff *skb; + __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); @@ -1781,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - tipc_sk_enqueue(inputq, sk, dport); + tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } + /* Send pending response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } sock_put(sk); continue; } @@ -2807,6 +2883,9 @@ int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; + if (!attrs[TIPC_NLA_SOCK]) + return -EINVAL; + err = nla_parse_nested(sock, TIPC_NLA_SOCK_MAX, attrs[TIPC_NLA_SOCK], tipc_nl_sock_policy); diff --git a/net/tipc/socket.h b/net/tipc/socket.h index 4241f22069dc..06fb5944cf76 100644 --- a/net/tipc/socket.h +++ b/net/tipc/socket.h @@ -1,6 +1,6 @@ /* net/tipc/socket.h: Include file for TIPC socket code * - * Copyright (c) 2014-2015, Ericsson AB + * Copyright (c) 2014-2016, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,10 +38,17 @@ #include <net/sock.h> #include <net/genetlink.h> -#define TIPC_CONNACK_INTV 256 -#define TIPC_FLOWCTRL_WIN (TIPC_CONNACK_INTV * 2) -#define TIPC_CONN_OVERLOAD_LIMIT ((TIPC_FLOWCTRL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) +/* Compatibility values for deprecated message based flow control */ +#define FLOWCTL_MSG_WIN 512 +#define FLOWCTL_MSG_LIM ((FLOWCTL_MSG_WIN * 2 + 1) * SKB_TRUESIZE(MAX_MSG_SIZE)) + +#define FLOWCTL_BLK_SZ 1024 + +/* Socket receive buffer sizes */ +#define RCVBUF_MIN (FLOWCTL_BLK_SZ * 512) +#define RCVBUF_DEF (FLOWCTL_BLK_SZ * 1024 * 2) +#define RCVBUF_MAX (FLOWCTL_BLK_SZ * 1024 * 16) + int tipc_socket_init(void); void tipc_socket_stop(void); void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e6cb386fbf34..0dd02244e21d 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -302,7 +302,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, } /* Handle one termination request for the subscriber */ -static void tipc_subscrb_shutdown_cb(int conid, void *usr_data) +static void tipc_subscrb_release_cb(int conid, void *usr_data) { tipc_subscrb_delete((struct tipc_subscriber *)usr_data); } @@ -326,8 +326,7 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, return tipc_subscrp_cancel(s, subscriber); } - if (s) - tipc_subscrp_subscribe(net, s, subscriber, swap); + tipc_subscrp_subscribe(net, s, subscriber, swap); } /* Handle one request to establish a new subscriber */ @@ -365,7 +364,7 @@ int tipc_topsrv_start(struct net *net) topsrv->max_rcvbuf_size = sizeof(struct tipc_subscr); topsrv->tipc_conn_recvmsg = tipc_subscrb_rcv_cb; topsrv->tipc_conn_new = tipc_subscrb_connect_cb; - topsrv->tipc_conn_shutdown = tipc_subscrb_shutdown_cb; + topsrv->tipc_conn_release = tipc_subscrb_release_cb; strncpy(topsrv->name, name, strlen(name) + 1); tn->topsrv = topsrv; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index c9cf2be3674a..b016c011970b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -63,7 +63,7 @@ */ struct udp_media_addr { __be16 proto; - __be16 udp_port; + __be16 port; union { struct in_addr ipv4; struct in6_addr ipv6; @@ -108,9 +108,9 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; if (ntohs(ua->proto) == ETH_P_IP) - snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); else if (ntohs(ua->proto) == ETH_P_IPV6) - snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); else pr_err("Invalid UDP media address\n"); return 0; @@ -178,8 +178,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, - dst->udp_port, false, true); + dst->ipv4.s_addr, 0, ttl, 0, src->port, + dst->port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; @@ -196,8 +196,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, ttl = ip6_dst_hoplimit(ndst); err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->udp_port, - dst->udp_port, false); + &dst->ipv6, 0, ttl, 0, src->port, + dst->port, false); #endif } return err; @@ -292,12 +292,12 @@ err: ip4 = (struct sockaddr_in *)&sa_local; local->proto = htons(ETH_P_IP); - local->udp_port = ip4->sin_port; + local->port = ip4->sin_port; local->ipv4.s_addr = ip4->sin_addr.s_addr; ip4 = (struct sockaddr_in *)&sa_remote; remote->proto = htons(ETH_P_IP); - remote->udp_port = ip4->sin_port; + remote->port = ip4->sin_port; remote->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; @@ -312,13 +312,13 @@ err: return -EINVAL; local->proto = htons(ETH_P_IPV6); - local->udp_port = ip6->sin6_port; + local->port = ip6->sin6_port; memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); ub->ifindex = ip6->sin6_scope_id; ip6 = (struct sockaddr_in6 *)&sa_remote; remote->proto = htons(ETH_P_IPV6); - remote->udp_port = ip6->sin6_port; + remote->port = ip6->sin6_port; memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); return 0; #endif @@ -386,7 +386,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, err = -EAFNOSUPPORT; goto err; } - udp_conf.local_udp_port = local.udp_port; + udp_conf.local_udp_port = local.port; err = udp_sock_create(net, &udp_conf, &ub->ubsock); if (err) goto err; |