From cad2929dc4321b1f237767e9bd271b61a2eaa752 Mon Sep 17 00:00:00 2001 From: Hoang Huu Le Date: Wed, 17 Jun 2020 13:56:05 +0700 Subject: tipc: update a binding service via broadcast Currently, updating binding table (add service binding to name table/withdraw a service binding) is being sent over replicast. However, if we are scaling up clusters to > 100 nodes/containers this method is less efficient because of looping through nodes in a cluster one by one. It is worth using broadcast to update a binding service. This way, the binding table can be updated on all peer nodes in one shot. Broadcast is used when all peer nodes, as indicated by a new capability flag TIPC_NAMED_BCAST, support reception of this message type. Four problems need to be considered when introducing this feature. 1) When establishing a link to a new peer node we still update this by a unicast 'bulk' update. This may lead to race conditions, where a later broadcast publication/withdrawal bypasses the 'bulk', resulting in disordered publications, or even that a withdrawal may arrive before the corresponding publication. We solve this by adding an 'is_last_bulk' bit in the last bulk messages so that it can be distinguished from all other messages. Only when this message has arrived do we open up for reception of broadcast publications/withdrawals. 2) When a first legacy node is added to the cluster all distribution will switch over to use the legacy 'replicast' method, while the opposite happens when the last legacy node leaves the cluster. This entails another risk of message disordering that has to be handled. We solve this by adding a sequence number to the broadcast/replicast messages, so that disordering can be discovered and corrected. Note however that we don't need to consider potential message loss or duplication at this protocol level. 3) Bulk messages don't contain any sequence numbers, and will always arrive in order. 
Hence we must exempt those from the sequence number control and deliver them unconditionally. We solve this by adding a new 'is_bulk' bit in those messages so that they can be recognized. 4) Legacy messages, which don't contain any new bits or sequence numbers, but neither can arrive out of order, also need to be exempt from the initial synchronization and sequence number check, and delivered unconditionally. Therefore, we add another 'is_not_legacy' bit to all new messages so that those can be distinguished from legacy messages and the latter delivered directly. v1->v2: - fix warning issue reported by kbuild test robot - add sanity check to drop the publication message with a sequence number that is lower than the agreed synch point Signed-off-by: kernel test robot Signed-off-by: Hoang Huu Le Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/link.c') diff --git a/net/tipc/link.c b/net/tipc/link.c index ee3b8d0576b8..eac89a3e22ce 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2745,7 +2745,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, void *hdr; struct nlattr *attrs; struct nlattr *prop; - u32 bc_mode = tipc_bcast_get_broadcast_mode(net); + u32 bc_mode = tipc_bcast_get_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); if (!bcl) -- cgit v1.2.3 From e034c6d23bc43266af1fa983212218f4aa38f995 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Jun 2020 08:35:00 -0500 Subject: tipc: Use struct_size() helper Make use of the struct_size() helper instead of an open-coded version in order to avoid any potential type mistakes. This code was detected with the help of Coccinelle, and audited and fixed manually. Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. 
Miller --- net/tipc/link.c | 8 ++++---- net/tipc/msg.h | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) (limited to 'net/tipc/link.c') diff --git a/net/tipc/link.c b/net/tipc/link.c index eac89a3e22ce..1c579357ccdf 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1385,12 +1385,12 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ - if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) { + if (sz == struct_size(p, gacks, p->ugack_cnt + p->bgack_cnt)) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; /* Backward compatible: peer might not support bc, but uc? */ - } else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) { + } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { if (p->ugack_cnt) { p->bgack_cnt = 0; goto ok; @@ -1472,7 +1472,7 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ - len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt); + len = struct_size(ga, gacks, ga->bgack_cnt + ga->ugack_cnt); ga->len = htons(len); return len; } @@ -1521,7 +1521,7 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, gacks = &ga->gacks[ga->bgack_cnt]; } else if (ga) { /* Copy the Gap ACKs, bc part, for later renewal if needed */ - this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt), + this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), GFP_ATOMIC); if (likely(this_ga)) { this_ga->start_index = 0; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 65119e81ff0c..1016e96db5c4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -189,11 +189,9 @@ struct tipc_gap_ack_blks { struct tipc_gap_ack gacks[]; }; -#define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \ - sizeof(struct tipc_gap_ack) * (n)) - #define 
MAX_GAP_ACK_BLKS 128 -#define MAX_GAP_ACK_BLKS_SZ tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS) +#define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \ + sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS) static inline struct tipc_msg *buf_msg(struct sk_buff *skb) { -- cgit v1.2.3 From a34f829164f3c70d7f53bb532ddcc39fa890b722 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Thu, 9 Jul 2020 09:06:44 +1200 Subject: tipc: fix retransmission on unicast links A scenario has been observed where a 'bc_init' message for a link is not retransmitted if it fails to be received by the peer. This leads to the peer never establishing the link fully and it discarding all other data received on the link. In this scenario the message is lost in transit to the peer. The issue is traced to the 'nxt_retr' field of the skb not being initialised for links that aren't a bc_sndlink. This leads to the comparison in tipc_link_advance_transmq() that gates whether to attempt retransmission of a message performing in an undesirable way. Depending on the relative value of 'jiffies', this comparison: time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr) may return true or false given that 'nxt_retr' remains at the uninitialised value of 0 for non bc_sndlinks. This is most noticeable shortly after boot when jiffies is initialised to a high value (to flush out rollover bugs) and we compare a jiffies of, say, 4294940189 to zero. In that case time_before returns 'true' leading to the skb not being retransmitted. The fix is to ensure that all skbs have a valid 'nxt_retr' time set for them and this is achieved by refactoring the setting of this value into a central function. With this fix, transmission losses of 'bc_init' messages do not stall the link establishment forever because the 'bc_init' message is retransmitted and the link eventually establishes correctly. 
Fixes: 382f598fb66b ("tipc: reduce duplicate packets for unicast traffic") Acked-by: Jon Maloy Signed-off-by: Hamish Martin Signed-off-by: David S. Miller --- net/tipc/link.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net/tipc/link.c') diff --git a/net/tipc/link.c b/net/tipc/link.c index ee3b8d0576b8..263d950e70e9 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -921,6 +921,21 @@ static void link_prepare_wakeup(struct tipc_link *l) } +/** + * tipc_link_set_skb_retransmit_time - set the time at which retransmission of + * the given skb should be next attempted + * @skb: skb to set a future retransmission time for + * @l: link the skb will be transmitted on + */ +static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, + struct tipc_link *l) +{ + if (link_is_bc_sndlink(l)) + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + else + TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; +} + void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; @@ -1036,9 +1051,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -ENOBUFS; } __skb_queue_tail(transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; @@ -1139,9 +1152,7 @@ static void tipc_link_advance_backlog(struct tipc_link *l, if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); - /* next retransmit attempt */ - if (link_is_bc_sndlink(l)) - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; @@ -1584,8 +1595,7 @@ release: /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; - TIPC_SKB_CB(skb)->nxt_retr 
= (is_uc) ? - TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM; + tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; -- cgit v1.2.3 From d8141208b032eaee0efeacaadf1734f65db73ac5 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 13 Jul 2020 01:15:14 +0200 Subject: net: tipc: kerneldoc fixes Simple fixes which require no deep knowledge of the code. Cc: Jon Maloy Cc: Ying Xue Signed-off-by: Andrew Lunn Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- net/tipc/discover.c | 5 ++--- net/tipc/link.c | 6 +++--- net/tipc/msg.c | 2 +- net/tipc/node.c | 4 ++-- net/tipc/socket.c | 8 +++----- net/tipc/udp_media.c | 2 +- 7 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net/tipc/link.c') diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e366ec9a7e4d..808b147df7d5 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -595,7 +595,7 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, /** * tipc_l2_rcv_msg - handle incoming TIPC message from an interface - * @buf: the received packet + * @skb: the received message * @dev: the net device that the packet was received on * @pt: the packet_type structure which was used to register this handler * @orig_dev: the original receive net device in case the device is a bond diff --git a/net/tipc/discover.c b/net/tipc/discover.c index bfe43da127c0..d4ecacddb40c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -74,7 +74,7 @@ struct tipc_discoverer { /** * tipc_disc_init_msg - initialize a link setup message * @net: the applicable net namespace - * @type: message type (request or response) + * @mtyp: message type (request or response) * @b: ptr to bearer issuing message */ static void tipc_disc_init_msg(struct net *net, struct sk_buff *skb, @@ -339,7 +339,7 @@ exit: * @net: the applicable net namespace * @b: ptr to bearer issuing requests * @dest: destination address for request messages - * @dest_domain: network domain to which 
links can be established + * @skb: pointer to created frame * * Returns 0 if successful, otherwise -errno. */ @@ -393,7 +393,6 @@ void tipc_disc_delete(struct tipc_discoverer *d) * tipc_disc_reset - reset object to send periodic link setup requests * @net: the applicable net namespace * @b: ptr to bearer issuing requests - * @dest_domain: network domain to which links can be established */ void tipc_disc_reset(struct net *net, struct tipc_bearer *b) { diff --git a/net/tipc/link.c b/net/tipc/link.c index f1d9c33dae72..d46d4ee5c4fd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -445,7 +445,7 @@ u32 tipc_link_state(struct tipc_link *l) /** * tipc_link_create - create a new link - * @n: pointer to associated node + * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link @@ -530,7 +530,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast - * @n: pointer to associated node + * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for delivery @@ -989,7 +989,7 @@ void tipc_link_reset(struct tipc_link *l) /** * tipc_link_xmit(): enqueue buffer list according to queue situation - * @link: link to use + * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 01b64869a173..848fae674532 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -202,7 +202,7 @@ err: /** * tipc_msg_append(): Append data to tail of an existing buffer queue - * @hdr: header to be used + * @_hdr: header to be used * @m: the data to be appended * @mss: max allowable size of buffer * @dlen: size of data to be appended diff --git a/net/tipc/node.c b/net/tipc/node.c 
index 030a51c4d1fa..4edcee3088da 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1515,7 +1515,7 @@ static void node_lost_contact(struct tipc_node *n, * tipc_node_get_linkname - get the name of a link * * @bearer_id: id of the bearer - * @node: peer node address + * @addr: peer node address * @linkname: link name output buffer * * Returns 0 on success @@ -2022,7 +2022,7 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, * tipc_rcv - process TIPC packets/messages arriving from off-node * @net: the applicable net namespace * @skb: TIPC packet - * @bearer: pointer to bearer message arrived on + * @b: pointer to bearer message arrived on * * Invoked with no locks held. Bearer pointer must point to a valid bearer * structure (i.e. cannot be NULL), but bearer can be inactive. diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a94f38333698..fc388cef6471 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -711,7 +711,6 @@ exit: * tipc_getname - get port ID of socket or peer socket * @sock: socket structure * @uaddr: area for returned socket address - * @uaddr_len: area for returned length of socket address * @peer: 0 = own ID, 1 = current peer ID, 2 = current/former peer ID * * Returns 0 on success, errno otherwise @@ -1053,7 +1052,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /** * tipc_send_group_bcast - send message to all members in communication group - * @sk: socket structure + * @sock: socket structure * @m: message to send * @dlen: total length of message data * @timeout: timeout to wait for wakeup @@ -1673,7 +1672,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, /** * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @hdr: received message header + * @skb: received message * * Note: Address is not captured if not requested by receiver. 
*/ @@ -2095,7 +2094,6 @@ static void tipc_write_space(struct sock *sk) /** * tipc_data_ready - wake up threads to indicate messages have been received * @sk: socket - * @len: the length of messages */ static void tipc_data_ready(struct sock *sk) { @@ -2677,7 +2675,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) /** * tipc_accept - wait for connection request * @sock: listening socket - * @newsock: new socket that is to be connected + * @new_sock: new socket that is to be connected * @flags: file-related flags associated with socket * * Returns 0 on success, errno otherwise diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 28a283f26a8d..d91b7c543e39 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -565,7 +565,7 @@ msg_full: /** * tipc_parse_udp_addr - build udp media address from netlink data - * @nlattr: netlink attribute containing sockaddr storage aligned address + * @nla: netlink attribute containing sockaddr storage aligned address * @addr: tipc media address to fill with address, port and protocol type * @scope_id: IPv6 scope id pointer, not NULL indicates it's required */ -- cgit v1.2.3 From 6ef9dcb78046b346b5508ca1659848b136a343c2 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Tue, 21 Jul 2020 08:57:05 +0700 Subject: tipc: allow to build NACK message in link timeout function Commit 02288248b051 ("tipc: eliminate gap indicator from ACK messages") eliminated sending of the 'gap' indicator in regular ACK messages and only allowed to build NACK message with enabled probe/probe_reply. However, necessary correction for building NACK message was missed in tipc_link_timeout() function. This leads to significant delay and link reset (due to retransmission failure) in lossy environment. This commit fixes it by setting the 'probe' flag to 'true' when the receive deferred queue is not empty. As a result, NACK message will be built to send back to another peer. 
Fixes: 02288248b051 ("tipc: eliminate gap indicator from ACK messages") Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller --- net/tipc/link.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/tipc/link.c') diff --git a/net/tipc/link.c b/net/tipc/link.c index 263d950e70e9..d40f8e5b7683 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -827,11 +827,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); - state |= !skb_queue_empty(&l->deferdq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; -- cgit v1.2.3