From c9655008e7845bcfdaac10a1ed8554ec167aea88 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Mon, 30 Dec 2019 17:54:41 +0800 Subject: tcp: fix "old stuff" D-SACK causing SACK to be treated as D-SACK When we receive a D-SACK, where the sequence number satisfies: undo_marker <= start_seq < end_seq <= prior_snd_una we consider this is a valid D-SACK and tcp_is_sackblock_valid() returns true, then this D-SACK is discarded as "old stuff", but the variable first_sack_index is not marked as negative in tcp_sacktag_write_queue(). If this D-SACK also carries a SACK that needs to be processed (for example, the previous SACK segment was lost), this SACK will be treated as a D-SACK in the following processing of tcp_sacktag_write_queue(), which will eventually lead to incorrect updates of undo_retrans and reordering. Fixes: fd6dad616d4f ("[TCP]: Earlier SACK block verification & simplify access to them") Signed-off-by: Pengcheng Yang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 88b987ca9ebb..0238b554a1f0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1727,8 +1727,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } /* Ignore very old stuff early */ - if (!after(sp[used_sacks].end_seq, prior_snd_una)) + if (!after(sp[used_sacks].end_seq, prior_snd_una)) { + if (i == 0) + first_sack_index = -1; continue; + } used_sacks++; } -- cgit v1.2.3 From e176b1ba476cf36f723cfcc7a9e57f3cb47dec70 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 14 Jan 2020 17:23:40 +0800 Subject: tcp: fix marked lost packets not being retransmitted When the packet pointed to by retransmit_skb_hint is unlinked by ACK, retransmit_skb_hint will be set to NULL in tcp_clean_rtx_queue(). If packet loss is detected at this time, retransmit_skb_hint will be set to point to the current packet loss in tcp_verify_retransmit_hint(), then the packets that were previously marked lost but not retransmitted due to the restriction of cwnd will be skipped and cannot be retransmitted. To fix this, when retransmit_skb_hint is NULL, retransmit_skb_hint can be reset only after all marked lost packets are retransmitted (retrans_out >= lost_out), otherwise we need to traverse from tcp_rtx_queue_head in tcp_xmit_retransmit_queue(). Packetdrill to demonstrate: // Disable RACK and set max_reordering to keep things simple 0 `sysctl -q net.ipv4.tcp_recovery=0` +0 `sysctl -q net.ipv4.tcp_max_reordering=3` // Establish a connection +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +.1 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 <...> +.01 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 // Send 8 data segments +0 write(4, ..., 8000) = 8000 +0 > P. 1:8001(8000) ack 1 // Enter recovery and 1:3001 is marked lost +.01 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 // Retransmit 1:1001, now retransmit_skb_hint points to 1001:2001 +0 > . 1:1001(1000) ack 1 // 1001:2001 was ACKed causing retransmit_skb_hint to be set to NULL +.01 < . 1:1(0) ack 2001 win 257 // Now retransmit_skb_hint points to 4001:5001 which is now marked lost // BUG: 2001:3001 was not retransmitted +0 > . 2001:3001(1000) ack 1 Signed-off-by: Pengcheng Yang Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp_input.c') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0238b554a1f0..5347ab2c9c58 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -915,9 +915,10 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if (!tp->retransmit_skb_hint || - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || + (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } -- cgit v1.2.3