summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2020-05-12 06:54:30 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2020-05-20 08:20:10 +0200
commit23300d6a39d7a1fa3cd6f8421d70e406cf1b85d0 (patch)
treee4ec04e3385207d060b2275abc3e9dcc476dbe8a /net/ipv4
parentcb4f789860659186b55955e5629141612a574aee (diff)
tcp: fix SO_RCVLOWAT hangs with fat skbs
[ Upstream commit 24adbc1676af4e134e709ddc7f34cf2adc2131e4 ] We autotune rcvbuf whenever SO_RCVLOWAT is set to account for 100% overhead in tcp_set_rcvlowat() This works well when skb->len/skb->truesize ratio is bigger than 0.5 But if we receive packets with small MSS, we can end up in a situation where not enough bytes are available in the receive queue to satisfy RCVLOWAT setting. As our sk_rcvbuf limit is hit, we send zero windows in ACK packets, preventing remote peer from sending more data. Even autotuning does not help, because it only triggers at the time user process drains the queue. If no EPOLLIN is generated, this can not happen. Note poll() has a similar issue, after commit c7004482e8dc ("tcp: Respect SO_RCVLOWAT in tcp_poll().") Fixes: 03f45c883c6f ("tcp: avoid extra wakeups for SO_RCVLOWAT users") Signed-off-by: Eric Dumazet <edumazet@google.com> Acked-by: Soheil Hassas Yeganeh <soheil@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_input.c3
2 files changed, 13 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 83829764773b..2a658c2c8903 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -477,9 +477,17 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
- (sk->sk_prot->stream_memory_read ?
- sk->sk_prot->stream_memory_read(sk) : false);
+ int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
+
+ if (avail > 0) {
+ if (avail >= target)
+ return true;
+ if (tcp_rmem_pressure(sk))
+ return true;
+ }
+ if (sk->sk_prot->stream_memory_read)
+ return sk->sk_prot->stream_memory_read(sk);
+ return false;
}
/*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5af22c9712a6..677facbeed26 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4751,7 +4751,8 @@ void tcp_data_ready(struct sock *sk)
const struct tcp_sock *tp = tcp_sk(sk);
int avail = tp->rcv_nxt - tp->copied_seq;
- if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE))
+ if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
+ !sock_flag(sk, SOCK_DONE))
return;
sk->sk_data_ready(sk);