summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2022-01-31 10:26:03 -0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2022-02-08 18:23:12 +0100
commit0a70f118475e037732557796accd0878a00fc25a (patch)
tree267be30de634928f94569584d9802c7a29d0951c
parent8070f13e8f938d0160ac6305364c460b8981837a (diff)
tcp: fix possible socket leaks in internal pacing mode
This patch is addressing an issue in stable linux-4.19 only. In linux-4.20, TCP stack adopted EDT (Earliest Departure Time) model and this issue was incidentally fixed. Issue at hand was an extra sock_hold() from tcp_internal_pacing() in paths not using tcp_xmit_retransmit_queue() Jason Xing reported this leak and provided a patch stopping the extra sock_hold() to happen. This patch is more complete and makes sure to avoid unnecessary extra delays, by reprogramming the high resolution timer. Fixes: 73a6bab5aa2a ("tcp: switch pacing timer to softirq based hrtimer") Reference: https://lore.kernel.org/all/CANn89i+7-wE4xr5D9DpH+N-xkL1SB8oVghCKgz+CT5eG1ODQhA@mail.gmail.com/ Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Jason Xing <kerneljasonxing@gmail.com> Reported-by: Zhang Changzhong <zhangchangzhong@huawei.com> Cc: liweishi <liweishi@kuaishou.com> Cc: Shujin Li <lishujin@kuaishou.com> Cc: Neal Cardwell <ncardwell@google.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--net/ipv4/tcp_output.c31
1 files changed, 23 insertions, 8 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 941c655cad91..c97c027a8d77 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -968,6 +968,8 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+ ktime_t expire, now;
u64 len_ns;
u32 rate;
@@ -979,12 +981,28 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
len_ns = (u64)skb->len * NSEC_PER_SEC;
do_div(len_ns, rate);
- hrtimer_start(&tcp_sk(sk)->pacing_timer,
- ktime_add_ns(ktime_get(), len_ns),
+ now = ktime_get();
+ /* If hrtimer is already armed, then our caller has not
+ * used tcp_pacing_check().
+ */
+ if (unlikely(hrtimer_is_queued(&tp->pacing_timer))) {
+ expire = hrtimer_get_softexpires(&tp->pacing_timer);
+ if (ktime_after(expire, now))
+ now = expire;
+ if (hrtimer_try_to_cancel(&tp->pacing_timer) == 1)
+ __sock_put(sk);
+ }
+ hrtimer_start(&tp->pacing_timer, ktime_add_ns(now, len_ns),
HRTIMER_MODE_ABS_PINNED_SOFT);
sock_hold(sk);
}
+static bool tcp_pacing_check(const struct sock *sk)
+{
+ return tcp_needs_internal_pacing(sk) &&
+ hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+}
+
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
{
skb->skb_mstamp = tp->tcp_mstamp;
@@ -2121,6 +2139,9 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
return -1;
+ if (tcp_pacing_check(sk))
+ return -1;
+
/* We're allowed to probe. Build it now. */
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
@@ -2194,12 +2215,6 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
-static bool tcp_pacing_check(const struct sock *sk)
-{
- return tcp_needs_internal_pacing(sk) &&
- hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
-}
-
/* TCP Small Queues :
* Control number of packets in qdisc/devices to two packets / or ~1 ms.
* (These limits are doubled for retransmits)