summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp_timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r--net/ipv4/tcp_timer.c89
1 files changed, 73 insertions, 16 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b144a26359bc..440a5c6004f6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -19,6 +19,7 @@
*/
#include <linux/module.h>
+#include <linux/gfp.h>
#include <net/tcp.h>
int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
@@ -29,6 +30,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_thin_linear_timeouts __read_mostly;
static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
@@ -132,22 +134,52 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
}
}
+/* This function calculates a "timeout" which is equivalent to the timeout of a
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
+ * retransmissions with an initial RTO of TCP_RTO_MIN.
+ */
+static bool retransmits_timed_out(struct sock *sk,
+ unsigned int boundary)
+{
+ unsigned int timeout, linear_backoff_thresh;
+ unsigned int start_ts;
+
+ if (!inet_csk(sk)->icsk_retransmits)
+ return false;
+
+ if (unlikely(!tcp_sk(sk)->retrans_stamp))
+ start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when;
+ else
+ start_ts = tcp_sk(sk)->retrans_stamp;
+
+ linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
+
+ if (boundary <= linear_backoff_thresh)
+ timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
+ else
+ timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
+ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+
+ return (tcp_time_stamp - start_ts) >= timeout;
+}
+
/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int retry_until;
+ bool do_reset;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(sk);
retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
} else {
- if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
- dst_negative_advice(&sk->sk_dst_cache);
+ dst_negative_advice(sk);
}
retry_until = sysctl_tcp_retries2;
@@ -155,13 +187,15 @@ static int tcp_write_timeout(struct sock *sk)
const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
retry_until = tcp_orphan_retries(sk, alive);
+ do_reset = alive ||
+ !retransmits_timed_out(sk, retry_until);
- if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until))
+ if (tcp_out_of_resources(sk, do_reset))
return 1;
}
}
- if (icsk->icsk_retransmits >= retry_until) {
+ if (retransmits_timed_out(sk, retry_until)) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
@@ -279,7 +313,7 @@ static void tcp_probe_timer(struct sock *sk)
* The TCP retransmit timer.
*/
-static void tcp_retransmit_timer(struct sock *sk)
+void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -300,15 +334,15 @@ static void tcp_retransmit_timer(struct sock *sk)
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_family == AF_INET) {
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &inet->daddr, ntohs(inet->dport),
- inet->num, tp->snd_una, tp->snd_nxt);
+ &inet->inet_daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt);
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (sk->sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
- &np->daddr, ntohs(inet->dport),
- inet->num, tp->snd_una, tp->snd_nxt);
+ &np->daddr, ntohs(inet->inet_dport),
+ inet->inet_num, tp->snd_una, tp->snd_nxt);
}
#endif
#endif
@@ -383,9 +417,27 @@ static void tcp_retransmit_timer(struct sock *sk)
icsk->icsk_retransmits++;
out_reset_timer:
- icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+ * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
+ * might be increased if the stream oscillates between thin and thick,
+ * thus the old value might already be too high compared to the value
+ * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
+ * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
+ * exponential backoff behaviour to avoid continue hammering
+ * linear-timeout retransmissions into a black hole
+ */
+ if (sk->sk_state == TCP_ESTABLISHED &&
+ (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+ tcp_stream_is_thin(tp) &&
+ icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+ icsk->icsk_backoff = 0;
+ icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ } else {
+ /* Use normal (exponential) backoff */
+ icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+ }
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
- if (icsk->icsk_retransmits > sysctl_tcp_retries1)
+ if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
__sk_dst_reset(sk);
out:;
@@ -442,6 +494,12 @@ static void tcp_synack_timer(struct sock *sk)
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
}
+void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+{
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
+}
+EXPORT_SYMBOL(tcp_syn_ack_timeout);
+
void tcp_set_keepalive(struct sock *sk, int val)
{
if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
@@ -459,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data)
struct sock *sk = (struct sock *) data;
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- __u32 elapsed;
+ u32 elapsed;
/* Only process if socket is not in use. */
bh_lock_sock(sk);
@@ -496,11 +554,10 @@ static void tcp_keepalive_timer (unsigned long data)
if (tp->packets_out || tcp_send_head(sk))
goto resched;
- elapsed = tcp_time_stamp - tp->rcv_tstamp;
+ elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
- if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) ||
- (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) {
+ if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
goto out;