From c6214a97c86c660de4f7ddb8eed925192e646161 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:54 +0200 Subject: ipv4: Namespaceify tcp_retries2 sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fda379cd600d..7beb3f688b7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3476,6 +3476,7 @@ void tcp_send_probe0(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); unsigned long probe_max; int err; @@ -3489,7 +3490,7 @@ void tcp_send_probe0(struct sock *sk) } if (err <= 0) { - if (icsk->icsk_backoff < sysctl_tcp_retries2) + if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2) icsk->icsk_backoff++; icsk->icsk_probes_out++; probe_max = TCP_RTO_MAX; -- cgit v1.2.3 From 4979f2d9f7262b9b180bc83de8d70f7a7721c085 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Wed, 3 Feb 2016 09:46:57 +0200 Subject: ipv4: Namespaceify tcp_notsent_lowat sysctl knob Signed-off-by: Nikolay Borisov Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + include/net/tcp.h | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 3 --- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index de5ff4385e84..4d6ec3f6fafe 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -106,6 +106,7 @@ struct netns_ipv4 { int sysctl_tcp_retries2; int sysctl_tcp_orphan_retries; int sysctl_tcp_fin_timeout; + unsigned int sysctl_tcp_notsent_lowat; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f160c2e6960..9b2cb0c8d876 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -267,7 +267,6 @@ extern int sysctl_tcp_thin_dupack; extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; -extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; @@ -1682,7 +1681,8 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr); static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { - return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; + struct net *net = sock_net((struct sock *)tp); + return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; } static inline bool tcp_stream_memory_free(const struct sock *sk) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 8bd335a2cba8..44bb59824267 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -455,13 +455,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_minmax, .extra1 = &one, }, - { - .procname = "tcp_notsent_lowat", - .data = &sysctl_tcp_notsent_lowat, - .maxlen = sizeof(sysctl_tcp_notsent_lowat), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_rmem", .data = &sysctl_tcp_rmem, @@ -960,6 +953,13 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "tcp_notsent_lowat", + .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 36c83c28d9c9..11ae706f53a1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2395,6 +2395,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; net->ipv4.sysctl_tcp_orphan_retries = 0; net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7beb3f688b7a..7d2c7a400456 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,9 +62,6 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX; -EXPORT_SYMBOL(sysctl_tcp_notsent_lowat); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); -- cgit v1.2.3 From a44d6eacdaf56f74fad699af7f4925a5f5ac0e7f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 14 Mar 2016 10:52:15 -0700 Subject: tcp: Add RFC4898 tcpEStatsPerfDataSegsOut/In Per RFC4898, they count segments sent/received containing a positive length data segment (that includes retransmission segments carrying data). Unlike tcpi_segs_out/in, tcpi_data_segs_out/in excludes segments carrying no data (e.g. pure ack). The patch also updates the segs_in in tcp_fastopen_add_skb() so that segs_in >= data_segs_in property is kept. Together with retransmission data, tcpi_data_segs_out gives a better signal on the rxmit rate. v6: Rebase on the latest net-next v5: Eric pointed out that checking skb->len is still needed in tcp_fastopen_add_skb() because skb can carry a FIN without data. Hence, instead of open coding segs_in and data_segs_in, tcp_segs_in() helper is used. Comment is added to the fastopen case to explain why segs_in has to be reset and tcp_segs_in() has to be called before __skb_pull(). v4: Add comment to the changes in tcp_fastopen_add_skb() and also add remark on this case in the commit message. v3: Add const modifier to the skb parameter in tcp_segs_in() v2: Rework based on recent fix by Eric: commit a9d99ce28ed3 ("tcp: fix tcpi_segs_in after connection establishment") Signed-off-by: Martin KaFai Lau Cc: Chris Rapier Cc: Eric Dumazet Cc: Marcelo Ricardo Leitner Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ include/net/tcp.h | 10 ++++++++++ include/uapi/linux/tcp.h | 2 ++ net/ipv4/tcp.c | 2 ++ net/ipv4/tcp_fastopen.c | 8 ++++++++ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 4 +++- net/ipv6/tcp_ipv6.c | 2 +- 9 files changed, 34 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_output.c') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcbf51da4e1e..7be9b1242354 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -158,6 +158,9 @@ struct tcp_sock { u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn * total number of segments in. */ + u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn + * total number of data segments in. + */ u32 rcv_nxt; /* What we want to receive next */ u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ @@ -165,6 +168,9 @@ struct tcp_sock { u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut * The total number of segments sent. */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. diff --git a/include/net/tcp.h b/include/net/tcp.h index 0302636af98c..c8dbd293daae 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1840,4 +1840,14 @@ static inline int tcp_inq(struct sock *sk) return answ; } +static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) +{ + u16 segs_in; + + segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tp->segs_in += segs_in; + if (skb->len > tcp_hdrlen(skb)) + tp->data_segs_in += segs_in; +} + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fe95446e9abf..53e8e3fe6b1b 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -199,6 +199,8 @@ struct tcp_info { __u32 tcpi_notsent_bytes; __u32 tcpi_min_rtt; + __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ + __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a265f00b9df9..992b3103ec3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2715,6 +2715,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_notsent_bytes = max(0, notsent_bytes); info->tcpi_min_rtt = tcp_min_rtt(tp); + info->tcpi_data_segs_in = tp->data_segs_in; + info->tcpi_data_segs_out = tp->data_segs_out; } EXPORT_SYMBOL_GPL(tcp_get_info); diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index fdb286ddba04..4fc0061bebf4 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -140,6 +140,14 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) return; skb_dst_drop(skb); + /* segs_in has been initialized to 1 in tcp_create_openreq_child(). + * Hence, reset segs_in to 0 before calling tcp_segs_in() + * to avoid double counting. Also, tcp_segs_in() expects + * skb->len to include the tcp_hdrlen. Hence, it should + * be called before __skb_pull(). + */ + tp->segs_in = 0; + tcp_segs_in(tp, skb); __skb_pull(skb, tcp_hdrlen(skb)); skb_set_owner_r(skb, sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4c8d58dfac9b..0b02ef773705 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1650,7 +1650,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ae90e4b34bd3..acb366dd61e6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -812,7 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; - tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7d2c7a400456..7d2dc015cd19 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1003,8 +1003,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); - if (skb->len != tcp_header_size) + if (skb->len != tcp_header_size) { tcp_event_data_sent(tp, sk); + tp->data_segs_out += tcp_skb_pcount(skb); + } if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33f2820181f9..9c16565b70cc 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1443,7 +1443,7 @@ process: sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); - tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); + tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) -- cgit v1.2.3