From 7450aaf61f0ae2ee6cc6491138d11df2c25e7609 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Nov 2015 08:57:28 -0800 Subject: tcp: suppress too verbose messages in tcp_send_ack() If tcp_send_ack() can not allocate skb, we properly handle this and setup a timer to try later. Use __GFP_NOWARN to avoid polluting syslog in the case host is under memory pressure, so that pertinent messages are not lost under a flood of useless information. sk_gfp_atomic() can use its gfp_mask argument (all callers currently were using GFP_ATOMIC before this patch) We rename sk_gfp_atomic() to sk_gfp_mask() to clearly express this function now takes into account its second argument (gfp_mask) Note that when tcp_transmit_skb() is called with clone_it set to false, we do not attempt memory allocations, so can pass a 0 gfp_mask, which most compilers can emit faster than a non zero or constant value. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c5429a636f1a..41bcd59a2ac7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1130,7 +1130,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, AF_INET6, key->key, key->keylen, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); } #endif @@ -1146,7 +1146,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); + sk_gfp_mask(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) @@ -1212,7 +1212,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); + opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst = sk->sk_rx_dst; -- cgit v1.2.3 From c1e64e298b8cad309091b95d8436a0255c84f54a Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Wed, 16 Dec 2015 12:30:05 +0900 Subject: net: diag: Support destroying TCP sockets. This implements SOCK_DESTROY for TCP sockets. It causes all blocking calls on the socket to fail fast with ECONNABORTED and causes a protocol close of the socket. It informs the other end of the connection by sending a RST, i.e., initiating a TCP ABORT as per RFC 793. ECONNABORTED was chosen for consistency with FreeBSD. Signed-off-by: Lorenzo Colitti Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ net/ipv4/Kconfig | 13 +++++++++++++ net/ipv4/tcp.c | 32 ++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 19 +++++++++++++++++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/tcp_ipv6.c | 1 + 6 files changed, 68 insertions(+) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c5ad18..3077735b348d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1170,6 +1170,8 @@ void tcp_set_state(struct sock *sk, int state); void tcp_done(struct sock *sk); +int tcp_abort(struct sock *sk, int err); + static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { rx_opt->dsack = 0; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 416dfa004cfb..c22920525e5d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -436,6 +436,19 @@ config INET_UDP_DIAG Support for UDP socket monitoring interface used by the ss tool. If unsure, say Y. +config INET_DIAG_DESTROY + bool "INET: allow privileged process to administratively close sockets" + depends on INET_DIAG + default n + ---help--- + Provides a SOCK_DESTROY operation that allows privileged processes + (e.g., a connection manager or a network administration tool such as + ss) to close sockets opened by other processes. Closing a socket in + this way interrupts any blocking read/write/connect operations on + the socket and causes future socket calls to behave as if the socket + had been disconnected. + If unsure, say N. + menuconfig TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 92b3e61b847d..2c0e340518d2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3080,6 +3080,38 @@ void tcp_done(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_done); +int tcp_abort(struct sock *sk, int err) +{ + if (!sk_fullsock(sk)) { + sock_gen_put(sk); + return -EOPNOTSUPP; + } + + /* Don't race with userspace socket closes such as tcp_close. */ + lock_sock(sk); + + /* Don't race with BH socket closes such as inet_csk_listen_stop. */ + local_bh_disable(); + bh_lock_sock(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk->sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); + } + + bh_unlock_sock(sk); + local_bh_enable(); + release_sock(sk); + sock_put(sk); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_abort); + extern struct tcp_congestion_ops tcp_reno; static __initdata unsigned long thash_entries; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b31604086edd..4d610934fb39 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -10,6 +10,8 @@ */ #include +#include +#include #include #include @@ -46,12 +48,29 @@ static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh, return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int tcp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + + if (IS_ERR(sk)) + return PTR_ERR(sk); + + return sock_diag_destroy(sk, ECONNABORTED); +} +#endif + static const struct inet_diag_handler tcp_diag_handler = { .dump = tcp_diag_dump, .dump_one = tcp_diag_dump_one, .idiag_get_info = tcp_diag_get_info, .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = tcp_diag_destroy, +#endif }; static int __init tcp_diag_init(void) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index db003438aaf5..7aa13bd3de29 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2342,6 +2342,7 @@ struct proto tcp_prot = { .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, #endif + .diag_destroy = tcp_abort, }; EXPORT_SYMBOL(tcp_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c16e3fbf6854..5382c2662fa2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1890,6 +1890,7 @@ struct proto tcpv6_prot = { .proto_cgroup = tcp_proto_cgroup, #endif .clear_sk = tcp_v6_clear_sk, + .diag_destroy = tcp_abort, }; static const struct inet6_protocol tcpv6_protocol = { -- cgit v1.2.3 From e46787f0dd9385449fd77246d4fddb8634350af8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:25 +0100 Subject: tcp: send_reset: test for non-NULL sk first tcp_md5_do_lookup requires a full socket, so once we extend _send_reset() to also accept timewait socket we would have to change if (!sk && hash_location) to something like if ((!sk || !sk_fullsock(sk)) && hash_location) { ... } else { (sk && sk_fullsock(sk)) tcp_md5_do_lookup() } Switch the two branches: check if we have a socket first, then fall back to a listener lookup if we saw a md5 option (hash_location). Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 11 +++++------ net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46e92fbd26a8..eb29c2f5bcea 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -587,7 +587,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) } rep; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; + struct tcp_md5sig_key *key = NULL; const __u8 *hash_location = NULL; unsigned char newhash[16]; int genhash; @@ -627,7 +627,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -651,10 +654,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) - &ip_hdr(skb)->saddr, - AF_INET) : NULL; } if (key) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f03d2b0445fd..32fa0de9982a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (!sk && hash_location) { + if (sk) { + key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); + } else if (hash_location) { /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. @@ -877,8 +879,6 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; - } else { - key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; } #endif -- cgit v1.2.3 From 271c3b9b7bdae09c7da467ac1ae96e3298754977 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Dec 2015 21:29:26 +0100 Subject: tcp: honour SO_BINDTODEVICE for TW_RST case too Hannes points out that when we generate tcp reset for timewait sockets we pretend we found no socket and pass NULL sk to tcp_vX_send_reset(). Make it cope with inet tw sockets and then provide tw sk. This makes RSTs appear on correct interface when SO_BINDTODEVICE is used. Packetdrill test case: // want default route to be used, we rely on BINDTODEVICE `ip route del 192.0.2.0/24 via 192.168.0.2 dev tun0` 0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 // test case still works due to BINDTODEVICE 0.001 setsockopt(3, SOL_SOCKET, SO_BINDTODEVICE, "tun0", 4) = 0 0.100...0.200 connect(3, ..., ...) = 0 0.100 > S 0:0(0) 0.200 < S. 0:0(0) ack 1 win 32792 0.200 > . 1:1(0) ack 1 0.210 close(3) = 0 0.210 > F. 1:1(0) ack 1 win 29200 0.300 < . 1:1(0) ack 2 win 46 // more data while in FIN_WAIT2, expect RST 1.300 < P. 1:1001(1000) ack 1 win 46 // fails without this change -- default route is used 1.301 > R 1:1(0) win 0 Reported-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 12 +++++++++--- net/ipv4/tcp_minisocks.c | 7 ++----- net/ipv6/tcp_ipv6.c | 6 ++++-- 3 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index eb29c2f5bcea..fc4f72686705 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -627,7 +627,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *) &ip_hdr(skb)->saddr, AF_INET); } else if (hash_location) { @@ -674,7 +674,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; - arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. @@ -682,6 +683,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) if (sk) arg.bound_dev_if = sk->sk_bound_dev_if; + BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != + offsetof(struct inet_timewait_sock, tw_bound_dev_if)); + arg.tos = ip_hdr(skb)->tos; ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, @@ -1705,7 +1709,9 @@ do_time_wait: tcp_v4_timewait_ack(sk, skb); break; case TCP_TW_RST: - goto no_tcp_socket; + tcp_v4_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS:; } goto discard_it; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ac6b1961ffeb..75632a925824 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -131,7 +131,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill; if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) - goto kill_with_rst; + return TCP_TW_RST; /* Dup ACK? */ if (!th->ack || @@ -145,11 +145,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { -kill_with_rst: - inet_twsk_deschedule_put(tw); + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) return TCP_TW_RST; - } /* FIN arrived, enter true time-wait state. */ tw->tw_substate = TCP_TIME_WAIT; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 32fa0de9982a..9ecb012291cf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -854,7 +854,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG hash_location = tcp_parse_md5sig_option(th); - if (sk) { + if (sk && sk_fullsock(sk)) { key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr); } else if (hash_location) { /* @@ -1516,7 +1516,9 @@ do_time_wait: break; case TCP_TW_RST: tcp_v6_restore_cb(skb); - goto no_tcp_socket; + tcp_v6_send_reset(sk, skb); + inet_twsk_deschedule_put(inet_twsk(sk)); + goto discard_it; case TCP_TW_SUCCESS: ; } -- cgit v1.2.3