From a30aad50c26cac63026e5dfcc2e055ae63fe6ef7 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 9 Mar 2017 13:53:55 +0300 Subject: tcp: rename *_sequence_number() to *_seq_and_tsoff() The functions that are returning tcp sequence number also setup TS offset value, so rename them to better describe their purpose. No functional changes in this patch. Suggested-by: Eric Dumazet Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9a89b8deafae..7b332ed66488 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) { - return secure_tcp_sequence_number(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source, tsoff); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -236,11 +236,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_sequence_number(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); + seq = secure_tcp_seq_and_tsoff(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port, + &tp->tsoffset); if (!tp->write_seq) tp->write_seq = seq; } @@ -1249,7 +1249,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq = tcp_v4_init_sequence, + .init_seq_tsoff = tcp_v4_init_seq_and_tsoff, .send_synack = tcp_v4_send_synack, }; -- cgit v1.2.3 From d82bae12dc38d79a2b77473f5eb0612a3d69c55b Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 15 Mar 2017 16:30:45 -0400 Subject: tcp: remove per-destination timestamp cache Commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection) randomizes TCP timestamps per connection. After this commit, there is no guarantee that the timestamps received from the same destination are monotonically increasing. As a result, the per-destination timestamp cache in TCP metrics (i.e., tcpm_ts in struct tcp_metrics_block) is broken and cannot be relied upon. Remove the per-destination timestamp cache and all related code paths. Note that this cache was already broken for caching timestamps of multiple machines behind a NAT sharing the same address. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Cc: Lutz Vieweg Cc: Florian Westphal Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +- net/ipv4/tcp_input.c | 6 +- net/ipv4/tcp_ipv4.c | 4 -- net/ipv4/tcp_metrics.c | 147 ++--------------------------------------------- net/ipv4/tcp_minisocks.c | 22 ++----- net/ipv6/tcp_ipv6.c | 5 -- 6 files changed, 11 insertions(+), 179 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index bede8f7fa742..c81f3b958d44 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -406,11 +406,7 @@ void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps); -bool tcp_remember_stamp(struct sock *sk); -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 96b67a8b18c3..aafec0676d3e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6342,8 +6342,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, dst = af_ops->route_req(sk, &fl, req, &strict); if (dst && strict && - !tcp_peer_is_proven(req, dst, true, - tmp_opt.saw_tstamp)) { + !tcp_peer_is_proven(req, dst)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); goto drop_and_release; } @@ -6352,8 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, else if (!net->ipv4.sysctl_tcp_syncookies && (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (net->ipv4.sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst, false, - tmp_opt.saw_tstamp)) { + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 08d870e45658..d8b401fff9fe 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -198,10 +198,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) tp->write_seq = 0; } - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) - tcp_fetch_timewait_stamp(sk, &rt->dst); - inet->inet_dport = usin->sin_port; sk_daddr_set(sk, daddr); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 0f46e5fe31ad..9d0d4f39e42b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -45,8 +45,6 @@ struct tcp_metrics_block { struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_daddr; unsigned long tcpm_stamp; - u32 tcpm_ts; - u32 tcpm_ts_stamp; u32 tcpm_lock; u32 tcpm_vals[TCP_METRIC_MAX_KERNEL + 1]; struct tcp_fastopen_metrics tcpm_fastopen; @@ -123,8 +121,6 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); - tm->tcpm_ts = 0; - tm->tcpm_ts_stamp = 0; if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; @@ -273,48 +269,6 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req, return tm; } -static struct tcp_metrics_block *__tcp_get_metrics_tw(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - struct inetpeer_addr saddr, daddr; - unsigned int hash; - struct net *net; - - if (tw->tw_family == AF_INET) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } -#if IS_ENABLED(CONFIG_IPV6) - else if (tw->tw_family == AF_INET6) { - if (ipv6_addr_v4mapped(&tw->tw_v6_daddr)) { - inetpeer_set_addr_v4(&saddr, tw->tw_rcv_saddr); - inetpeer_set_addr_v4(&daddr, tw->tw_daddr); - hash = ipv4_addr_hash(tw->tw_daddr); - } else { - inetpeer_set_addr_v6(&saddr, &tw->tw_v6_rcv_saddr); - inetpeer_set_addr_v6(&daddr, &tw->tw_v6_daddr); - hash = ipv6_addr_hash(&tw->tw_v6_daddr); - } - } -#endif - else - return NULL; - - net = twsk_net(tw); - hash ^= net_hash_mix(net); - hash = hash_32(hash, tcp_metrics_hash_log); - - for (tm = rcu_dereference(tcp_metrics_hash[hash].chain); tm; - tm = rcu_dereference(tm->tcpm_next)) { - if (addr_same(&tm->tcpm_saddr, &saddr) && - addr_same(&tm->tcpm_daddr, &daddr) && - net_eq(tm_net(tm), net)) - break; - } - return tm; -} - static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct dst_entry *dst, bool create) @@ -573,8 +527,7 @@ reset: tp->snd_cwnd_stamp = tcp_time_stamp; } -bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, - bool paws_check, bool timestamps) +bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) { struct tcp_metrics_block *tm; bool ret; @@ -584,94 +537,10 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, rcu_read_lock(); tm = __tcp_get_metrics_req(req, dst); - if (paws_check) { - if (tm && - (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && - ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW || - !timestamps)) - ret = false; - else - ret = true; - } else { - if (tm && tcp_metric_get(tm, TCP_METRIC_RTT) && tm->tcpm_ts_stamp) - ret = true; - else - ret = false; - } - rcu_read_unlock(); - - return ret; -} - -void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) -{ - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((u32)get_seconds() - tm->tcpm_ts_stamp <= TCP_PAWS_MSL) { - tp->rx_opt.ts_recent_stamp = tm->tcpm_ts_stamp; - tp->rx_opt.ts_recent = tm->tcpm_ts; - } - } - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(tcp_fetch_timewait_stamp); - -/* VJ's idea. Save last timestamp seen from this destination and hold - * it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter - * synchronized state. - */ -bool tcp_remember_stamp(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - bool ret = false; - - if (dst) { - struct tcp_metrics_block *tm; - - rcu_read_lock(); - tm = tcp_get_metrics(sk, dst, true); - if (tm) { - struct tcp_sock *tp = tcp_sk(sk); - - if ((s32)(tm->tcpm_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - tm->tcpm_ts = tp->rx_opt.ts_recent; - } - ret = true; - } - rcu_read_unlock(); - } - return ret; -} - -bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw) -{ - struct tcp_metrics_block *tm; - bool ret = false; - - rcu_read_lock(); - tm = __tcp_get_metrics_tw(tw); - if (tm) { - const struct tcp_timewait_sock *tcptw; - struct sock *sk = (struct sock *) tw; - - tcptw = tcp_twsk(sk); - if ((s32)(tm->tcpm_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - tm->tcpm_ts_stamp > TCP_PAWS_MSL && - tm->tcpm_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - tm->tcpm_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - tm->tcpm_ts = tcptw->tw_ts_recent; - } + if (tm && tcp_metric_get(tm, TCP_METRIC_RTT)) ret = true; - } + else + ret = false; rcu_read_unlock(); return ret; @@ -791,14 +660,6 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, jiffies - tm->tcpm_stamp, TCP_METRICS_ATTR_PAD) < 0) goto nla_put_failure; - if (tm->tcpm_ts_stamp) { - if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP, - (s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0) - goto nla_put_failure; - if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL, - tm->tcpm_ts) < 0) - goto nla_put_failure; - } { int n = 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7e16243cdb58..692f974e5abe 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -94,7 +94,6 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, struct tcp_options_received tmp_opt; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; - struct inet_timewait_death_row *tcp_death_row = &sock_net((struct sock*)tw)->ipv4.tcp_death_row; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { @@ -149,12 +148,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - if (tcp_death_row->sysctl_tw_recycle && - tcptw->tw_ts_recent_stamp && - tcp_tw_remember_stamp(tw)) - inet_twsk_reschedule(tw, tw->tw_timeout); - else - inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); + inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -259,12 +253,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); struct inet_timewait_sock *tw; - bool recycle_ok = false; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; - if (tcp_death_row->sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = tcp_remember_stamp(sk); - tw = inet_twsk_alloc(sk, tcp_death_row, state); if (tw) { @@ -317,13 +307,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (timeo < rto) timeo = rto; - if (recycle_ok) { - tw->tw_timeout = rto; - } else { - tw->tw_timeout = TCP_TIMEWAIT_LEN; - if (state == TCP_TIME_WAIT) - timeo = TCP_TIMEWAIT_LEN; - } + tw->tw_timeout = TCP_TIMEWAIT_LEN; + if (state == TCP_TIME_WAIT) + timeo = TCP_TIMEWAIT_LEN; inet_twsk_schedule(tw, timeo); /* Linkage updates. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c73a431fd06f..853cb43e3e3c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -265,11 +265,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); - if (tcp_death_row->sysctl_tw_recycle && - !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) - tcp_fetch_timewait_stamp(sk, dst); - icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + -- cgit v1.2.3 From 4396e46187ca5070219b81773c4e65088dac50cc Mon Sep 17 00:00:00 2001 From: Soheil Hassas Yeganeh Date: Wed, 15 Mar 2017 16:30:46 -0400 Subject: tcp: remove tcp_tw_recycle The tcp_tw_recycle was already broken for connections behind NAT, since the per-destination timestamp is not monotonically increasing for multiple machines behind a single destination address. After the randomization of TCP timestamp offsets in commit 8a5bd45f6616 (tcp: randomize tcp timestamp offsets for each connection), the tcp_tw_recycle is broken for all types of connections for the same reason: the timestamps received from a single machine is not monotonically increasing, anymore. Remove tcp_tw_recycle, since it is not functional. Also, remove the PAWSPassive SNMP counter since it is only used for tcp_tw_recycle, and simplify tcp_v4_route_req and tcp_v6_route_req since the strict argument is only set when tcp_tw_recycle is enabled. Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Cc: Lutz Vieweg Cc: Florian Westphal Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 ----- include/net/netns/ipv4.h | 1 - include/net/tcp.h | 3 +-- include/uapi/linux/snmp.h | 1 - net/ipv4/proc.c | 1 - net/ipv4/sysctl_net_ipv4.c | 7 ------- net/ipv4/tcp_input.c | 30 +++++------------------------- net/ipv4/tcp_ipv4.c | 15 ++------------- net/ipv6/tcp_ipv6.c | 5 +---- 9 files changed, 9 insertions(+), 59 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ab0230461377..ed3d0791eb27 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -640,11 +640,6 @@ tcp_tso_win_divisor - INTEGER building larger TSO frames. Default: 3 -tcp_tw_recycle - BOOLEAN - Enable fast recycling TIME-WAIT sockets. Default value is 0. - It should not be changed without advice/request of technical - experts. - tcp_tw_reuse - BOOLEAN Allow to reuse TIME-WAIT sockets for new connections when it is safe from protocol viewpoint. Default value is 0. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 622d2da27135..2e9d649ba169 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -33,7 +33,6 @@ struct inet_timewait_death_row { atomic_t tw_count; struct inet_hashinfo *hashinfo ____cacheline_aligned_in_smp; - int sysctl_tw_recycle; int sysctl_max_tw_buckets; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index c81f3b958d44..e614ad4d613e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1810,8 +1810,7 @@ struct tcp_request_sock_ops { __u16 *mss); #endif struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict); + const struct request_sock *req); __u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 3b2bed7ca9a4..cec0e171d20c 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -177,7 +177,6 @@ enum LINUX_MIB_TIMEWAITED, /* TimeWaited */ LINUX_MIB_TIMEWAITRECYCLED, /* TimeWaitRecycled */ LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ - LINUX_MIB_PAWSPASSIVEREJECTED, /* PAWSPassiveRejected */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ LINUX_MIB_DELAYEDACKS, /* DelayedACKs */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 69cf49e8356d..4ccbf464d1ac 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -199,7 +199,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TW", LINUX_MIB_TIMEWAITED), SNMP_MIB_ITEM("TWRecycled", LINUX_MIB_TIMEWAITRECYCLED), SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), - SNMP_MIB_ITEM("PAWSPassive", LINUX_MIB_PAWSPASSIVEREJECTED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d6880a6149ee..11aaef0939b2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -980,13 +980,6 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_tw_recycle", - .data = &init_net.ipv4.tcp_death_row.sysctl_tw_recycle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index aafec0676d3e..bb09c7095988 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6327,31 +6327,11 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); if (!want_cookie && !isn) { - /* VJ's idea. We save last timestamp seen - * from the destination in peer table, when entering - * state TIME-WAIT, and check against it before - * accepting new connection request. - * - * If "isn" is not zero, this request hit alive - * timewait bucket, so that all the necessary checks - * are made in the function processing timewait state. - */ - if (net->ipv4.tcp_death_row.sysctl_tw_recycle) { - bool strict; - - dst = af_ops->route_req(sk, &fl, req, &strict); - - if (dst && strict && - !tcp_peer_is_proven(req, dst)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); - goto drop_and_release; - } - } /* Kill the following clause, if you dislike this way. */ - else if (!net->ipv4.sysctl_tcp_syncookies && - (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < - (net->ipv4.sysctl_max_syn_backlog >> 2)) && - !tcp_peer_is_proven(req, dst)) { + if (!net->ipv4.sysctl_tcp_syncookies && + (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < + (net->ipv4.sysctl_max_syn_backlog >> 2)) && + !tcp_peer_is_proven(req, dst)) { /* Without syncookies last quarter of * backlog is filled with destinations, * proven to be alive. @@ -6367,7 +6347,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); } if (!dst) { - dst = af_ops->route_req(sk, &fl, req, NULL); + dst = af_ops->route_req(sk, &fl, req); if (!dst) goto drop_and_free; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d8b401fff9fe..7482b5d11861 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1213,19 +1213,9 @@ static void tcp_v4_init_req(struct request_sock *req, static struct dst_entry *tcp_v4_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req); - - if (strict) { - if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr) - *strict = true; - else - *strict = false; - } - - return dst; + return inet_csk_route_req(sk, &fl->u.ip4, req); } struct request_sock_ops tcp_request_sock_ops __read_mostly = { @@ -2462,7 +2452,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 0; cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row.sysctl_tw_recycle = 0; net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2; net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 853cb43e3e3c..0f08d718a002 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -722,11 +722,8 @@ static void tcp_v6_init_req(struct request_sock *req, static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct flowi *fl, - const struct request_sock *req, - bool *strict) + const struct request_sock *req) { - if (strict) - *strict = true; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } -- cgit v1.2.3 From e5907459ce7e2b6bc397007865ad492f10c2aeac Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 24 Mar 2017 10:08:00 -0700 Subject: tcp: Record Rx hash and NAPI ID in tcp_child_process While working on some recent busy poll changes we found that child sockets were being instantiated without NAPI ID being set. In our first attempt to fix it, it was suggested that we should just pull programming the NAPI ID into the function itself since all callers will need to have it set. In addition to the NAPI ID change I have dropped the code that was populating the Rx hash since it was actually being populated in tcp_get_cookie_sock. Reported-by: Sridhar Samudrala Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 -- net/ipv4/tcp_minisocks.c | 4 ++++ net/ipv6/tcp_ipv6.c | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7482b5d11861..20cbd2f07f28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,8 +1409,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (!nsk) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1e217948be62..8f6373b0cd77 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -26,6 +26,7 @@ #include #include #include +#include int sysctl_tcp_abort_on_overflow __read_mostly; @@ -799,6 +800,9 @@ int tcp_child_process(struct sock *parent, struct sock *child, int ret = 0; int state = child->sk_state; + /* record NAPI ID of child */ + sk_mark_napi_id(child, skb); + tcp_segs_in(tcp_sk(child), skb); if (!sock_owned_by_user(child)) { ret = tcp_rcv_state_process(child, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 031a8c019f7a..8e42e8f54b70 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1293,8 +1293,6 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) -- cgit v1.2.3 From 5f0d5a3ae7cff0d7fa943c199c3a2e44f23e1fac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Jan 2017 02:53:44 -0800 Subject: mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU A group of Linux kernel hackers reported chasing a bug that resulted from their assumption that SLAB_DESTROY_BY_RCU provided an existence guarantee, that is, that no block from such a slab would be reallocated during an RCU read-side critical section. Of course, that is not the case. Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire slab of blocks. However, there is a phrase for this, namely "type safety". This commit therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order to avoid future instances of this sort of confusion. Signed-off-by: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Acked-by: Johannes Weiner Acked-by: Vlastimil Babka [ paulmck: Add comments mentioning the old name, as requested by Eric Dumazet, in order to help people familiar with the old name find the new one. ] Acked-by: David Rientjes --- Documentation/RCU/00-INDEX | 2 +- Documentation/RCU/rculist_nulls.txt | 6 +++--- Documentation/RCU/whatisRCU.txt | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 +- drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c | 2 +- fs/jbd2/journal.c | 2 +- fs/signalfd.c | 2 +- include/linux/dma-fence.h | 4 ++-- include/linux/slab.h | 6 ++++-- include/net/sock.h | 2 +- kernel/fork.c | 4 ++-- kernel/signal.c | 2 +- mm/kasan/kasan.c | 6 +++--- mm/kmemcheck.c | 2 +- mm/rmap.c | 4 ++-- mm/slab.c | 6 +++--- mm/slab.h | 4 ++-- mm/slab_common.c | 6 +++--- mm/slob.c | 6 +++--- mm/slub.c | 12 ++++++------ net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 4 ++-- net/llc/llc_sap.c | 2 +- net/netfilter/nf_conntrack_core.c | 8 ++++---- net/smc/af_smc.c | 2 +- 30 files changed, 57 insertions(+), 54 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index f773a264ae02..1672573b037a 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -17,7 +17,7 @@ rcu_dereference.txt rcubarrier.txt - RCU and Unloadable Modules rculist_nulls.txt - - RCU list primitives for use with SLAB_DESTROY_BY_RCU + - RCU list primitives for use with SLAB_TYPESAFE_BY_RCU rcuref.txt - Reference-count design for elements of lists/arrays protected by RCU rcu.txt diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt index 18f9651ff23d..8151f0195f76 100644 --- a/Documentation/RCU/rculist_nulls.txt +++ b/Documentation/RCU/rculist_nulls.txt @@ -1,5 +1,5 @@ Using hlist_nulls to protect read-mostly linked lists and -objects using SLAB_DESTROY_BY_RCU allocations. +objects using SLAB_TYPESAFE_BY_RCU allocations. Please read the basics in Documentation/RCU/listRCU.txt @@ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way to solve following problem : A typical RCU linked list managing objects which are -allocated with SLAB_DESTROY_BY_RCU kmem_cache can +allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use following algos : 1) Lookup algo @@ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock() 3) Remove algo -------------- Nothing special here, we can use a standard RCU hlist deletion. -But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused +But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused very very fast (before the end of RCU grace period) if (put_last_reference_on(obj) { diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5cbd8b2395b8..91c912e86915 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -925,7 +925,8 @@ d. Do you need RCU grace periods to complete even in the face e. Is your workload too update-intensive for normal use of RCU, but inappropriate for other synchronization mechanisms? - If so, consider SLAB_DESTROY_BY_RCU. But please be careful! + If so, consider SLAB_TYPESAFE_BY_RCU (which was originally + named SLAB_DESTROY_BY_RCU). But please be careful! f. Do you need read-side critical sections that are respected even though they are in the middle of the idle loop, during diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..3b668895ac24 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4552,7 +4552,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | - SLAB_DESTROY_BY_RCU); + SLAB_TYPESAFE_BY_RCU); if (!dev_priv->requests) goto err_vmas; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..9ee2750e1dde 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -493,7 +493,7 @@ static inline struct drm_i915_gem_request * __i915_gem_active_get_rcu(const struct i915_gem_active *active) { /* Performing a lockless retrieval of the active request is super - * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing + * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing * slab of request objects will not be freed whilst we hold the * RCU read lock. It does not guarantee that the request itself * will not be freed and then *reused*. Viz, diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c index 12647af5a336..e7fb47e84a93 100644 --- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c +++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c @@ -1071,7 +1071,7 @@ int ldlm_init(void) ldlm_lock_slab = kmem_cache_create("ldlm_locks", sizeof(struct ldlm_lock), 0, SLAB_HWCACHE_ALIGN | - SLAB_DESTROY_BY_RCU, NULL); + SLAB_TYPESAFE_BY_RCU, NULL); if (!ldlm_lock_slab) { kmem_cache_destroy(ldlm_resource_slab); return -ENOMEM; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a1a359bfcc9c..7f8f962454e5 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2340,7 +2340,7 @@ static int jbd2_journal_init_journal_head_cache(void) jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", sizeof(struct journal_head), 0, /* offset */ - SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, + SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, NULL); /* ctor */ retval = 0; if (!jbd2_journal_head_cache) { diff --git a/fs/signalfd.c b/fs/signalfd.c index 270221fcef42..7e3d71109f51 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) /* * The lockless check can race with remove_wait_queue() in progress, * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. */ if (likely(!waitqueue_active(wqh))) return; diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6048fa404e57..a5195a7d6f77 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) * * Function returns NULL if no refcount could be obtained, or the fence. * This function handles acquiring a reference to a fence that may be - * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU), * so long as the caller is using RCU on the pointer to the fence. * * An alternative mechanism is to employ a seqlock to protect a bunch of @@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) * have successfully acquire a reference to it. If it no * longer matches, we are holding a reference to some other * reallocated pointer. This is possible if the allocator - * is using a freelist like SLAB_DESTROY_BY_RCU where the + * is using a freelist like SLAB_TYPESAFE_BY_RCU where the * fence remains valid for the RCU grace period, but it * may be reallocated. When using such allocators, we are * responsible for ensuring the reference we get is to diff --git a/include/linux/slab.h b/include/linux/slab.h index 3c37a8c51921..04a7f7993e67 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -28,7 +28,7 @@ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ /* - * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! + * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. This means that if you do kmem_cache_free() @@ -61,8 +61,10 @@ * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. + * + * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ +#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ diff --git a/include/net/sock.h b/include/net/sock.h index 5e5997654db6..59cdccaa30e7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -993,7 +993,7 @@ struct smc_hashinfo; struct module; /* - * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes * un-modified. Special care is taken when initializing object to zero. */ static inline void sk_prot_clear_nulls(struct sock *sk, int size) diff --git a/kernel/fork.c b/kernel/fork.c index 6c463c80e93d..9330ce24f1bb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) if (atomic_dec_and_test(&sighand->count)) { signalfd_cleanup(sighand); /* - * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it + * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it * without an RCU grace period, see __lock_task_sighand(). */ kmem_cache_free(sighand_cachep, sighand); @@ -2144,7 +2144,7 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, diff --git a/kernel/signal.c b/kernel/signal.c index 7e59ebc2c25e..6df5f72158e4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, } /* * This sighand can be already freed and even reused, but - * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which + * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which * initializes ->siglock: this slab can't go away, it has * the same object type, ->siglock can't be reinitialized. * diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 98b27195e38b..4b20061102f6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size, *size += sizeof(struct kasan_alloc_meta); /* Add free meta. */ - if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor || + if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor || cache->object_size < sizeof(struct kasan_free_meta)) { cache->kasan_info.free_meta_offset = *size; *size += sizeof(struct kasan_free_meta); @@ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return; kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); @@ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) s8 shadow_byte; /* RCU slabs could be legally used after free within the RCU period */ - if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c index 5bf191756a4a..2d5959c5f7c5 100644 --- a/mm/kmemcheck.c +++ b/mm/kmemcheck.c @@ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) { /* TODO: RCU freeing is unsupported for now; hide false positives. */ - if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) kmemcheck_mark_freed(object, size); } diff --git a/mm/rmap.c b/mm/rmap.c index 49ed681ccc7b..8ffd59df8a3f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -430,7 +430,7 @@ static void anon_vma_ctor(void *data) void __init anon_vma_init(void) { anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), - 0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, + 0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, anon_vma_ctor); anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC|SLAB_ACCOUNT); @@ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) * If this page is still mapped, then its anon_vma cannot have been * freed. But if it has been unmapped, we have no security against the * anon_vma structure being freed and reused (for another anon_vma: - * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero() + * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() * above cannot corrupt). */ if (!page_mapped(page)) { diff --git a/mm/slab.c b/mm/slab.c index 807d86c76908..93c827864862 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) freelist = page->freelist; slab_destroy_debugcheck(cachep, page); - if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) + if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) call_rcu(&page->rcu_head, kmem_rcu_free); else kmem_freepages(cachep, page); @@ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, cachep->num = 0; - if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) + if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) return false; left = calculate_slab_order(cachep, size, @@ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN + 2 * sizeof(unsigned long long))) flags |= SLAB_RED_ZONE | SLAB_STORE_USER; - if (!(flags & SLAB_DESTROY_BY_RCU)) + if (!(flags & SLAB_TYPESAFE_BY_RCU)) flags |= SLAB_POISON; #endif #endif diff --git a/mm/slab.h b/mm/slab.h index 65e7c3fcac72..9cfcf099709c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS ) + SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) @@ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) * back there or track user information then we can * only use the space before that information. */ - if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) + if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) return s->inuse; /* * Else we can use all the padding etc for the allocation diff --git a/mm/slab_common.c b/mm/slab_common.c index 09d0e849b07f..01a0fe2eb332 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, * Set of flags that will prevent slab merging */ #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ + SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ @@ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) struct kmem_cache *s, *s2; /* - * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the + * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the * @slab_caches_to_rcu_destroy list. The slab pages are freed * through RCU and and the associated kmem_cache are dereferenced * while freeing the pages, so the kmem_caches should be freed only @@ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s) memcg_unlink_cache(s); list_del(&s->list); - if (s->flags & SLAB_DESTROY_BY_RCU) { + if (s->flags & SLAB_TYPESAFE_BY_RCU) { list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { diff --git a/mm/slob.c b/mm/slob.c index eac04d4357ec..1bae78d71096 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp) /* * struct slob_rcu is inserted at the tail of allocated slob blocks, which - * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free + * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free * the block using call_rcu. */ struct slob_rcu { @@ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize); int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) { - if (flags & SLAB_DESTROY_BY_RCU) { + if (flags & SLAB_TYPESAFE_BY_RCU) { /* leave room for rcu footer at the end of object */ c->size += sizeof(struct slob_rcu); } @@ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { kmemleak_free_recursive(b, c->flags); - if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); slob_rcu->size = c->size; diff --git a/mm/slub.c b/mm/slub.c index 7f4bc7027ed5..57e5156f02be 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h) static void free_slab(struct kmem_cache *s, struct page *page) { - if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { + if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { struct rcu_head *head; if (need_reserve_slab_rcu) { @@ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * slab_free_freelist_hook() could have put the items into quarantine. * If so, no need to free them. */ - if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU)) + if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU)) return; do_slab_free(s, page, head, tail, cnt, addr); } @@ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) * the slab may touch the object after free or before allocation * then we should never poison the object itself. */ - if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && + if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) && !s->ctor) s->flags |= __OBJECT_POISON; else @@ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) */ s->inuse = size; - if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || + if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || s->ctor)) { /* * Relocate free pointer after the object if it is not @@ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); s->reserved = 0; - if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) + if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU)) s->reserved = sizeof(struct rcu_head); if (!calculate_sizes(s, -1)) @@ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma); static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { - return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); + return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); } SLAB_ATTR_RO(destroy_by_rcu); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 409d0cfd3447..90210a0e3888 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -950,7 +950,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 233b57367758..b4019a5e4551 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1012,7 +1012,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9a89b8deafae..82c89abeb989 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2398,7 +2398,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60a5295a7de6..bdbc4327ebee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1919,7 +1919,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 06186d608a27..d096ca563054 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -142,7 +142,7 @@ static struct proto llc_proto = { .name = "LLC", .owner = THIS_MODULE, .obj_size = sizeof(struct llc_sock), - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; /** diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 8bc5a1bd2d45..9b02c13d258b 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_estab_match(sap, daddr, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || @@ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_listener_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 5404d0d195cc..63b6ab056370 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc)) { - /* Extra checks required by SLAB_DESTROY_BY_RCU */ + /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 071b97fcbefb..fdcdac7916b2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -914,7 +914,7 @@ static unsigned int early_drop_list(struct net *net, continue; /* kill only if still in same netns -- might have moved due to - * SLAB_DESTROY_BY_RCU rules. + * SLAB_TYPESAFE_BY_RCU rules. * * We steal the timer reference. If that fails timer has * already fired or someone else deleted it. Just drop ref @@ -1069,7 +1069,7 @@ __nf_conntrack_alloc(struct net *net, /* * Do not use kmem_cache_zalloc(), as this cache uses - * SLAB_DESTROY_BY_RCU. + * SLAB_TYPESAFE_BY_RCU. */ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); if (ct == NULL) @@ -1114,7 +1114,7 @@ void nf_conntrack_free(struct nf_conn *ct) struct net *net = nf_ct_net(ct); /* A freed object has refcnt == 0, that's - * the golden rule for SLAB_DESTROY_BY_RCU + * the golden rule for SLAB_TYPESAFE_BY_RCU */ NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); @@ -1878,7 +1878,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), NFCT_INFOMASK + 1, - SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); + SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 85837ab90e89..d34bbd6d8f38 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -101,7 +101,7 @@ struct proto smc_proto = { .unhash = smc_unhash_sk, .obj_size = sizeof(struct smc_sock), .h.smc_hash = &smc_v4_hashinfo, - .slab_flags = SLAB_DESTROY_BY_RCU, + .slab_flags = SLAB_TYPESAFE_BY_RCU, }; EXPORT_SYMBOL_GPL(smc_proto); -- cgit v1.2.3 From cf1ef3f0719b4dcb74810ed507e2a2540f9811b4 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 20 Apr 2017 14:45:46 -0700 Subject: net/tcp_fastopen: Disable active side TFO in certain scenarios Middlebox firewall issues can potentially cause server's data being blackholed after a successful 3WHS using TFO. Following are the related reports from Apple: https://www.nanog.org/sites/default/files/Paasch_Network_Support.pdf Slide 31 identifies an issue where the client ACK to the server's data sent during a TFO'd handshake is dropped. C ---> syn-data ---> S C <--- syn/ack ----- S C (accept & write) C <---- data ------- S C ----- ACK -> X S [retry and timeout] https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf Slide 5 shows a similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] This is the worst failure b/c the client can not detect such behavior to mitigate the situation (such as disabling TFO). Failing to proceed, the application (e.g., SSL library) may simply timeout and retry with TFO again, and the process repeats indefinitely. The proposed solution is to disable active TFO globally under the following circumstances: 1. client side TFO socket detects out of order FIN 2. client side TFO socket receives out of order RST We disable active side TFO globally for 1hr at first. Then if it happens again, we disable it for 2h, then 4h, 8h, ... And we reset the timeout to 1hr if a client side TFO sockets not opened on loopback has successfully received data segs from server. And we examine this condition during close(). The rational behind it is that when such firewall issue happens, application running on the client should eventually close the socket as it is not able to get the data it is expecting. Or application running on the server should close the socket as it is not able to receive any response from client. In both cases, out of order FIN or RST will get received on the client given that the firewall will not block them as no data are in those frames. And we want to disable active TFO globally as it helps if the middle box is very close to the client and most of the connections are likely to fail. Also, add a debug sysctl: tcp_fastopen_blackhole_detect_timeout_sec: the initial timeout to use when firewall blackhole issue happens. This can be set and read. When setting it to 0, it means to disable the active disable logic. Signed-off-by: Wei Wang Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 +++ include/linux/tcp.h | 1 + include/net/tcp.h | 6 ++ net/ipv4/sysctl_net_ipv4.c | 21 +++++++ net/ipv4/tcp.c | 1 + net/ipv4/tcp_fastopen.c | 101 +++++++++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 23 ++++++-- net/ipv4/tcp_ipv4.c | 3 + 8 files changed, 160 insertions(+), 4 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index b1c6500e7a8d..974ab47ae53a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -602,6 +602,14 @@ tcp_fastopen - INTEGER Note that that additional client or server features are only effective if the basic support (0x1 and 0x2) are enabled respectively. +tcp_fastopen_blackhole_timeout_sec - INTEGER + Initial time period in second to disable Fastopen on active TCP sockets + when a TFO firewall blackhole issue happens. + This time period will grow exponentially when more blackhole issues + get detected right after Fastopen is re-enabled and will reset to + initial value when the blackhole issue goes away. + By default, it is set to 1hr. + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 127. Default value diff --git a/include/linux/tcp.h b/include/linux/tcp.h index cfc2d9506ce8..cbe5b602a2d3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -233,6 +233,7 @@ struct tcp_sock { u8 syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ diff --git a/include/net/tcp.h b/include/net/tcp.h index cc6ae0a95201..c1abc2abbdcb 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1506,6 +1506,12 @@ struct tcp_fastopen_context { struct rcu_head rcu; }; +extern unsigned int sysctl_tcp_fastopen_blackhole_timeout; +void tcp_fastopen_active_disable(void); +bool tcp_fastopen_active_should_disable(struct sock *sk); +void tcp_fastopen_active_disable_ofo_check(struct sock *sk); +void tcp_fastopen_active_timeout_reset(void); + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ddac9e64b702..86957e9cd6c6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -350,6 +350,19 @@ static int proc_udp_early_demux(struct ctl_table *table, int write, return ret; } +static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, + int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + tcp_fastopen_active_timeout_reset(); + return ret; +} + static struct ctl_table ipv4_table[] = { { .procname = "tcp_timestamps", @@ -399,6 +412,14 @@ static struct ctl_table ipv4_table[] = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), .proc_handler = proc_tcp_fastopen_key, }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, { .procname = "tcp_abort_on_overflow", .data = &sysctl_tcp_abort_on_overflow, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 04843ae77b9e..efc976ae66ae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2296,6 +2296,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); + tcp_fastopen_active_disable_ofo_check(sk); skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ea4e9787f82..ff2d30ffc6f3 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -341,6 +341,13 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, cookie->len = -1; return false; } + + /* Firewall blackhole issue check */ + if (tcp_fastopen_active_should_disable(sk)) { + cookie->len = -1; + return false; + } + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { cookie->len = -1; return true; @@ -380,3 +387,97 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) return false; } EXPORT_SYMBOL(tcp_fastopen_defer_connect); + +/* + * The following code block is to deal with middle box issues with TFO: + * Middlebox firewall issues can potentially cause server's data being + * blackholed after a successful 3WHS using TFO. + * The proposed solution is to disable active TFO globally under the + * following circumstances: + * 1. client side TFO socket receives out of order FIN + * 2. client side TFO socket receives out of order RST + * We disable active side TFO globally for 1hr at first. Then if it + * happens again, we disable it for 2h, then 4h, 8h, ... + * And we reset the timeout back to 1hr when we see a successful active + * TFO connection with data exchanges. + */ + +/* Default to 1hr */ +unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; +static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); +static unsigned long tfo_active_disable_stamp __read_mostly; + +/* Disable active TFO and record current jiffies and + * tfo_active_disable_times + */ +void tcp_fastopen_active_disable(void) +{ + atomic_inc(&tfo_active_disable_times); + tfo_active_disable_stamp = jiffies; +} + +/* Reset tfo_active_disable_times to 0 */ +void tcp_fastopen_active_timeout_reset(void) +{ + atomic_set(&tfo_active_disable_times, 0); +} + +/* Calculate timeout for tfo active disable + * Return true if we are still in the active TFO disable period + * Return false if timeout already expired and we should use active TFO + */ +bool tcp_fastopen_active_should_disable(struct sock *sk) +{ + int tfo_da_times = atomic_read(&tfo_active_disable_times); + int multiplier; + unsigned long timeout; + + if (!tfo_da_times) + return false; + + /* Limit timout to max: 2^6 * initial timeout */ + multiplier = 1 << min(tfo_da_times - 1, 6); + timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; + if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + return true; + + /* Mark check bit so we can check for successful active TFO + * condition and reset tfo_active_disable_times + */ + tcp_sk(sk)->syn_fastopen_ch = 1; + return false; +} + +/* Disable active TFO if FIN is the only packet in the ofo queue + * and no data is received. + * Also check if we can reset tfo_active_disable_times if data is + * received successfully on a marked active TFO sockets opened on + * a non-loopback interface + */ +void tcp_fastopen_active_disable_ofo_check(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct rb_node *p; + struct sk_buff *skb; + struct dst_entry *dst; + + if (!tp->syn_fastopen) + return; + + if (!tp->data_segs_in) { + p = rb_first(&tp->out_of_order_queue); + if (p && !rb_next(p)) { + skb = rb_entry(p, struct sk_buff, rbnode); + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { + tcp_fastopen_active_disable(); + return; + } + } + } else if (tp->syn_fastopen_ch && + atomic_read(&tfo_active_disable_times)) { + dst = sk_dst_get(sk); + if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) + tcp_fastopen_active_timeout_reset(); + dst_release(dst); + } +} diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 341f021f02a2..9f342a67dc74 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5300,8 +5300,16 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (rst_seq_match) tcp_reset(sk); - else + else { + /* Disable TFO if RST is out-of-order + * and no data has been received + * for current active TFO socket + */ + if (tp->syn_fastopen && !tp->data_segs_in && + sk->sk_state == TCP_ESTABLISHED) + tcp_fastopen_active_disable(); tcp_send_challenge_ack(sk, skb); + } goto discard; } @@ -6044,9 +6052,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) break; } - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + if (tp->linger2 < 0) { + tcp_done(sk); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) { + /* Receive out of order FIN after close() */ + if (tp->syn_fastopen && th->fin) + tcp_fastopen_active_disable(); tcp_done(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA); return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 20cbd2f07f28..cbbafe546c0f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1855,6 +1855,9 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Cleanup up the write buffer. */ tcp_write_queue_purge(sk); + /* Check if we want to disable active TFO */ + tcp_fastopen_active_disable_ofo_check(sk); + /* Cleans up our, hopefully empty, out_of_order_queue. */ skb_rbtree_purge(&tp->out_of_order_queue); -- cgit v1.2.3 From 84b114b98452c431299d99c135f751659e517acb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 May 2017 06:56:54 -0700 Subject: tcp: randomize timestamps on syncookies Whole point of randomization was to hide server uptime, but an attacker can simply start a syn flood and TCP generates 'old style' timestamps, directly revealing server jiffies value. Also, TSval sent by the server to a particular remote address vary depending on syncookies being sent or not, potentially triggering PAWS drops for innocent clients. Lets implement proper randomization, including for SYNcookies. Also we do not need to export sysctl_tcp_timestamps, since it is not used from a module. In v2, I added Florian feedback and contribution, adding tsoff to tcp_get_cookie_sock(). v3 removed one unused variable in tcp_v4_connect() as Florian spotted. Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection") Signed-off-by: Eric Dumazet Reviewed-by: Florian Westphal Tested-by: Florian Westphal Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/secure_seq.h | 10 ++++++---- include/net/tcp.h | 5 +++-- net/core/secure_seq.c | 31 +++++++++++++++++++------------ net/ipv4/syncookies.c | 12 ++++++++++-- net/ipv4/tcp_input.c | 8 +++----- net/ipv4/tcp_ipv4.c | 32 +++++++++++++++++++------------- net/ipv6/syncookies.c | 10 +++++++++- net/ipv6/tcp_ipv6.c | 32 +++++++++++++++++++------------- 8 files changed, 88 insertions(+), 52 deletions(-) (limited to 'net/ipv4/tcp_ipv4.c') diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index fe236b3429f0..b94006f6fbdd 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -6,10 +6,12 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff); -u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff); +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr); +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport); +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr); u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, diff --git a/include/net/tcp.h b/include/net/tcp.h index 270e5cc43c99..8c0e5a901d64 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -470,7 +470,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); @@ -1822,7 +1822,8 @@ struct tcp_request_sock_ops { #endif struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl, const struct request_sock *req); - __u32 (*init_seq_tsoff)(const struct sk_buff *skb, u32 *tsoff); + u32 (*init_seq)(const struct sk_buff *skb); + u32 (*init_ts_off)(const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6bd2f8fb0476..ae35cce3a40d 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -24,9 +24,13 @@ static siphash_key_t ts_secret __read_mostly; static __always_inline void net_secret_init(void) { - net_get_random_once(&ts_secret, sizeof(ts_secret)); net_get_random_once(&net_secret, sizeof(net_secret)); } + +static __always_inline void ts_secret_init(void) +{ + net_get_random_once(&ts_secret, sizeof(ts_secret)); +} #endif #ifdef CONFIG_INET @@ -47,7 +51,7 @@ static u32 seq_scale(u32 seq) #endif #if IS_ENABLED(CONFIG_IPV6) -static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) +u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; @@ -60,12 +64,14 @@ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr) if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } +EXPORT_SYMBOL(secure_tcpv6_ts_off); -u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; @@ -78,14 +84,14 @@ u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr, .sport = sport, .dport = dport }; - u64 hash; + u32 hash; + net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); - *tsoff = secure_tcpv6_ts_off(saddr, daddr); return seq_scale(hash); } -EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff); +EXPORT_SYMBOL(secure_tcpv6_seq); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) @@ -107,11 +113,12 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET -static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) +u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) { if (sysctl_tcp_timestamps != 1) return 0; + ts_secret_init(); return siphash_2u32((__force u32)saddr, (__force u32)daddr, &ts_secret); } @@ -121,15 +128,15 @@ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr) * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ -u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport, u32 *tsoff) +u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) { - u64 hash; + u32 hash; + net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); - *tsoff = secure_tcp_ts_off(saddr, daddr); return seq_scale(hash); } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 496b97e17aaf..0257d965f111 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -203,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, u32 tsoff) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -213,6 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); + tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); } else { @@ -292,6 +294,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; struct flowi4 fl4; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -311,6 +314,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -381,7 +389,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9739962bfb3f..5a3ad09e2786 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,7 +85,6 @@ int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); -EXPORT_SYMBOL(sysctl_tcp_timestamps); /* rfc5961 challenge ack rate limiting */ int sysctl_tcp_challenge_ack_limit = 1000; @@ -6347,8 +6346,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; - if (isn && tmp_opt.tstamp_ok) - af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); + if (tmp_opt.tstamp_ok) + tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb); if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ @@ -6368,7 +6367,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_release; } - isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off); + isn = af_ops->init_seq(skb); } if (!dst) { dst = af_ops->route_req(sk, &fl, req); @@ -6380,7 +6379,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (want_cookie) { isn = cookie_init_sequence(af_ops, sk, skb, &req->mss); - tcp_rsk(req)->ts_off = 0; req->cookie_ts = tmp_opt.tstamp_ok; if (!tmp_opt.tstamp_ok) inet_rsk(req)->ecn_ok = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cbbafe546c0f..3a51582bef55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -94,12 +94,18 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static u32 tcp_v4_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v4_init_seq(const struct sk_buff *skb) { - return secure_tcp_seq_and_tsoff(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcp_seq(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcp_ts_off(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -145,7 +151,6 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct flowi4 *fl4; struct rtable *rt; int err; - u32 seq; struct ip_options_rcu *inet_opt; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -232,13 +237,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; if (likely(!tp->repair)) { - seq = secure_tcp_seq_and_tsoff(inet->inet_saddr, - inet->inet_daddr, - inet->inet_sport, - usin->sin_port, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcp_seq(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + usin->sin_port); + tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + inet->inet_daddr); } inet->inet_id = tp->write_seq ^ jiffies; @@ -1239,7 +1244,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .cookie_init_seq = cookie_v4_init_sequence, #endif .route_req = tcp_v4_route_req, - .init_seq_tsoff = tcp_v4_init_seq_and_tsoff, + .init_seq = tcp_v4_init_seq, + .init_ts_off = tcp_v4_init_ts_off, .send_synack = tcp_v4_send_synack, }; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 895ff650db43..5abc3692b901 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; + u32 tsoff = 0; if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -162,6 +164,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); + if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { + tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); + tcp_opt.rcv_tsecr -= tsoff; + } + if (!cookie_timestamp_decode(&tcp_opt)) goto out; @@ -242,7 +250,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: return ret; out_free: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8e42e8f54b70..aeb9497b5bb7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -101,12 +101,18 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static u32 tcp_v6_init_seq_and_tsoff(const struct sk_buff *skb, u32 *tsoff) +static u32 tcp_v6_init_seq(const struct sk_buff *skb) { - return secure_tcpv6_seq_and_tsoff(ipv6_hdr(skb)->daddr.s6_addr32, - ipv6_hdr(skb)->saddr.s6_addr32, - tcp_hdr(skb)->dest, - tcp_hdr(skb)->source, tsoff); + return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32, + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); +} + +static u32 tcp_v6_init_ts_off(const struct sk_buff *skb) +{ + return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32, + ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -122,7 +128,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct flowi6 fl6; struct dst_entry *dst; int addr_type; - u32 seq; int err; struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; @@ -282,13 +287,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk_set_txhash(sk); if (likely(!tp->repair)) { - seq = secure_tcpv6_seq_and_tsoff(np->saddr.s6_addr32, - sk->sk_v6_daddr.s6_addr32, - inet->inet_sport, - inet->inet_dport, - &tp->tsoffset); if (!tp->write_seq) - tp->write_seq = seq; + tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32, + inet->inet_sport, + inet->inet_dport); + tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) @@ -749,7 +754,8 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, - .init_seq_tsoff = tcp_v6_init_seq_and_tsoff, + .init_seq = tcp_v6_init_seq, + .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; -- cgit v1.2.3