From bdaae17da81db79b9aa4dfbf43305cfeef64f6a8 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Wed, 21 Feb 2007 22:59:58 -0800 Subject: [TCP] FRTO: Moved tcp_use_frto from tcp.h to tcp_input.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In addition, removed inline. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 5c472f255b77..572a77bb6907 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -341,6 +341,7 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); +extern int tcp_use_frto(const struct sock *sk); extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); @@ -1033,19 +1034,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int #define TCP_CHECK_TIMER(sk) do { } while (0) -static inline int tcp_use_frto(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - /* F-RTO must be activated in sysctl and there must be some - * unsent new data, and the advertised window should allow - * sending it. - */ - return (sysctl_tcp_frto && sk->sk_send_head && - !after(TCP_SKB_CB(sk->sk_send_head)->end_seq, - tp->snd_una + tp->snd_wnd)); -} - static inline void tcp_mib_init(void) { /* See RFC 2012 */ -- cgit v1.2.3 From 46d0de4ed92650b95f27acae09914996bbe624e7 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Wed, 21 Feb 2007 23:10:39 -0800 Subject: [TCP] FRTO: Entry is allowed only during (New)Reno like recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This interpretation comes from RFC4138: "If the sender implements some loss recovery algorithm other than Reno or NewReno [FHG04], the F-RTO algorithm SHOULD NOT be entered when earlier fast recovery is underway." I think the RFC means to say (especially in the light of Appendix B) that ...recovery is underway (not just fast recovery) or was underway when it was interrupted by an earlier (F-)RTO that hasn't yet been resolved (snd_una has not advanced enough). Thus, my interpretation is that whenever TCP has ever retransmitted other than head, basic version cannot be used because then the order assumptions which are used as FRTO basis do not hold. NewReno has only the head segment retransmitted at a time. Therefore, walk up to the segment that has not been SACKed, if that segment is not retransmitted nor anything before it, we know for sure, that nothing after the non-SACKed segment should be either. This assumption is valid because TCPCB_EVER_RETRANS does not leave holes but each non-SACKed segment is rexmitted in-order. Check for retrans_out > 1 avoids more expensive walk through the skb list, as we can know the result beforehand: F-RTO will not be allowed. SACKed skb can turn into non-SACked only in the extremely rare case of SACK reneging, in this case we might fail to detect retransmissions if there were them for any other than head. To get rid of that feature, whole rexmit queue would have to be walked (always) or FRTO should be prevented when SACK reneging happens. Of course RTO should still trigger after reneging which makes this issue even less likely to show up. And as long as the response is as conservative as it's now, nothing bad happens even then. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 25 +++++++++++++++++++++---- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 572a77bb6907..7fd6b77519c3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -341,7 +341,7 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); -extern int tcp_use_frto(const struct sock *sk); +extern int tcp_use_frto(struct sock *sk); extern void tcp_enter_frto(struct sock *sk); extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_clear_retrans(struct tcp_sock *tp); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 723cee63791f..a283fc12186e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1239,14 +1239,31 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ /* F-RTO can only be used if these conditions are satisfied: * - there must be some unsent new data * - the advertised window should allow sending it + * - TCP has never retransmitted anything other than head */ -int tcp_use_frto(const struct sock *sk) +int tcp_use_frto(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (!sysctl_tcp_frto || !sk->sk_send_head || + after(TCP_SKB_CB(sk->sk_send_head)->end_seq, + tp->snd_una + tp->snd_wnd)) + return 0; - return (sysctl_tcp_frto && sk->sk_send_head && - !after(TCP_SKB_CB(sk->sk_send_head)->end_seq, - tp->snd_una + tp->snd_wnd)); + /* Avoid expensive walking of rexmit queue if possible */ + if (tp->retrans_out > 1) + return 0; + + skb = skb_peek(&sk->sk_write_queue)->next; /* Skips head */ + sk_stream_for_retrans_queue_from(skb, sk) { + if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) + return 0; + /* Short-circuit when first non-SACKed skb has been checked */ + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) + break; + } + return 1; } /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO -- cgit v1.2.3 From 886236c1247ab5e2ad9c73f6e9a652e3ae3c8b07 Mon Sep 17 00:00:00 2001 From: John Heffner Date: Sun, 25 Mar 2007 19:21:45 -0700 Subject: [TCP]: Add RFC3742 Limited Slow-Start, controlled by variable sysctl_tcp_max_ssthresh. Signed-off-by: John Heffner Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/tcp.h | 1 + net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp_cong.c | 31 ++++++++++++++++++++++--------- 4 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 9a8970bf99a6..98e0fd241a25 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -438,6 +438,7 @@ enum NET_CIPSOV4_RBM_STRICTVALID=121, NET_TCP_AVAIL_CONG_CONTROL=122, NET_TCP_ALLOWED_CONG_CONTROL=123, + NET_TCP_MAX_SSTHRESH=124, }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index 7fd6b77519c3..6d09f5085f6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -230,6 +230,7 @@ extern int sysctl_tcp_mtu_probing; extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; +extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0aa304711a96..d68effe98e8d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -803,6 +803,14 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_allowed_congestion_control, .strategy = &strategy_allowed_congestion_control, }, + { + .ctl_name = NET_TCP_MAX_SSTHRESH, + .procname = "tcp_max_ssthresh", + .data = &sysctl_tcp_max_ssthresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 34ae3f13483a..ccd88407e0cd 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -12,6 +12,8 @@ #include #include +int sysctl_tcp_max_ssthresh = 0; + static DEFINE_SPINLOCK(tcp_cong_list_lock); static LIST_HEAD(tcp_cong_list); @@ -274,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) /* - * Linear increase during slow start + * Slow start (exponential increase) with + * RFC3742 Limited Slow Start (fast linear increase) support. */ void tcp_slow_start(struct tcp_sock *tp) { + int cnt = 0; + if (sysctl_tcp_abc) { /* RFC3465: Slow Start * TCP sender SHOULD increase cwnd by the number of @@ -286,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp) */ if (tp->bytes_acked < tp->mss_cache) return; - - /* We MAY increase by 2 if discovered delayed ack */ - if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - } } + + if (sysctl_tcp_max_ssthresh > 0 && + tp->snd_cwnd > sysctl_tcp_max_ssthresh) + cnt += sysctl_tcp_max_ssthresh>>1; + else + cnt += tp->snd_cwnd; + + /* RFC3465: We MAY increase by 2 if discovered delayed ack */ + if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) + cnt <<= 1; tp->bytes_acked = 0; - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; + tp->snd_cwnd_cnt += cnt; + while (tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd_cnt -= tp->snd_cwnd; + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + } } EXPORT_SYMBOL_GPL(tcp_slow_start); -- cgit v1.2.3 From 3cfe3baaf07c9e40a75f9a70662de56df1c246a8 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 27 Feb 2007 10:09:49 -0800 Subject: [TCP]: Add two new spurious RTO responses to FRTO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New sysctl tcp_frto_response is added to select amongst these responses: - Rate halving based; reuses CA_CWR state (default) - Very conservative; used to be the only one available (=1) - Undo cwr; undoes ssthresh and cwnd reductions (=2) The response with rate halving requires a new parameter to tcp_enter_cwr because FRTO has already reduced ssthresh and doing a second reduction there has to be prevented. In addition, to keep things nice on 80 cols screen, a local variable was added. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/sysctl.h | 1 + include/net/tcp.h | 3 ++- net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++---- net/ipv4/tcp_output.c | 2 +- 5 files changed, 44 insertions(+), 6 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 98e0fd241a25..c9ccb550206f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -439,6 +439,7 @@ enum NET_TCP_AVAIL_CONG_CONTROL=122, NET_TCP_ALLOWED_CONG_CONTROL=123, NET_TCP_MAX_SSTHRESH=124, + NET_TCP_FRTO_RESPONSE=125, }; enum { diff --git a/include/net/tcp.h b/include/net/tcp.h index 6d09f5085f6a..f0c9e3400a09 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; +extern int sysctl_tcp_frto_response; extern int sysctl_tcp_low_latency; extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_nometrics_save; @@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) tp->left_out = tp->sacked_out + tp->lost_out; } -extern void tcp_enter_cwr(struct sock *sk); +extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d68effe98e8d..6817d6485df5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -646,6 +646,14 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .ctl_name = NET_TCP_FRTO_RESPONSE, + .procname = "tcp_frto_response", + .data = &sysctl_tcp_frto_response, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { .ctl_name = NET_TCP_LOW_LATENCY, .procname = "tcp_low_latency", diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f6ba07f0d816..322e43c56461 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly; +int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; @@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) } /* Set slow start threshold and cwnd not falling to slow start */ -void tcp_enter_cwr(struct sock *sk) +void tcp_enter_cwr(struct sock *sk, const int set_ssthresh) { struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); tp->prior_ssthresh = 0; tp->bytes_acked = 0; if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { tp->undo_marker = 0; - tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk); + if (set_ssthresh) + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) tp->retrans_stamp = 0; if (flag&FLAG_ECE) - tcp_enter_cwr(sk); + tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; @@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp) tcp_moderate_cwnd(tp); } +/* A conservative spurious RTO response algorithm: reduce cwnd using + * rate halving and continue in congestion avoidance. + */ +static void tcp_ratehalving_spur_to_response(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + tcp_enter_cwr(sk, 0); + tp->high_seq = tp->frto_highmark; /* Smoother w/o this? - ij */ +} + +static void tcp_undo_spur_to_response(struct sock *sk) +{ + tcp_undo_cwr(sk, 1); +} + /* F-RTO spurious RTO detection algorithm (RFC4138) * * F-RTO affects during two new ACKs following RTO (well, almost, see inline @@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) tp->frto_counter = 2; return 1; } else /* frto_counter == 2 */ { - tcp_conservative_spur_to_response(tp); + switch (sysctl_tcp_frto_response) { + case 2: + tcp_undo_spur_to_response(sk); + break; + case 1: + tcp_conservative_spur_to_response(tp); + break; + default: + tcp_ratehalving_spur_to_response(sk); + break; + }; tp->frto_counter = 0; } return 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3c24881f2a65..d19b2f3b70fd 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (likely(err <= 0)) return err; - tcp_enter_cwr(sk); + tcp_enter_cwr(sk, 1); return net_xmit_eval(err); -- cgit v1.2.3 From 9d729f72dca9406025bcfa9c1f660d71d9ef0ff5 Mon Sep 17 00:00:00 2001 From: James Morris Date: Sun, 4 Mar 2007 16:12:44 -0800 Subject: [NET]: Convert xtime.tv_sec to get_seconds() Where appropriate, convert references to xtime.tv_sec to the get_seconds() helper function. Signed-off-by: James Morris Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- net/ipv4/route.c | 2 +- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 10 +++++----- net/ipv4/tcp_minisocks.c | 8 ++++---- net/ipv6/xfrm6_output.c | 2 +- net/rxrpc/main.c | 2 +- net/xfrm/xfrm_policy.c | 12 ++++++------ net/xfrm/xfrm_state.c | 6 +++--- 9 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index f0c9e3400a09..181c0600af1c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1014,7 +1014,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int { if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0) return 0; - if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS) + if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS) return 0; /* RST segments are not recommended to carry timestamp, @@ -1029,7 +1029,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int However, we can relax time bounds for RST segments to MSL. */ - if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) + if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL) return 0; return 1; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 37e0d4d5cf94..0b3d7bf40f4e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, id = rt->peer->ip_id_count; if (rt->peer->tcp_ts_stamp) { ts = rt->peer->tcp_ts; - tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; + tsage = get_seconds() - rt->peer->tcp_ts_stamp; } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d894bbcc1d24..d0a3630f41a7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2933,7 +2933,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, static inline void tcp_store_ts_recent(struct tcp_sock *tp) { tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval; - tp->rx_opt.ts_recent_stamp = xtime.tv_sec; + tp->rx_opt.ts_recent_stamp = get_seconds(); } static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) @@ -2947,7 +2947,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) */ if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || - xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) + get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) tcp_store_ts_recent(tp); } } @@ -2999,7 +2999,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff * { const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && - xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && + get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && !tcp_disordered_ack(sk, skb)); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6793b4cc669..addac1110f94 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) */ if (tcptw->tw_ts_recent_stamp && (twp == NULL || (sysctl_tcp_tw_reuse && - xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { + get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (tp->write_seq == 0) tp->write_seq = 1; @@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * when trying new connection. */ if (peer != NULL && - peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) { + peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; tp->rx_opt.ts_recent = peer->tcp_ts; } @@ -1351,7 +1351,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { - if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && + if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) { NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); @@ -1770,7 +1770,7 @@ int tcp_v4_remember_stamp(struct sock *sk) if (peer) { if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && + (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; peer->tcp_ts = tp->rx_opt.ts_recent; @@ -1791,7 +1791,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && + (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; peer->tcp_ts = tcptw->tw_ts_recent; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 706932726a11..ac4ce48a6599 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -149,7 +149,7 @@ kill_with_rst: tw->tw_substate = TCP_TIME_WAIT; tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tmp_opt.saw_tstamp) { - tcptw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent_stamp = get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } @@ -208,7 +208,7 @@ kill: if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; - tcptw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent_stamp = get_seconds(); } inet_twsk_put(tw); @@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (newtp->rx_opt.tstamp_ok) { newtp->rx_opt.ts_recent = req->ts_recent; - newtp->rx_opt.ts_recent_stamp = xtime.tv_sec; + newtp->rx_opt.ts_recent_stamp = get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { newtp->rx_opt.ts_recent_stamp = 0; @@ -504,7 +504,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, * it can be estimated (approximately) * from another data. */ - tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<retrans); + tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<retrans); paws_reject = tcp_paws_check(&tmp_opt, th->rst); } } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index d6d786b89d2b..8e4170f9a0da 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -76,7 +76,7 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) - x->lastused = (u64)xtime.tv_sec; + x->lastused = get_seconds(); spin_unlock_bh(&x->lock); diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c index baec1f7fd8b9..cead31b5bdf5 100644 --- a/net/rxrpc/main.c +++ b/net/rxrpc/main.c @@ -37,7 +37,7 @@ static int __init rxrpc_initialise(void) int ret; /* my epoch value */ - rxrpc_epoch = htonl(xtime.tv_sec); + rxrpc_epoch = htonl(get_seconds()); /* register the /proc interface */ #ifdef CONFIG_PROC_FS diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 785c3e39f062..194257554553 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_policy_timer(unsigned long data) { struct xfrm_policy *xp = (struct xfrm_policy*)data; - unsigned long now = (unsigned long)xtime.tv_sec; + unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; int dir; @@ -690,7 +690,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); - policy->curlft.add_time = (unsigned long)xtime.tv_sec; + policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); @@ -1133,7 +1133,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { - pol->curlft.add_time = (unsigned long)xtime.tv_sec; + pol->curlft.add_time = get_seconds(); pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } @@ -1386,7 +1386,7 @@ restart: return 0; family = dst_orig->ops->family; - policy->curlft.use_time = (unsigned long)xtime.tv_sec; + policy->curlft.use_time = get_seconds(); pols[0] = policy; npols ++; xfrm_nr += pols[0]->xfrm_nr; @@ -1682,7 +1682,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, return 1; } - pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pol->curlft.use_time = get_seconds(); pols[0] = pol; npols ++; @@ -1694,7 +1694,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (pols[1]) { if (IS_ERR(pols[1])) return 0; - pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[1]->curlft.use_time = get_seconds(); npols ++; } } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e3a0bcfa5df1..63a20e818164 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs) static void xfrm_timer_handler(unsigned long data) { struct xfrm_state *x = (struct xfrm_state*)data; - unsigned long now = (unsigned long)xtime.tv_sec; + unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; int err = 0; @@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void) init_timer(&x->rtimer); x->rtimer.function = xfrm_replay_timer_handler; x->rtimer.data = (unsigned long)x; - x->curlft.add_time = (unsigned long)xtime.tv_sec; + x->curlft.add_time = get_seconds(); x->lft.soft_byte_limit = XFRM_INF; x->lft.soft_packet_limit = XFRM_INF; x->lft.hard_byte_limit = XFRM_INF; @@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(xfrm_state_update); int xfrm_state_check_expire(struct xfrm_state *x) { if (!x->curlft.use_time) - x->curlft.use_time = (unsigned long)xtime.tv_sec; + x->curlft.use_time = get_seconds(); if (x->km.state != XFRM_STATE_VALID) return -EINVAL; -- cgit v1.2.3 From fe067e8ab5e0dc5ca3c54634924c628da92090b4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 Mar 2007 12:12:44 -0800 Subject: [TCP]: Abstract out all write queue operations. This allows the write queue implementation to be changed, for example, to one which allows fast interval searching. Signed-off-by: David S. Miller --- include/net/sock.h | 21 ---------- include/net/tcp.h | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 32 +++++++------- net/ipv4/tcp_input.c | 64 ++++++++++++++++++---------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 95 +++++++++++++++++++---------------------- net/ipv4/tcp_timer.c | 10 ++--- 7 files changed, 221 insertions(+), 117 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/sock.h b/include/net/sock.h index 9583639090d2..2974bacc8850 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -710,15 +710,6 @@ static inline void sk_stream_mem_reclaim(struct sock *sk) __sk_stream_mem_reclaim(sk); } -static inline void sk_stream_writequeue_purge(struct sock *sk) -{ - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) - sk_stream_free_skb(sk, skb); - sk_stream_mem_reclaim(sk); -} - static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb) { return (int)skb->truesize <= sk->sk_forward_alloc || @@ -1256,18 +1247,6 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk) return page; } -#define sk_stream_for_retrans_queue(skb, sk) \ - for (skb = (sk)->sk_write_queue.next; \ - (skb != (sk)->sk_send_head) && \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = skb->next) - -/*from STCP for fast SACK Process*/ -#define sk_stream_for_retrans_queue_from(skb, sk) \ - for (; (skb != (sk)->sk_send_head) && \ - (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ - skb = skb->next) - /* * Default write policy as shown to user space via poll/select/SIGIO */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 181c0600af1c..6dacc352dcf1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1162,6 +1162,120 @@ static inline void tcp_put_md5sig_pool(void) put_cpu(); } +/* write queue abstraction */ +static inline void tcp_write_queue_purge(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) + sk_stream_free_skb(sk, skb); + sk_stream_mem_reclaim(sk); +} + +static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) +{ + struct sk_buff *skb = sk->sk_write_queue.next; + if (skb == (struct sk_buff *) &sk->sk_write_queue) + return NULL; + return skb; +} + +static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk) +{ + struct sk_buff *skb = sk->sk_write_queue.prev; + if (skb == (struct sk_buff *) &sk->sk_write_queue) + return NULL; + return skb; +} + +static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb) +{ + return skb->next; +} + +#define tcp_for_write_queue(skb, sk) \ + for (skb = (sk)->sk_write_queue.next; \ + (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ + skb = skb->next) + +#define tcp_for_write_queue_from(skb, sk) \ + for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\ + skb = skb->next) + +static inline struct sk_buff *tcp_send_head(struct sock *sk) +{ + return sk->sk_send_head; +} + +static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_send_head = skb->next; + if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) + sk->sk_send_head = NULL; +} + +static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) +{ + if (sk->sk_send_head == skb_unlinked) + sk->sk_send_head = NULL; +} + +static inline void tcp_init_send_head(struct sock *sk) +{ + sk->sk_send_head = NULL; +} + +static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) +{ + __skb_queue_tail(&sk->sk_write_queue, skb); +} + +static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb) +{ + __tcp_add_write_queue_tail(sk, skb); + + /* Queue it, remembering where we must start sending. */ + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; +} + +static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb) +{ + __skb_queue_head(&sk->sk_write_queue, skb); +} + +/* Insert buff after skb on the write queue of sk. */ +static inline void tcp_insert_write_queue_after(struct sk_buff *skb, + struct sk_buff *buff, + struct sock *sk) +{ + __skb_append(skb, buff, &sk->sk_write_queue); +} + +/* Insert skb between prev and next on the write queue of sk. */ +static inline void tcp_insert_write_queue_before(struct sk_buff *new, + struct sk_buff *skb, + struct sock *sk) +{ + __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); +} + +static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) +{ + __skb_unlink(skb, &sk->sk_write_queue); +} + +static inline int tcp_skb_is_last(const struct sock *sk, + const struct sk_buff *skb) +{ + return skb->next == (struct sk_buff *)&sk->sk_write_queue; +} + +static inline int tcp_write_queue_empty(struct sock *sk) +{ + return skb_queue_empty(&sk->sk_write_queue); +} + /* /proc */ enum tcp_seq_states { TCP_SEQ_STATE_LISTENING, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3834b10b5115..689f9330f1b9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, tcb->flags = TCPCB_FLAG_ACK; tcb->sacked = 0; skb_header_release(skb); - __skb_queue_tail(&sk->sk_write_queue, skb); + tcp_add_write_queue_tail(sk, skb); sk_charge_skb(sk, skb); - if (!sk->sk_send_head) - sk->sk_send_head = skb; if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } @@ -491,8 +489,8 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags, int mss_now, int nonagle) { - if (sk->sk_send_head) { - struct sk_buff *skb = sk->sk_write_queue.prev; + if (tcp_send_head(sk)) { + struct sk_buff *skb = tcp_write_queue_tail(sk); if (!(flags & MSG_MORE) || forced_push(tp)) tcp_mark_push(tp, skb); tcp_mark_urg(tp, flags, skb); @@ -526,13 +524,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse goto do_error; while (psize > 0) { - struct sk_buff *skb = sk->sk_write_queue.prev; + struct sk_buff *skb = tcp_write_queue_tail(sk); struct page *page = pages[poffset / PAGE_SIZE]; int copy, i, can_coalesce; int offset = poffset % PAGE_SIZE; int size = min_t(size_t, psize, PAGE_SIZE - offset); - if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) { + if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; @@ -589,7 +587,7 @@ new_segment: if (forced_push(tp)) { tcp_mark_push(tp, skb); __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); - } else if (skb == sk->sk_send_head) + } else if (skb == tcp_send_head(sk)) tcp_push_one(sk, mss_now); continue; @@ -704,9 +702,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, while (seglen > 0) { int copy; - skb = sk->sk_write_queue.prev; + skb = tcp_write_queue_tail(sk); - if (!sk->sk_send_head || + if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) { new_segment: @@ -833,7 +831,7 @@ new_segment: if (forced_push(tp)) { tcp_mark_push(tp, skb); __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); - } else if (skb == sk->sk_send_head) + } else if (skb == tcp_send_head(sk)) tcp_push_one(sk, mss_now); continue; @@ -860,9 +858,11 @@ out: do_fault: if (!skb->len) { - if (sk->sk_send_head == skb) - sk->sk_send_head = NULL; - __skb_unlink(skb, &sk->sk_write_queue); + tcp_unlink_write_queue(skb, sk); + /* It is the one place in all of TCP, except connection + * reset, where we can be unlinking the send_head. + */ + tcp_check_send_head(sk, skb); sk_stream_free_skb(sk, skb); } @@ -1732,7 +1732,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); - sk_stream_writequeue_purge(sk); + tcp_write_queue_purge(sk); __skb_queue_purge(&tp->out_of_order_queue); #ifdef CONFIG_NET_DMA __skb_queue_purge(&sk->sk_async_wait_queue); @@ -1758,7 +1758,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); - sk->sk_send_head = NULL; + tcp_init_send_head(sk); tp->rx_opt.saw_tstamp = 0; tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d0a3630f41a7..22d0bb03c5da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1044,7 +1044,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ cached_skb = tp->fastpath_skb_hint; cached_fack_count = tp->fastpath_cnt_hint; if (!cached_skb) { - cached_skb = sk->sk_write_queue.next; + cached_skb = tcp_write_queue_head(sk); cached_fack_count = 0; } @@ -1061,10 +1061,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (after(end_seq, tp->high_seq)) flag |= FLAG_DATA_LOST; - sk_stream_for_retrans_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk) { int in_sack, pcount; u8 sacked; + if (skb == tcp_send_head(sk)) + break; + cached_skb = skb; cached_fack_count = fack_count; if (i == first_sack_index) { @@ -1213,7 +1216,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { struct sk_buff *skb; - sk_stream_for_retrans_queue(skb, sk) { + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) break; if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) @@ -1266,8 +1271,8 @@ int tcp_use_frto(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - if (!sysctl_tcp_frto || !sk->sk_send_head || - after(TCP_SKB_CB(sk->sk_send_head)->end_seq, + if (!sysctl_tcp_frto || !tcp_send_head(sk) || + after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tp->snd_una + tp->snd_wnd)) return 0; @@ -1278,8 +1283,11 @@ int tcp_use_frto(struct sock *sk) if (tp->retrans_out > 1) return 0; - skb = skb_peek(&sk->sk_write_queue)->next; /* Skips head */ - sk_stream_for_retrans_queue_from(skb, sk) { + skb = tcp_write_queue_head(sk); + skb = tcp_write_queue_next(sk, skb); /* Skips head */ + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) return 0; /* Short-circuit when first non-SACKed skb has been checked */ @@ -1343,7 +1351,7 @@ void tcp_enter_frto(struct sock *sk) tp->undo_marker = tp->snd_una; tp->undo_retrans = 0; - skb = skb_peek(&sk->sk_write_queue); + skb = tcp_write_queue_head(sk); if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); @@ -1380,7 +1388,9 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->fackets_out = 0; tp->retrans_out = 0; - sk_stream_for_retrans_queue(skb, sk) { + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; cnt += tcp_skb_pcount(skb); /* * Count the retransmission made on RTO correctly (only when @@ -1468,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how) if (!how) tp->undo_marker = tp->snd_una; - sk_stream_for_retrans_queue(skb, sk) { + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; cnt += tcp_skb_pcount(skb); if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; @@ -1503,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk) * receiver _host_ is heavily congested (or buggy). * Do processing similar to RTO timeout. */ - if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && + if ((skb = tcp_write_queue_head(sk)) != NULL && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); icsk->icsk_retransmits++; - tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); + tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); return 1; @@ -1531,7 +1543,7 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) { return tp->packets_out && - tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); + tcp_skb_timedout(sk, tcp_write_queue_head(sk)); } /* Linux NewReno/SACK/FACK/ECN state machine. @@ -1726,11 +1738,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, skb = tp->lost_skb_hint; cnt = tp->lost_cnt_hint; } else { - skb = sk->sk_write_queue.next; + skb = tcp_write_queue_head(sk); cnt = 0; } - sk_stream_for_retrans_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; /* TODO: do this better */ /* this is not the most efficient way to do this... */ tp->lost_skb_hint = skb; @@ -1777,9 +1791,11 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint - : sk->sk_write_queue.next; + : tcp_write_queue_head(sk); - sk_stream_for_retrans_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; if (!tcp_skb_timedout(sk, skb)) break; @@ -1970,7 +1986,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) { if (tcp_may_undo(tp)) { struct sk_buff *skb; - sk_stream_for_retrans_queue(skb, sk) { + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } @@ -2382,8 +2400,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) = icsk->icsk_ca_ops->rtt_sample; struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; - while ((skb = skb_peek(&sk->sk_write_queue)) && - skb != sk->sk_send_head) { + while ((skb = tcp_write_queue_head(sk)) && + skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); __u8 sacked = scb->sacked; @@ -2446,7 +2464,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); - __skb_unlink(skb, &sk->sk_write_queue); + tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); clear_all_retrans_hints(tp); } @@ -2495,7 +2513,7 @@ static void tcp_ack_probe(struct sock *sk) /* Was it a usable window open? */ - if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, + if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tp->snd_una + tp->snd_wnd)) { icsk->icsk_backoff = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); @@ -2795,7 +2813,7 @@ no_queue: * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. */ - if (sk->sk_send_head) + if (tcp_send_head(sk)) tcp_ack_probe(sk); return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index addac1110f94..3326681b8429 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1890,7 +1890,7 @@ int tcp_v4_destroy_sock(struct sock *sk) tcp_cleanup_congestion_control(sk); /* Cleanup up the write buffer. */ - sk_stream_writequeue_purge(sk); + tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. */ __skb_queue_purge(&tp->out_of_order_queue); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d19b2f3b70fd..2a62b55b15f1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,9 +65,7 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { - sk->sk_send_head = skb->next; - if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) - sk->sk_send_head = NULL; + tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tcp_packets_out_inc(sk, tp, skb); } @@ -567,12 +565,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) /* Advance write_seq and place onto the write_queue. */ tp->write_seq = TCP_SKB_CB(skb)->end_seq; skb_header_release(skb); - __skb_queue_tail(&sk->sk_write_queue, skb); + tcp_add_write_queue_tail(sk, skb); sk_charge_skb(sk, skb); - - /* Queue it, remembering where we must start sending. */ - if (sk->sk_send_head == NULL) - sk->sk_send_head = skb; } static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) @@ -705,7 +699,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff, &sk->sk_write_queue); + tcp_insert_write_queue_after(skb, buff, sk); return 0; } @@ -1056,7 +1050,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns return !after(end_seq, tp->snd_una + tp->snd_wnd); } -/* This checks if the data bearing packet SKB (usually sk->sk_send_head) +/* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) * should be put on the wire right now. If so, it returns the number of * packets allowed by the congestion window. */ @@ -1079,15 +1073,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, return cwnd_quota; } -static inline int tcp_skb_is_last(const struct sock *sk, - const struct sk_buff *skb) -{ - return skb->next == (struct sk_buff *)&sk->sk_write_queue; -} - int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) { - struct sk_buff *skb = sk->sk_send_head; + struct sk_buff *skb = tcp_send_head(sk); return (skb && tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), @@ -1143,7 +1131,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff, &sk->sk_write_queue); + tcp_insert_write_queue_after(skb, buff, sk); return 0; } @@ -1249,10 +1237,10 @@ static int tcp_mtu_probe(struct sock *sk) /* Have enough data in the send queue to probe? */ len = 0; - if ((skb = sk->sk_send_head) == NULL) + if ((skb = tcp_send_head(sk)) == NULL) return -1; while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb)) - skb = skb->next; + skb = tcp_write_queue_next(sk, skb); if (len < probe_size) return -1; @@ -1279,9 +1267,9 @@ static int tcp_mtu_probe(struct sock *sk) return -1; sk_charge_skb(sk, nskb); - skb = sk->sk_send_head; - __skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue); - sk->sk_send_head = nskb; + skb = tcp_send_head(sk); + tcp_insert_write_queue_before(nskb, skb, sk); + tcp_advance_send_head(sk, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; @@ -1292,7 +1280,7 @@ static int tcp_mtu_probe(struct sock *sk) len = 0; while (len < probe_size) { - next = skb->next; + next = tcp_write_queue_next(sk, skb); copy = min_t(int, skb->len, probe_size - len); if (nskb->ip_summed) @@ -1305,7 +1293,7 @@ static int tcp_mtu_probe(struct sock *sk) /* We've eaten all the data from this skb. * Throw it away. */ TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags; - __skb_unlink(skb, &sk->sk_write_queue); + tcp_unlink_write_queue(skb, sk); sk_stream_free_skb(sk, skb); } else { TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & @@ -1377,7 +1365,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) sent_pkts = 1; } - while ((skb = sk->sk_send_head)) { + while ((skb = tcp_send_head(sk))) { unsigned int limit; tso_segs = tcp_init_tso_segs(sk, skb, mss_now); @@ -1435,7 +1423,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_cwnd_validate(sk, tp); return 0; } - return !tp->packets_out && sk->sk_send_head; + return !tp->packets_out && tcp_send_head(sk); } /* Push out any pending frames which were held back due to @@ -1445,7 +1433,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, unsigned int cur_mss, int nonagle) { - struct sk_buff *skb = sk->sk_send_head; + struct sk_buff *skb = tcp_send_head(sk); if (skb) { if (tcp_write_xmit(sk, cur_mss, nonagle)) @@ -1459,7 +1447,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, void tcp_push_one(struct sock *sk, unsigned int mss_now) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = sk->sk_send_head; + struct sk_buff *skb = tcp_send_head(sk); unsigned int tso_segs, cwnd_quota; BUG_ON(!skb || skb->len < mss_now); @@ -1620,7 +1608,7 @@ u32 __tcp_select_window(struct sock *sk) static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *next_skb = skb->next; + struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); /* The first test we must make is that neither of these two * SKB's are still referenced by someone else. @@ -1652,7 +1640,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m clear_all_retrans_hints(tp); /* Ok. We will be able to collapse the packet. */ - __skb_unlink(next_skb, &sk->sk_write_queue); + tcp_unlink_write_queue(next_skb, sk); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); @@ -1706,7 +1694,9 @@ void tcp_simple_retransmit(struct sock *sk) unsigned int mss = tcp_current_mss(sk, 0); int lost = 0; - sk_stream_for_retrans_queue(skb, sk) { + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; if (skb->len > mss && !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { @@ -1790,10 +1780,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (skb->next != sk->sk_send_head) && - (skb->next != (struct sk_buff *)&sk->sk_write_queue) && - (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && - (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) && + (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && + (!tcp_skb_is_last(sk, skb)) && + (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && + (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); @@ -1872,15 +1862,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk) skb = tp->retransmit_skb_hint; packet_cnt = tp->retransmit_cnt_hint; }else{ - skb = sk->sk_write_queue.next; + skb = tcp_write_queue_head(sk); packet_cnt = 0; } /* First pass: retransmit lost packets. */ if (tp->lost_out) { - sk_stream_for_retrans_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + if (skb == tcp_send_head(sk)) + break; /* we could do better than to assign each time */ tp->retransmit_skb_hint = skb; tp->retransmit_cnt_hint = packet_cnt; @@ -1906,8 +1898,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); - if (skb == - skb_peek(&sk->sk_write_queue)) + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); @@ -1944,11 +1935,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk) skb = tp->forward_skb_hint; packet_cnt = tp->forward_cnt_hint; } else{ - skb = sk->sk_write_queue.next; + skb = tcp_write_queue_head(sk); packet_cnt = 0; } - sk_stream_for_retrans_queue_from(skb, sk) { + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; tp->forward_cnt_hint = packet_cnt; tp->forward_skb_hint = skb; @@ -1973,7 +1966,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; } - if (skb == skb_peek(&sk->sk_write_queue)) + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); @@ -1989,7 +1982,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) void tcp_send_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); + struct sk_buff *skb = tcp_write_queue_tail(sk); int mss_now; /* Optimization, tack on the FIN if we have a queue of @@ -1998,7 +1991,7 @@ void tcp_send_fin(struct sock *sk) */ mss_now = tcp_current_mss(sk, 1); - if (sk->sk_send_head != NULL) { + if (tcp_send_head(sk) != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; TCP_SKB_CB(skb)->end_seq++; tp->write_seq++; @@ -2071,7 +2064,7 @@ int tcp_send_synack(struct sock *sk) { struct sk_buff* skb; - skb = skb_peek(&sk->sk_write_queue); + skb = tcp_write_queue_head(sk); if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) { printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); return -EFAULT; @@ -2081,9 +2074,9 @@ int tcp_send_synack(struct sock *sk) struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); if (nskb == NULL) return -ENOMEM; - __skb_unlink(skb, &sk->sk_write_queue); + tcp_unlink_write_queue(skb, sk); skb_header_release(nskb); - __skb_queue_head(&sk->sk_write_queue, nskb); + __tcp_add_write_queue_head(sk, nskb); sk_stream_free_skb(sk, skb); sk_charge_skb(sk, nskb); skb = nskb; @@ -2285,7 +2278,7 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->when = tcp_time_stamp; tp->retrans_stamp = TCP_SKB_CB(buff)->when; skb_header_release(buff); - __skb_queue_tail(&sk->sk_write_queue, buff); + __tcp_add_write_queue_tail(sk, buff); sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); @@ -2441,7 +2434,7 @@ int tcp_write_wakeup(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - if ((skb = sk->sk_send_head) != NULL && + if ((skb = tcp_send_head(sk)) != NULL && before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; unsigned int mss = tcp_current_mss(sk, 0); @@ -2491,7 +2484,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk); - if (tp->packets_out || !sk->sk_send_head) { + if (tp->packets_out || !tcp_send_head(sk)) { /* Cancel probe timer, if it is not required. */ icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a9243cfc1bea..2ca97b20929d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int max_probes; - if (tp->packets_out || !sk->sk_send_head) { + if (tp->packets_out || !tcp_send_head(sk)) { icsk->icsk_probes_out = 0; return; } @@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); + BUG_TRAP(!tcp_write_queue_empty(sk)); if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { @@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk, 0); - tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); + tcp_retransmit_skb(sk, tcp_write_queue_head(sk)); __sk_dst_reset(sk); goto out_reset_timer; } @@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk, 0); } - if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) { + if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, * do not backoff. */ @@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tp->packets_out || sk->sk_send_head) + if (tp->packets_out || tcp_send_head(sk)) goto resched; elapsed = tcp_time_stamp - tp->rcv_tstamp; -- cgit v1.2.3 From aa8223c7bb0b05183e1737881ed21827aa5b9e73 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 10 Apr 2007 21:04:22 -0700 Subject: [SK_BUFF]: Introduce tcp_hdr(), remove skb->h.th Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- drivers/net/atl1/atl1_main.c | 7 ++++--- drivers/net/bnx2.c | 8 ++++---- drivers/net/chelsio/sge.c | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/e1000/e1000_main.c | 11 ++++++----- drivers/net/ioc3-eth.c | 2 +- drivers/net/ixgb/ixgb_main.c | 7 ++++--- drivers/net/mv643xx_eth.c | 2 +- drivers/net/tg3.c | 15 +++++++-------- drivers/s390/net/qeth_eddp.c | 2 +- drivers/s390/net/qeth_tso.h | 4 ++-- include/linux/skbuff.h | 1 - include/linux/tcp.h | 9 +++++++-- include/net/tcp.h | 2 +- include/net/tcp_ecn.h | 6 +++--- net/ipv4/ip_output.c | 4 ++-- net/ipv4/syncookies.c | 36 ++++++++++++++++++------------------ net/ipv4/tcp.c | 22 +++++++++++----------- net/ipv4/tcp_input.c | 28 +++++++++++++++------------- net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++---------------- net/ipv4/tcp_minisocks.c | 9 +++++---- net/ipv4/tcp_output.c | 13 ++++++++----- net/ipv6/tcp_ipv6.c | 32 ++++++++++++++++---------------- 23 files changed, 134 insertions(+), 122 deletions(-) (limited to 'include/net/tcp.h') diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c index 8d5994751e2e..d60c2217332c 100644 --- a/drivers/net/atl1/atl1_main.c +++ b/drivers/net/atl1/atl1_main.c @@ -1298,9 +1298,10 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb, iph->tot_len = 0; iph->check = 0; - skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, 0); + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); ipofst = skb_network_offset(skb); if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */ tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 73512fb16452..7e7b5f344030 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -4524,7 +4524,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= TX_BD_FLAGS_SW_LSO; tcp_opt_len = 0; - if (skb->h.th->doff > 5) + if (tcp_hdr(skb)->doff > 5) tcp_opt_len = tcp_optlen(skb); ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr); @@ -4532,9 +4532,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) iph = ip_hdr(skb); iph->check = 0; iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len); - skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); - + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); if (tcp_opt_len || (iph->ihl > 5)) { vlan_tag_flags |= ((iph->ihl - 5) + (tcp_opt_len >> 2)) << 8; diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index a4204dff3636..43e92f9f0bcd 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1872,7 +1872,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) hdr->opcode = CPL_TX_PKT_LSO; hdr->ip_csum_dis = hdr->l4_csum_dis = 0; hdr->ip_hdr_words = ip_hdr(skb)->ihl; - hdr->tcp_hdr_words = skb->h.th->doff; + hdr->tcp_hdr_words = tcp_hdr(skb)->doff; hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, skb_shinfo(skb)->gso_size)); hdr->len = htonl(skb->len - sizeof(*hdr)); diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index d38b1bcd138e..a70fe9145a2e 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -901,7 +901,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, CPL_ETH_II : CPL_ETH_II_VLAN; tso_info |= V_LSO_ETH_TYPE(eth_type) | V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) | - V_LSO_TCPHDR_WORDS(skb->h.th->doff); + V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff); hdr->lso_info = htonl(tso_info); flits = 3; } else { diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 4572fbba50f9..e86deb2ef823 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2893,14 +2893,15 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; - skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, 0); + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); cmd_length = E1000_TXD_CMD_IP; ipcse = skb_transport_offset(skb) - 1; } else if (skb->protocol == htons(ETH_P_IPV6)) { ipv6_hdr(skb)->payload_len = 0; - skb->h.th->check = + tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); @@ -2909,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, ipcss = skb_network_offset(skb); ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; tucss = skb_transport_offset(skb); - tucso = (void *)&(skb->h.th->check) - (void *)skb->data; + tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; tucse = 0; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c index ba012e10d79a..bc62e770a256 100644 --- a/drivers/net/ioc3-eth.c +++ b/drivers/net/ioc3-eth.c @@ -1426,7 +1426,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (proto == IPPROTO_TCP) { csoff += offsetof(struct tcphdr, check); - skb->h.th->check = csum; + tcp_hdr(skb)->check = csum; } w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 96550d681623..e729ced52dc3 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1195,13 +1195,14 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb) iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; - skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, - 0, IPPROTO_TCP, 0); + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); ipcss = skb_network_offset(skb); ipcso = (void *)&(iph->check) - (void *)skb->data; ipcse = skb_transport_offset(skb) - 1; tucss = skb_transport_offset(skb); - tucso = (void *)&(skb->h.th->check) - (void *)skb->data; + tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; tucse = 0; i = adapter->tx_ring.next_to_use; diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index 43723839e934..ab15ecd4b3d6 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -1169,7 +1169,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, desc->l4i_chk = udp_hdr(skb)->check; break; case IPPROTO_TCP: - desc->l4i_chk = skb->h.th->check; + desc->l4i_chk = tcp_hdr(skb)->check; break; default: BUG(); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 7ca30d76bf6f..414365c3198d 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3922,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) base_flags |= (TXD_FLAG_CPU_PRE_DMA | TXD_FLAG_CPU_POST_DMA); - skb->h.th->check = 0; + tcp_hdr(skb)->check = 0; } else if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -4080,14 +4080,13 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) iph->check = 0; iph->tot_len = htons(mss + hdr_len); if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) { - skb->h.th->check = 0; + tcp_hdr(skb)->check = 0; base_flags &= ~TXD_FLAG_TCPUDP_CSUM; - } - else { - skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, - iph->daddr, 0, - IPPROTO_TCP, 0); - } + } else + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) || (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) { diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c index 273f1745a009..b8e84674e170 100644 --- a/drivers/s390/net/qeth_eddp.c +++ b/drivers/s390/net/qeth_eddp.c @@ -416,7 +416,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx, eddp->skb_offset += VLAN_HLEN; #endif /* CONFIG_QETH_VLAN */ } - tcph = eddp->skb->h.th; + tcph = tcp_hdr(eddp->skb); while (eddp->skb_offset < eddp->skb->len) { data_len = min((int)skb_shinfo(eddp->skb)->gso_size, (int)(eddp->skb->len - eddp->skb_offset)); diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h index 4040bdd8c327..c20e923cf9ad 100644 --- a/drivers/s390/net/qeth_tso.h +++ b/drivers/s390/net/qeth_tso.h @@ -41,7 +41,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb) hdr = (struct qeth_hdr_tso *) skb->data; iph = ip_hdr(skb); - tcph = skb->h.th; + tcph = tcp_hdr(skb); /*fix header to TSO values ...*/ hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO; /*set values which are fix for the first approach ...*/ @@ -65,7 +65,7 @@ qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct tcphdr *tcph = skb->h.th; + struct tcphdr *tcph = tcp_hdr(skb); tcph->check = 0; if (skb->protocol == ETH_P_IPV6) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e580416de78a..8f158d66d2a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -237,7 +237,6 @@ struct sk_buff { /* 4 byte hole on 64 bit*/ union { - struct tcphdr *th; struct iphdr *ipiph; struct ipv6hdr *ipv6h; unsigned char *raw; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 244ae0dacf4a..911d937fb4c1 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -178,14 +178,19 @@ struct tcp_md5sig { #include #include +static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) +{ + return (struct tcphdr *)skb->h.raw; +} + static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) { - return skb->h.th->doff * 4; + return tcp_hdr(skb)->doff * 4; } static inline unsigned int tcp_optlen(const struct sk_buff *skb) { - return (skb->h.th->doff - 5) * 4; + return (tcp_hdr(skb)->doff - 5) * 4; } /* This defines a selective acknowledgement block. */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6dacc352dcf1..af9273204cfd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -984,7 +984,7 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->wscale_ok = rx_opt->wscale_ok; ireq->acked = 0; ireq->ecn_ok = 0; - ireq->rmt_port = skb->h.th->source; + ireq->rmt_port = tcp_hdr(skb)->source; } extern void tcp_enter_memory_pressure(void); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 4629d77173f2..b5f7c6ac0880 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -54,7 +54,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, INET_ECN_xmit(sk); if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) { tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; - skb->h.th->cwr = 1; + tcp_hdr(skb)->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } } else { @@ -62,7 +62,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, INET_ECN_dontxmit(sk); } if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) - skb->h.th->ece = 1; + tcp_hdr(skb)->ece = 1; } } @@ -70,7 +70,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) { - if (skb->h.th->cwr) + if (tcp_hdr(skb)->cwr) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6d92358fc513..602268661eb3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1352,8 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .tos = RT_TOS(ip_hdr(skb)->tos) } }, /* Not quite clean, but right. */ .uli_u = { .ports = - { .sport = skb->h.th->dest, - .dport = skb->h.th->source } }, + { .sport = tcp_hdr(skb)->dest, + .dport = tcp_hdr(skb)->source } }, .proto = sk->sk_protocol }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 261607178491..2da1be0589a9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -125,10 +125,11 @@ static __u16 const msstab[] = { __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) { struct tcp_sock *tp = tcp_sk(sk); + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int mssind; const __u16 mss = *mssp; - tp->last_synq_overflow = jiffies; /* XXX sort msstab[] by probability? Binary search? */ @@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT); - return secure_tcp_syn_cookie(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - skb->h.th->source, skb->h.th->dest, - ntohl(skb->h.th->seq), + return secure_tcp_syn_cookie(iph->saddr, iph->daddr, + th->source, th->dest, ntohl(th->seq), jiffies / (HZ * 60), mssind); } @@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) */ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) { - __u32 seq; - __u32 mssind; - - seq = ntohl(skb->h.th->seq)-1; - mssind = check_tcp_syn_cookie(cookie, - ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - skb->h.th->source, skb->h.th->dest, - seq, jiffies / (HZ * 60), COUNTER_TRIES); + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + __u32 seq = ntohl(th->seq) - 1; + __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, + th->source, th->dest, seq, + jiffies / (HZ * 60), + COUNTER_TRIES); return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; } @@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); - __u32 cookie = ntohl(skb->h.th->ack_seq) - 1; + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; struct sock *ret = sk; struct request_sock *req; int mss; struct rtable *rt; __u8 rcv_wscale; - if (!sysctl_tcp_syncookies || !skb->h.th->ack) + if (!sysctl_tcp_syncookies || !th->ack) goto out; if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) || @@ -220,10 +220,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->rcv_isn = ntohl(skb->h.th->seq) - 1; + treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; - ireq->rmt_port = skb->h.th->source; + ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; ireq->opt = NULL; @@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, .uli_u = { .ports = - { .sport = skb->h.th->dest, - .dport = skb->h.th->source } } }; + { .sport = th->dest, + .dport = th->source } } }; security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 689f9330f1b9..f832f3c33ab1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) /* Subtract 1, if FIN is in queue. */ if (answ && !skb_queue_empty(&sk->sk_receive_queue)) answ -= - ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin; + tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); @@ -1016,9 +1016,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) skb_queue_walk(&sk->sk_receive_queue, skb) { offset = seq - TCP_SKB_CB(skb)->seq; - if (skb->h.th->syn) + if (tcp_hdr(skb)->syn) offset--; - if (offset < skb->len || skb->h.th->fin) { + if (offset < skb->len || tcp_hdr(skb)->fin) { *off = offset; return skb; } @@ -1070,7 +1070,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (offset != skb->len) break; } - if (skb->h.th->fin) { + if (tcp_hdr(skb)->fin) { sk_eat_skb(sk, skb, 0); ++seq; break; @@ -1174,11 +1174,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, break; } offset = *seq - TCP_SKB_CB(skb)->seq; - if (skb->h.th->syn) + if (tcp_hdr(skb)->syn) offset--; if (offset < skb->len) goto found_ok_skb; - if (skb->h.th->fin) + if (tcp_hdr(skb)->fin) goto found_fin_ok; BUG_TRAP(flags & MSG_PEEK); skb = skb->next; @@ -1394,7 +1394,7 @@ skip_copy: if (used + offset < skb->len) continue; - if (skb->h.th->fin) + if (tcp_hdr(skb)->fin) goto found_fin_ok; if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, copied_early); @@ -1563,7 +1563,7 @@ void tcp_close(struct sock *sk, long timeout) */ while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - - skb->h.th->fin; + tcp_hdr(skb)->fin; data_was_unread += len; __kfree_skb(skb); } @@ -2170,7 +2170,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (!pskb_may_pull(skb, sizeof(*th))) goto out; - th = skb->h.th; + th = tcp_hdr(skb); thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; @@ -2210,7 +2210,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) delta = htonl(oldlen + (thlen + len)); skb = segs; - th = skb->h.th; + th = tcp_hdr(skb); seq = ntohl(th->seq); do { @@ -2224,7 +2224,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) seq += len; skb = skb->next; - th = skb->h.th; + th = tcp_hdr(skb); th->seq = htonl(seq); th->cwr = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2776a8b01339..c1ce36237380 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -148,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, * to handle super-low mtu links fairly. */ (len >= TCP_MIN_MSS + sizeof(struct tcphdr) && - !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) { + !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) { /* Subtract also invariant (if peer is RFC compliant), * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. @@ -2559,9 +2559,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb, u32 ack, u32 ack_seq) { int flag = 0; - u32 nwin = ntohs(skb->h.th->window); + u32 nwin = ntohs(tcp_hdr(skb)->window); - if (likely(!skb->h.th->syn)) + if (likely(!tcp_hdr(skb)->syn)) nwin <<= tp->rx_opt.snd_wscale; if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { @@ -2766,7 +2766,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) + if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb))) flag |= FLAG_ECE; tcp_ca_event(sk, CA_EVENT_SLOW_ACK); @@ -2833,7 +2833,7 @@ uninteresting_ack: void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab) { unsigned char *ptr; - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); int length=(th->doff*4)-sizeof(struct tcphdr); ptr = (unsigned char *)(th + 1); @@ -2995,7 +2995,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -3357,8 +3357,8 @@ static void tcp_ofo_queue(struct sock *sk) __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - if (skb->h.th->fin) - tcp_fin(skb, sk, skb->h.th); + if (tcp_hdr(skb)->fin) + tcp_fin(skb, sk, tcp_hdr(skb)); } } @@ -3366,7 +3366,7 @@ static int tcp_prune_queue(struct sock *sk); static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); int eaten = -1; @@ -3605,7 +3605,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, * - bloated or contains data before "start" or * overlaps to the next one. */ - if (!skb->h.th->syn && !skb->h.th->fin && + if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && (tcp_win_from_space(skb->truesize) > skb->len || before(TCP_SKB_CB(skb)->seq, start) || (skb->next != tail && @@ -3616,7 +3616,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, start = TCP_SKB_CB(skb)->end_seq; skb = skb->next; } - if (skb == tail || skb->h.th->syn || skb->h.th->fin) + if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) return; while (before(start, end)) { @@ -3665,7 +3665,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; - if (skb == tail || skb->h.th->syn || skb->h.th->fin) + if (skb == tail || + tcp_hdr(skb)->syn || + tcp_hdr(skb)->fin) return; } } @@ -4072,7 +4074,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen tcp_rcv_space_adjust(sk); if ((tp->ucopy.len == 0) || - (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) || + (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) || (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) { tp->ucopy.wakeup = 1; sk->sk_data_ready(sk, 0); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c146a02f8495..617a5e4ca010 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -127,8 +127,8 @@ static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, - skb->h.th->dest, - skb->h.th->source); + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -499,7 +499,7 @@ out: void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~tcp_v4_check(len, inet->saddr, @@ -522,7 +522,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) return -EINVAL; iph = ip_hdr(skb); - th = skb->h.th; + th = tcp_hdr(skb); th->check = 0; th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); @@ -546,7 +546,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; #ifdef CONFIG_TCP_MD5SIG @@ -622,7 +622,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) @@ -745,7 +745,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, skb = tcp_make_synack(sk, dst, req); if (skb) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); th->check = tcp_v4_check(skb->len, ireq->loc_addr, @@ -781,7 +781,7 @@ static void syn_flood_warning(struct sk_buff *skb) warntime = jiffies; printk(KERN_INFO "possible SYN flooding on port %d. Sending cookies.\n", - ntohs(skb->h.th->dest)); + ntohs(tcp_hdr(skb)->dest)); } } #endif @@ -1134,7 +1134,7 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); int length = (th->doff << 2) - sizeof(struct tcphdr); int genhash; unsigned char *ptr; @@ -1327,7 +1327,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = saddr; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) - TCP_ECN_create_request(req, skb->h.th); + TCP_ECN_create_request(req, tcp_hdr(skb)); if (want_cookie) { #ifdef CONFIG_SYN_COOKIES @@ -1375,7 +1375,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " "request from %u.%u.%u.%u/%u\n", NIPQUAD(saddr), - ntohs(skb->h.th->source)); + ntohs(tcp_hdr(skb)->source)); dst_release(dst); goto drop_and_free; } @@ -1481,7 +1481,7 @@ exit: static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; struct request_sock **prev; @@ -1556,7 +1556,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) { + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } @@ -1582,7 +1582,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) { + if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; } @@ -1625,7 +1625,7 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; - th = skb->h.th; + th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) goto bad_packet; @@ -1640,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb) tcp_v4_checksum_init(skb))) goto bad_packet; - th = skb->h.th; + th = tcp_hdr(skb); iph = ip_hdr(skb); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 463d2b24d2db..a12b08fca5ad 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -453,7 +453,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0; newtp->window_clamp = min(newtp->window_clamp, 65535U); } - newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale; + newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) << + newtp->rx_opt.snd_wscale); newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { @@ -488,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, struct request_sock **prev) { - struct tcphdr *th = skb->h.th; + const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); int paws_reject = 0; struct tcp_options_received tmp_opt; @@ -710,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child, int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len); - + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), + skb->len); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f19f5fb361b5..29c53fbb2204 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -465,11 +465,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; #endif - th = (struct tcphdr *) skb_push(skb, tcp_header_size); - skb->h.th = th; + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); skb_set_owner_w(skb, sk); /* Build TCP header and checksum it. */ + th = tcp_hdr(skb); th->source = inet->sport; th->dest = inet->dport; th->seq = htonl(tcb->seq); @@ -524,7 +525,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tp->af_specific->calc_md5_hash(md5_hash_location, md5, sk, NULL, NULL, - skb->h.th, + tcp_hdr(skb), sk->sk_protocol, skb->len); } @@ -2128,8 +2129,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (md5) tcp_header_size += TCPOLEN_MD5SIG_ALIGNED; #endif - skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size); + skb_push(skb, tcp_header_size); + skb_reset_transport_header(skb); + th = tcp_hdr(skb); memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; @@ -2183,7 +2186,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, tp->af_specific->calc_md5_hash(md5_hash_location, md5, NULL, dst, req, - skb->h.th, sk->sk_protocol, + tcp_hdr(skb), sk->sk_protocol, skb->len); } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c573353f21cd..4a55da079f5f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -117,8 +117,8 @@ static __u32 tcp_v6_init_sequence(struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, - skb->h.th->dest, - skb->h.th->source); + tcp_hdr(skb)->dest, + tcp_hdr(skb)->source); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, @@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, skb = tcp_make_synack(sk, dst, req); if (skb) { - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, @@ -838,7 +838,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); int length = (th->doff << 2) - sizeof (*th); int genhash; u8 *ptr; @@ -946,7 +946,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); - struct tcphdr *th = skb->h.th; + struct tcphdr *th = tcp_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); @@ -967,7 +967,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) return -EINVAL; ipv6h = ipv6_hdr(skb); - th = skb->h.th; + th = tcp_hdr(skb); th->check = 0; th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, @@ -979,7 +979,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = skb->h.th, *t1; + struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; int tot_len = sizeof(*th); @@ -1079,7 +1079,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts) { - struct tcphdr *th = skb->h.th, *t1; + struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; int tot_len = sizeof(struct tcphdr); @@ -1195,7 +1195,7 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; - const struct tcphdr *th = skb->h.th; + const struct tcphdr *th = tcp_hdr(skb); struct sock *nsk; /* Find possible connection requests. */ @@ -1275,7 +1275,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - TCP_ECN_create_request(req, skb->h.th); + TCP_ECN_create_request(req, tcp_hdr(skb)); treq->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || @@ -1528,14 +1528,14 @@ out: static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->h.th, skb->len, &ipv6_hdr(skb)->saddr, + if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; return 0; } } - skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th, skb->len, + skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0)); @@ -1601,7 +1601,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; TCP_CHECK_TIMER(sk); if (opt_skb) @@ -1632,7 +1632,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) + if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; TCP_CHECK_TIMER(sk); if (opt_skb) @@ -1698,7 +1698,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; - th = skb->h.th; + th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr)/4) goto bad_packet; @@ -1709,7 +1709,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) tcp_v6_checksum_init(skb))) goto bad_packet; - th = skb->h.th; + th = tcp_hdr(skb); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); -- cgit v1.2.3 From 604763722c655c7e3f31ecf6f7b4dafcd26a7a15 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 9 Apr 2007 11:59:39 -0700 Subject: [NET]: Treat CHECKSUM_PARTIAL as CHECKSUM_UNNECESSARY When a transmitted packet is looped back directly, CHECKSUM_PARTIAL maps to the semantics of CHECKSUM_UNNECESSARY. Therefore we should treat it as such in the stack. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 +++++++++++----- include/net/tcp.h | 2 +- include/net/udp.h | 2 +- net/core/netpoll.c | 2 +- net/ipv4/ipvs/ip_vs_core.c | 6 ++---- net/ipv4/tcp_input.c | 6 +++--- net/ipv4/tcp_ipv4.c | 3 +-- net/ipv4/udp.c | 4 ++-- net/ipv6/raw.c | 4 ++-- net/ipv6/tcp_ipv6.c | 3 +-- net/ipv6/udp.c | 4 ++-- net/sctp/input.c | 3 +-- net/sunrpc/socklib.c | 2 +- 13 files changed, 29 insertions(+), 28 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 910560e85561..c413afbe0b9c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -32,10 +32,11 @@ #define HAVE_ALLOC_SKB /* For the drivers to know */ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ +/* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 -#define CHECKSUM_PARTIAL 1 -#define CHECKSUM_UNNECESSARY 2 -#define CHECKSUM_COMPLETE 3 +#define CHECKSUM_UNNECESSARY 1 +#define CHECKSUM_COMPLETE 2 +#define CHECKSUM_PARTIAL 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -1572,6 +1573,11 @@ static inline void __net_timestamp(struct sk_buff *skb) extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); +static inline int skb_csum_unnecessary(const struct sk_buff *skb) +{ + return skb->ip_summed & CHECKSUM_UNNECESSARY; +} + /** * skb_checksum_complete - Calculate checksum of an entire packet * @skb: packet to process @@ -1590,8 +1596,8 @@ extern __sum16 __skb_checksum_complete(struct sk_buff *skb); */ static inline unsigned int skb_checksum_complete(struct sk_buff *skb) { - return skb->ip_summed != CHECKSUM_UNNECESSARY && - __skb_checksum_complete(skb); + return skb_csum_unnecessary(skb) ? + 0 : __skb_checksum_complete(skb); } #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/include/net/tcp.h b/include/net/tcp.h index af9273204cfd..07f724e02f84 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -818,7 +818,7 @@ static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb) static inline int tcp_checksum_complete(struct sk_buff *skb) { - return skb->ip_summed != CHECKSUM_UNNECESSARY && + return !skb_csum_unnecessary(skb) && __tcp_checksum_complete(skb); } diff --git a/include/net/udp.h b/include/net/udp.h index 4906ed7113e7..98755ebaf163 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -77,7 +77,7 @@ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb) static inline int udp_lib_checksum_complete(struct sk_buff *skb) { - return skb->ip_summed != CHECKSUM_UNNECESSARY && + return !skb_csum_unnecessary(skb) && __udp_lib_checksum_complete(skb); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 1fb30c3528bc..b316435b0e2a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, { __wsum psum; - if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) + if (uh->check == 0 || skb_csum_unnecessary(skb)) return 0; psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 62cfbed317bf..f005a2f929f4 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -681,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) } /* Ensure the checksum is correct */ - if (skb->ip_summed != CHECKSUM_UNNECESSARY && - ip_vs_checksum_complete(skb, ihl)) { + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); @@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) verdict = NF_DROP; /* Ensure the checksum is correct */ - if (skb->ip_summed != CHECKSUM_UNNECESSARY && - ip_vs_checksum_complete(skb, ihl)) { + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c3b4c7a50ad..d1604f59d77e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4009,7 +4009,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) int err; local_bh_enable(); - if (skb->ip_summed==CHECKSUM_UNNECESSARY) + if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk); else err = skb_copy_and_csum_datagram_iovec(skb, hlen, @@ -4041,7 +4041,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb) { - return skb->ip_summed != CHECKSUM_UNNECESSARY && + return !skb_csum_unnecessary(skb) && __tcp_checksum_complete_user(sk, skb); } @@ -4059,7 +4059,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) tp->ucopy.dma_chan = get_softnet_dma(); - if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) { dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan, skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a091a99ad263..5a3e7f839fc5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1638,8 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb) * Packet length and doff are validated by header prediction, * provided case of th->doff==0 is eliminated. * So, we defer the checks. */ - if ((skb->ip_summed != CHECKSUM_UNNECESSARY && - tcp_v4_checksum_init(skb))) + if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) goto bad_packet; th = tcp_hdr(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5ad7a26e3091..cec0f2cc49b7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -848,7 +848,7 @@ try_again: goto csum_copy_err; } - if (skb->ip_summed == CHECKSUM_UNNECESSARY) + if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied ); else { @@ -1190,7 +1190,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, proto, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) + if (!skb_csum_unnecessary(skb)) skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len, proto, 0); /* Probably, we should checksum udp header (it should be in cache diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2b3be68b70a7..f65fcd7704ca 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -368,7 +368,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) skb->len, inet->num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) + if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, @@ -421,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, msg->msg_flags |= MSG_TRUNC; } - if (skb->ip_summed==CHECKSUM_UNNECESSARY) { + if (skb_csum_unnecessary(skb)) { err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7e824b97126d..2b668a6ae698 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1707,8 +1707,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if ((skb->ip_summed != CHECKSUM_UNNECESSARY && - tcp_v6_checksum_init(skb))) + if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) goto bad_packet; th = tcp_hdr(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1e3dfb20b1cf..b083c09e3d2d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -153,7 +153,7 @@ try_again: goto csum_copy_err; } - if (skb->ip_summed == CHECKSUM_UNNECESSARY) + if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied ); else { @@ -397,7 +397,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, skb->len, proto, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) + if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, proto, 0)); diff --git a/net/sctp/input.c b/net/sctp/input.c index 18b97eedc1fa..885109fb3dda 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -140,8 +140,7 @@ int sctp_rcv(struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); if (skb->len < sizeof(struct sctphdr)) goto discard_it; - if ((skb->ip_summed != CHECKSUM_UNNECESSARY) && - (sctp_rcv_checksum(skb) < 0)) + if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index 634885b0c04d..1d377d1ab7f4 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) desc.offset = sizeof(struct udphdr); desc.count = skb->len - desc.offset; - if (skb->ip_summed == CHECKSUM_UNNECESSARY) + if (skb_csum_unnecessary(skb)) goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); -- cgit v1.2.3 From 4ac02bab77438b484a5cf855a002fb6a1d592894 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Apr 2007 17:11:46 -0700 Subject: [TCP]: Uninline tcp_done(). The function is quite big and has several call sites and nothing to collapse by compiler optimization on inlining. Besides it's nicer to read in a in .c file. Signed-off-by: Andi Kleen Signed-off-by: David S. Miller --- include/net/tcp.h | 16 +--------------- net/ipv4/tcp.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index 07f724e02f84..e79803353c83 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -921,21 +921,7 @@ static inline void tcp_set_state(struct sock *sk, int state) #endif } -static inline void tcp_done(struct sock *sk) -{ - if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); - - tcp_set_state(sk, TCP_CLOSE); - tcp_clear_xmit_timers(sk); - - sk->sk_shutdown = SHUTDOWN_MASK; - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_state_change(sk); - else - inet_csk_destroy_sock(sk); -} +extern void tcp_done(struct sock *sk); static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4664733f139c..99ad52c00c96 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2375,6 +2375,23 @@ void __tcp_put_md5sig_pool(void) EXPORT_SYMBOL(__tcp_put_md5sig_pool); #endif +void tcp_done(struct sock *sk) +{ + if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); + + tcp_set_state(sk, TCP_CLOSE); + tcp_clear_xmit_timers(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else + inet_csk_destroy_sock(sk); +} +EXPORT_SYMBOL_GPL(tcp_done); + extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; -- cgit v1.2.3 From 9e412ba7632f71259a53085665d4983b78257b7c Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Fri, 20 Apr 2007 22:18:02 -0700 Subject: [TCP]: Sed magic converts func(sk, tp, ...) -> func(sk, ...) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is (mostly) automated change using magic: sed -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N' -e 's|struct sock \*sk,[\n\t ]*struct tcp_sock \*tp\([^{]*\n{\n\)| struct sock \*sk\1\tstruct tcp_sock *tp = tcp_sk(sk);\n|g' -e 's|struct sock \*sk, struct tcp_sock \*tp| struct sock \*sk|g' -e 's|sk, tp\([^-]\)|sk\1|g' Fixed four unused variable (tp) warnings that were introduced. In addition, manually added newlines after local variables and tweaked function arguments positioning. $ gcc --version gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1) ... $ codiff -fV built-in.o.old built-in.o.new net/ipv4/route.c: rt_cache_flush | +14 1 function changed, 14 bytes added net/ipv4/tcp.c: tcp_setsockopt | -5 tcp_sendpage | -25 tcp_sendmsg | -16 3 functions changed, 46 bytes removed net/ipv4/tcp_input.c: tcp_try_undo_recovery | +3 tcp_try_undo_dsack | +2 tcp_mark_head_lost | -12 tcp_ack | -15 tcp_event_data_recv | -32 tcp_rcv_state_process | -10 tcp_rcv_established | +1 7 functions changed, 6 bytes added, 69 bytes removed, diff: -63 net/ipv4/tcp_output.c: update_send_head | -9 tcp_transmit_skb | +19 tcp_cwnd_validate | +1 tcp_write_wakeup | -17 __tcp_push_pending_frames | -25 tcp_push_one | -8 tcp_send_fin | -4 7 functions changed, 20 bytes added, 63 bytes removed, diff: -43 built-in.o.new: 18 functions changed, 40 bytes added, 178 bytes removed, diff: -138 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 25 +++++---- include/net/tcp_ecn.h | 11 ++-- net/ipv4/tcp.c | 39 +++++++------- net/ipv4/tcp_input.c | 145 ++++++++++++++++++++++++++++++-------------------- net/ipv4/tcp_output.c | 54 ++++++++++--------- 5 files changed, 158 insertions(+), 116 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index e79803353c83..43910fe3c448 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -420,9 +420,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, /* tcp_output.c */ -extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, - unsigned int cur_mss, int nonagle); -extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp); +extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, + int nonagle); +extern int tcp_may_send_now(struct sock *sk); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); @@ -479,8 +479,10 @@ static inline void tcp_fast_path_on(struct tcp_sock *tp) __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); } -static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_fast_path_check(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (skb_queue_empty(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && @@ -591,10 +593,10 @@ static inline void tcp_dec_pcount_approx(__u32 *count, } } -static inline void tcp_packets_out_inc(struct sock *sk, - struct tcp_sock *tp, +static inline void tcp_packets_out_inc(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); int orig = tp->packets_out; tp->packets_out += tcp_skb_pcount(skb); @@ -778,18 +780,21 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, int mss, tp->snd_sml = TCP_SKB_CB(skb)->end_seq; } -static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_check_probe_timer(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto, TCP_RTO_MAX); } -static inline void tcp_push_pending_frames(struct sock *sk, - struct tcp_sock *tp) +static inline void tcp_push_pending_frames(struct sock *sk) { - __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); + struct tcp_sock *tp = tcp_sk(sk); + + __tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle); } static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index b5f7c6ac0880..89eb3e05116d 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -27,9 +27,10 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp, TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; } -static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + tp->ecn_flags = 0; if (sysctl_tcp_ecn) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR; @@ -44,9 +45,11 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) th->ece = 1; } -static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb, int tcp_header_len) +static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb, + int tcp_header_len) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->ecn_flags & TCP_ECN_OK) { /* Not-retransmitted data segment: set ECT and inject CWR. */ if (skb->len != tcp_header_len && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 99ad52c00c96..2cf9a898ce50 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp) return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } -static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static inline void skb_entail(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); skb->csum = 0; @@ -486,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, } } -static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags, - int mss_now, int nonagle) +static inline void tcp_push(struct sock *sk, int flags, int mss_now, + int nonagle) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_send_head(sk)) { struct sk_buff *skb = tcp_write_queue_tail(sk); if (!(flags & MSG_MORE) || forced_push(tp)) tcp_mark_push(tp, skb); tcp_mark_urg(tp, flags, skb); - __tcp_push_pending_frames(sk, tp, mss_now, + __tcp_push_pending_frames(sk, mss_now, (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); } } @@ -540,7 +542,7 @@ new_segment: if (!skb) goto wait_for_memory; - skb_entail(sk, tp, skb); + skb_entail(sk, skb); copy = size_goal; } @@ -586,7 +588,7 @@ new_segment: if (forced_push(tp)) { tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); } else if (skb == tcp_send_head(sk)) tcp_push_one(sk, mss_now); continue; @@ -595,7 +597,7 @@ wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: if (copied) - tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -606,7 +608,7 @@ wait_for_memory: out: if (copied) - tcp_push(sk, tp, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle); return copied; do_error: @@ -637,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk, struct tcp_sock *tp) +static inline int select_size(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sk->sk_route_caps & NETIF_F_SG) { @@ -714,7 +717,7 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_pskb(sk, select_size(sk, tp), + skb = sk_stream_alloc_pskb(sk, select_size(sk), 0, sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -725,7 +728,7 @@ new_segment: if (sk->sk_route_caps & NETIF_F_ALL_CSUM) skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, tp, skb); + skb_entail(sk, skb); copy = size_goal; } @@ -830,7 +833,7 @@ new_segment: if (forced_push(tp)) { tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); } else if (skb == tcp_send_head(sk)) tcp_push_one(sk, mss_now); continue; @@ -839,7 +842,7 @@ wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: if (copied) - tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); + tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; @@ -851,7 +854,7 @@ wait_for_memory: out: if (copied) - tcp_push(sk, tp, flags, mss_now, tp->nonagle); + tcp_push(sk, flags, mss_now, tp->nonagle); TCP_CHECK_TIMER(sk); release_sock(sk); return copied; @@ -1389,7 +1392,7 @@ do_prequeue: skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) { tp->urg_data = 0; - tcp_fast_path_check(sk, tp); + tcp_fast_path_check(sk); } if (used + offset < skb->len) continue; @@ -1830,7 +1833,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, * for currently queued segments. */ tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk, tp); + tcp_push_pending_frames(sk); } else { tp->nonagle &= ~TCP_NAGLE_OFF; } @@ -1854,7 +1857,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, tp->nonagle &= ~TCP_NAGLE_CORK; if (tp->nonagle&TCP_NAGLE_OFF) tp->nonagle |= TCP_NAGLE_PUSH; - tcp_push_pending_frames(sk, tp); + tcp_push_pending_frames(sk); } break; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2fbfc2e4209c..633389390788 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -235,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, - const struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ int truesize = tcp_win_from_space(skb->truesize)/2; int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; @@ -252,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, return 0; } -static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, +static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + /* Check #1 */ if (tp->rcv_ssthresh < tp->window_clamp && (int)tp->rcv_ssthresh < tcp_space(sk) && @@ -267,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, if (tcp_win_from_space(skb->truesize) <= skb->len) incr = 2*tp->advmss; else - incr = __tcp_grow_window(sk, tp, skb); + incr = __tcp_grow_window(sk, skb); if (incr) { tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); @@ -330,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk) } /* 5. Recalculate window clamp after socket hit its memory bounds. */ -static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) +static void tcp_clamp_window(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); icsk->icsk_ack.quick = 0; @@ -503,8 +506,9 @@ new_measure: * each ACK we send, he increments snd_cwnd and transmits more of his * queue. -DaveM */ -static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); u32 now; @@ -545,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ TCP_ECN_check_ce(tp, skb); if (skb->len >= 128) - tcp_grow_window(sk, tp, skb); + tcp_grow_window(sk, skb); } /* Called to compute a smoothed rtt estimate. The data fed to this @@ -1541,8 +1545,10 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); } -static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) +static inline int tcp_head_timedout(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + return tp->packets_out && tcp_skb_timedout(sk, tcp_write_queue_head(sk)); } @@ -1640,8 +1646,9 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) * Main question: may we further continue forward transmission * with the same cwnd? */ -static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) +static int tcp_time_to_recover(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; /* Do not perform any recovery during FRTO algorithm */ @@ -1659,7 +1666,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) /* Trick#3 : when we use RFC2988 timer restart, fast * retransmit can be triggered by timeout of queue head. */ - if (tcp_head_timedout(sk, tp)) + if (tcp_head_timedout(sk)) return 1; /* Trick#4: It is still not OK... But will it be useful to delay @@ -1668,7 +1675,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) packets_out = tp->packets_out; if (packets_out <= tp->reordering && tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && - !tcp_may_send_now(sk, tp)) { + !tcp_may_send_now(sk)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. */ @@ -1708,8 +1715,10 @@ static void tcp_add_reno_sack(struct sock *sk) /* Account for ACK, ACKing some data in Reno Recovery phase. */ -static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked) +static void tcp_remove_reno_sacks(struct sock *sk, int acked) { + struct tcp_sock *tp = tcp_sk(sk); + if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ if (acked-1 >= tp->sacked_out) @@ -1728,9 +1737,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) } /* Mark head of queue up as lost. */ -static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, +static void tcp_mark_head_lost(struct sock *sk, int packets, u32 high_seq) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int cnt; @@ -1771,15 +1781,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, /* Account newly detected lost packet(s) */ -static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) +static void tcp_update_scoreboard(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (IsFack(tp)) { int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, tp, lost, tp->high_seq); + tcp_mark_head_lost(sk, lost, tp->high_seq); } else { - tcp_mark_head_lost(sk, tp, 1, tp->high_seq); + tcp_mark_head_lost(sk, 1, tp->high_seq); } /* New heuristics: it is possible only after we switched @@ -1787,7 +1799,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) * Hence, we can detect timed out packets during fast * retransmit without falling to slow start. */ - if (!IsReno(tp) && tcp_head_timedout(sk, tp)) { + if (!IsReno(tp) && tcp_head_timedout(sk)) { struct sk_buff *skb; skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint @@ -1867,9 +1879,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp) /* Undo procedures. */ #if FASTRETRANS_DEBUG > 1 -static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) +static void DBGUNDO(struct sock *sk, const char *msg) { + struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); + printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), @@ -1915,13 +1929,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp) } /* People celebrate: "We love our President!" */ -static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) +static int tcp_try_undo_recovery(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_may_undo(tp)) { /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ - DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); + DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); tcp_undo_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); @@ -1941,10 +1957,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) +static void tcp_try_undo_dsack(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->undo_marker && !tp->undo_retrans) { - DBGUNDO(sk, tp, "D-SACK"); + DBGUNDO(sk, "D-SACK"); tcp_undo_cwr(sk, 1); tp->undo_marker = 0; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); @@ -1953,9 +1971,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) /* Undo during fast recovery after partial ACK. */ -static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, - int acked) +static int tcp_try_undo_partial(struct sock *sk, int acked) { + struct tcp_sock *tp = tcp_sk(sk); /* Partial ACK arrived. Force Hoe's retransmit. */ int failed = IsReno(tp) || tp->fackets_out>tp->reordering; @@ -1968,7 +1986,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - DBGUNDO(sk, tp, "Hoe"); + DBGUNDO(sk, "Hoe"); tcp_undo_cwr(sk, 0); NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); @@ -1982,8 +2000,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, } /* Undo during loss recovery after partial ACK. */ -static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) +static int tcp_try_undo_loss(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { @@ -1994,7 +2014,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) clear_all_retrans_hints(tp); - DBGUNDO(sk, tp, "partial loss"); + DBGUNDO(sk, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; tcp_undo_cwr(sk, 1); @@ -2016,8 +2036,10 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } -static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) +static void tcp_try_to_open(struct sock *sk, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + tp->left_out = tp->sacked_out; if (tp->retrans_out == 0) @@ -2111,7 +2133,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -2127,7 +2149,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, switch (icsk->icsk_ca_state) { case TCP_CA_Loss: icsk->icsk_retransmits = 0; - if (tcp_try_undo_recovery(sk, tp)) + if (tcp_try_undo_recovery(sk)) return; break; @@ -2141,7 +2163,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Disorder: - tcp_try_undo_dsack(sk, tp); + tcp_try_undo_dsack(sk); if (!tp->undo_marker || /* For SACK case do not Open to allow to undo * catching for all duplicate ACKs. */ @@ -2154,7 +2176,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, case TCP_CA_Recovery: if (IsReno(tp)) tcp_reset_reno_sack(tp); - if (tcp_try_undo_recovery(sk, tp)) + if (tcp_try_undo_recovery(sk)) return; tcp_complete_cwr(sk); break; @@ -2170,14 +2192,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) - tcp_remove_reno_sacks(sk, tp, acked); - is_dupack = tcp_try_undo_partial(sk, tp, acked); + tcp_remove_reno_sacks(sk, acked); + is_dupack = tcp_try_undo_partial(sk, acked); } break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) icsk->icsk_retransmits = 0; - if (!tcp_try_undo_loss(sk, tp)) { + if (!tcp_try_undo_loss(sk)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); return; @@ -2194,10 +2216,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } if (icsk->icsk_ca_state == TCP_CA_Disorder) - tcp_try_undo_dsack(sk, tp); + tcp_try_undo_dsack(sk); - if (!tcp_time_to_recover(sk, tp)) { - tcp_try_to_open(sk, tp, flag); + if (!tcp_time_to_recover(sk)) { + tcp_try_to_open(sk, flag); return; } @@ -2236,8 +2258,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_set_ca_state(sk, TCP_CA_Recovery); } - if (is_dupack || tcp_head_timedout(sk, tp)) - tcp_update_scoreboard(sk, tp); + if (is_dupack || tcp_head_timedout(sk)) + tcp_update_scoreboard(sk); tcp_cwnd_down(sk); tcp_xmit_retransmit_queue(sk); } @@ -2313,8 +2335,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, * RFC2988 recommends to restart timer to now+rto. */ -static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) +static void tcp_ack_packets_out(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { @@ -2471,7 +2495,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) if (acked&FLAG_ACKED) { tcp_ack_update_rtt(sk, acked, seq_rtt); - tcp_ack_packets_out(sk, tp); + tcp_ack_packets_out(sk); if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) (*rtt_sample)(sk, tcp_usrtt(&tv)); @@ -2556,9 +2580,10 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 * and in FreeBSD. NetBSD's one is even worse.) is wrong. */ -static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb, u32 ack, u32 ack_seq) +static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, + u32 ack_seq) { + struct tcp_sock *tp = tcp_sk(sk); int flag = 0; u32 nwin = ntohs(tcp_hdr(skb)->window); @@ -2576,7 +2601,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, * fast path is recovered for sending TCP. */ tp->pred_flags = 0; - tcp_fast_path_check(sk, tp); + tcp_fast_path_check(sk); if (nwin > tp->max_window) { tp->max_window = nwin; @@ -2762,7 +2787,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) else NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS); - flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq); + flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); @@ -3426,7 +3451,7 @@ queue_and_out: } tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (skb->len) - tcp_event_data_recv(sk, tp, skb); + tcp_event_data_recv(sk, skb); if (th->fin) tcp_fin(skb, sk, th); @@ -3443,7 +3468,7 @@ queue_and_out: if (tp->rx_opt.num_sacks) tcp_sack_remove(tp); - tcp_fast_path_check(sk, tp); + tcp_fast_path_check(sk); if (eaten > 0) __kfree_skb(skb); @@ -3734,7 +3759,7 @@ static int tcp_prune_queue(struct sock *sk) NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) - tcp_clamp_window(sk, tp); + tcp_clamp_window(sk); else if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); @@ -3803,8 +3828,10 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) +static int tcp_should_expand_sndbuf(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + /* If the user specified a specific send buffer setting, do * not modify it. */ @@ -3836,7 +3863,7 @@ static void tcp_new_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_should_expand_sndbuf(sk, tp)) { + if (tcp_should_expand_sndbuf(sk)) { int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), demanded = max_t(unsigned int, tp->snd_cwnd, @@ -3860,9 +3887,9 @@ static void tcp_check_space(struct sock *sk) } } -static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) +static inline void tcp_data_snd_check(struct sock *sk) { - tcp_push_pending_frames(sk, tp); + tcp_push_pending_frames(sk); tcp_check_space(sk); } @@ -4196,7 +4223,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ tcp_ack(sk, skb, 0); __kfree_skb(skb); - tcp_data_snd_check(sk, tp); + tcp_data_snd_check(sk); return 0; } else { /* Header too small */ TCP_INC_STATS_BH(TCP_MIB_INERRS); @@ -4267,12 +4294,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; } - tcp_event_data_recv(sk, tp, skb); + tcp_event_data_recv(sk, skb); if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { /* Well, only one small jumplet in fast path... */ tcp_ack(sk, skb, FLAG_DATA); - tcp_data_snd_check(sk, tp); + tcp_data_snd_check(sk); if (!inet_csk_ack_scheduled(sk)) goto no_ack; } @@ -4355,7 +4382,7 @@ step5: /* step 7: process the segment text */ tcp_data_queue(sk, skb); - tcp_data_snd_check(sk, tp); + tcp_data_snd_check(sk); tcp_ack_snd_check(sk); return 0; @@ -4672,7 +4699,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Do step6 onward by hand. */ tcp_urg(sk, skb, th); __kfree_skb(skb); - tcp_data_snd_check(sk, tp); + tcp_data_snd_check(sk); return 0; } @@ -4864,7 +4891,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* tcp_data could move socket to TIME-WAIT */ if (sk->sk_state != TCP_CLOSE) { - tcp_data_snd_check(sk, tp); + tcp_data_snd_check(sk); tcp_ack_snd_check(sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 94d9f0c63682..3a60aea744ae 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -62,12 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -static void update_send_head(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static void update_send_head(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - tcp_packets_out_inc(sk, tp, skb); + tcp_packets_out_inc(sk, skb); } /* SND.NXT, if window was not shrunk. @@ -76,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp, * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */ -static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp) +static inline __u32 tcp_acceptable_seq(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt)) return tp->snd_nxt; else @@ -516,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, md5 ? &md5_hash_location : #endif NULL); - TCP_ECN_send(sk, tp, skb, tcp_header_size); + TCP_ECN_send(sk, skb, tcp_header_size); } #ifdef CONFIG_TCP_MD5SIG @@ -927,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) /* Congestion window validation. (RFC2861) */ -static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) +static void tcp_cwnd_validate(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out = tp->packets_out; if (packets_out >= tp->snd_cwnd) { @@ -1076,8 +1080,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, return cwnd_quota; } -int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) +int tcp_may_send_now(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); return (skb && @@ -1144,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, * * This algorithm is from John Heffner. */ -static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) +static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; @@ -1324,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk) /* Decrement cwnd here because we are sending * effectively two packets. */ tp->snd_cwnd--; - update_send_head(sk, tp, nskb); + update_send_head(sk, nskb); icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len); tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq; @@ -1387,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) nonagle : TCP_NAGLE_PUSH)))) break; } else { - if (tcp_tso_should_defer(sk, tp, skb)) + if (tcp_tso_should_defer(sk, skb)) break; } @@ -1416,14 +1422,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) /* Advance the send_head. This one is sent out. * This call will increment packets_out. */ - update_send_head(sk, tp, skb); + update_send_head(sk, skb); tcp_minshall_update(tp, mss_now, skb); sent_pkts++; } if (likely(sent_pkts)) { - tcp_cwnd_validate(sk, tp); + tcp_cwnd_validate(sk); return 0; } return !tp->packets_out && tcp_send_head(sk); @@ -1433,14 +1439,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) * TCP_CORK or attempt at coalescing tiny packets. * The socket must be locked by the caller. */ -void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp, - unsigned int cur_mss, int nonagle) +void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, + int nonagle) { struct sk_buff *skb = tcp_send_head(sk); if (skb) { if (tcp_write_xmit(sk, cur_mss, nonagle)) - tcp_check_probe_timer(sk, tp); + tcp_check_probe_timer(sk); } } @@ -1484,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) TCP_SKB_CB(skb)->when = tcp_time_stamp; if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) { - update_send_head(sk, tp, skb); - tcp_cwnd_validate(sk, tp); + update_send_head(sk, skb); + tcp_cwnd_validate(sk); return; } } @@ -1933,7 +1939,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) * segments to send. */ - if (tcp_may_send_now(sk, tp)) + if (tcp_may_send_now(sk)) return; if (tp->forward_skb_hint) { @@ -2023,7 +2029,7 @@ void tcp_send_fin(struct sock *sk) TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to @@ -2033,7 +2039,6 @@ void tcp_send_fin(struct sock *sk) */ void tcp_send_active_reset(struct sock *sk, gfp_t priority) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; /* NOTE: No TCP options attached and we never retransmit this. */ @@ -2053,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb_shinfo(skb)->gso_type = 0; /* Send it off. */ - TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); + TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk); TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) @@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; - TCP_ECN_send_syn(sk, tp, buff); + TCP_ECN_send_syn(sk, buff); TCP_SKB_CB(buff)->sacked = 0; skb_shinfo(buff)->gso_segs = 1; skb_shinfo(buff)->gso_size = 0; @@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk) { /* If we have been reset, we may not send again. */ if (sk->sk_state != TCP_CLOSE) { - struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; /* We are not putting this on the write queue, so @@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk) skb_shinfo(buff)->gso_type = 0; /* Send it off, this clears delayed acks for us. */ - TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); + TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk); TCP_SKB_CB(buff)->when = tcp_time_stamp; tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); } @@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk) TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); if (!err) { - update_send_head(sk, tp, skb); + update_send_head(sk, skb); } return err; } else { -- cgit v1.2.3 From 164891aadf1721fca4dce473bb0e0998181537c6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 23 Apr 2007 22:26:16 -0700 Subject: [TCP]: Congestion control API update. Do some simple changes to make congestion control API faster/cleaner. * use ktime_t rather than timeval * merge rtt sampling into existing ack callback this means one indirect call versus two per ack. * use flags bits to store options/settings Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ include/net/tcp.h | 9 +++++---- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 14 +++++++------- net/ipv4/tcp_cubic.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_illinois.c | 16 +++++++--------- net/ipv4/tcp_input.c | 25 ++++++++----------------- net/ipv4/tcp_lp.c | 8 +++++--- net/ipv4/tcp_output.c | 2 +- net/ipv4/tcp_vegas.c | 10 +++++++--- net/ipv4/tcp_veno.c | 10 +++++++--- net/ipv4/tcp_westwood.c | 2 +- net/ipv4/tcp_yeah.c | 6 ++++-- net/ipv4/tcp_yeah.h | 7 +++++-- 15 files changed, 65 insertions(+), 55 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 50f6f6a094cf..2694cb3ca763 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb) skb->tstamp = ktime_get_real(); } +static inline ktime_t net_timedelta(ktime_t t) +{ + return ktime_sub(ktime_get_real(), t); +} + extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); diff --git a/include/net/tcp.h b/include/net/tcp.h index 43910fe3c448..a385797f160a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -629,9 +629,12 @@ enum tcp_ca_event { #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CONG_NON_RESTRICTED 0x1 +#define TCP_CONG_RTT_STAMP 0x2 + struct tcp_congestion_ops { struct list_head list; - int non_restricted; + unsigned long flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -645,8 +648,6 @@ struct tcp_congestion_ops { /* do new cwnd calculation (required) */ void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); - /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ @@ -654,7 +655,7 @@ struct tcp_congestion_ops { /* new value of cwnd after loss (optional) */ u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last); /* get info for inet_diag (optional) */ void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 5730333cd0ac..281c9f913257 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index ccd88407e0cd..86b26539e54b 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name) #endif if (ca) { - ca->non_restricted = 1; /* default is always allowed */ + ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ list_move(&ca->list, &tcp_cong_list); ret = 0; } @@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen) *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (!ca->non_restricted) + if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) continue; offs += snprintf(buf + offs, maxlen - offs, "%s%s", @@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val) } } - /* pass 2 clear */ + /* pass 2 clear old values */ list_for_each_entry_rcu(ca, &tcp_cong_list, list) - ca->non_restricted = 0; + ca->flags &= ~TCP_CONG_NON_RESTRICTED; /* pass 3 mark as allowed */ while ((name = strsep(&val, " ")) && *name) { ca = tcp_ca_find(name); WARN_ON(!ca); if (ca) - ca->non_restricted = 1; + ca->flags |= TCP_CONG_NON_RESTRICTED; } out: spin_unlock(&tcp_cong_list_lock); @@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (!ca) err = -ENOENT; - else if (!(ca->non_restricted || capable(CAP_NET_ADMIN))) + else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN))) err = -EPERM; else if (!try_module_get(ca->owner)) @@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk) EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); struct tcp_congestion_ops tcp_reno = { + .flags = TCP_CONG_NON_RESTRICTED, .name = "reno", - .non_restricted = 1, .owner = THIS_MODULE, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_reno_cong_avoid, diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 296845be912b..14224487b16b 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state) /* Track delayed acknowledgment ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct sock *sk, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 1020eb48d8d1..4ba4a7ae0a85 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk) } } -static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index ae6298600886..8e3165917f72 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk) } /* Measure RTT for each ack. */ -static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt) +static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last) { struct illinois *ca = inet_csk_ca(sk); + u32 rtt; + + ca->acked = pkts_acked; + + rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC; /* ignore bogus values, this prevents wraparound in alpha math */ if (rtt > RTT_MAX) @@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt) ca->sum_rtt += rtt; } -/* Capture count of packets covered by ack, to adjust for delayed acks */ -static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked) -{ - struct illinois *ca = inet_csk_ca(sk); - ca->acked = pkts_acked; -} - /* Maximum queuing delay */ static inline u32 max_delay(const struct illinois *ca) { @@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_illinois = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, .min_cwnd = tcp_reno_min_cwnd, .cong_avoid = tcp_illinois_cong_avoid, .set_state = tcp_illinois_state, - .rtt_sample = tcp_illinois_rtt_sample, .get_info = tcp_illinois_info, .pkts_acked = tcp_illinois_acked, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 633389390788..051f0f815f17 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, return acked; } -static u32 tcp_usrtt(struct timeval *tv) -{ - struct timeval now; - - do_gettimeofday(&now); - return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec); -} - /* Remove acknowledged frames from the retransmission queue. */ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) { @@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) int acked = 0; __s32 seq_rtt = -1; u32 pkts_acked = 0; - void (*rtt_sample)(struct sock *sk, u32 usrtt) - = icsk->icsk_ca_ops->rtt_sample; - struct timeval tv = { .tv_sec = 0, .tv_usec = 0 }; + ktime_t last_ackt = ktime_set(0,0); while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { @@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) seq_rtt = -1; } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - skb_get_timestamp(skb, &tv); + last_ackt = skb->tstamp; } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); @@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } } else if (seq_rtt < 0) { seq_rtt = now - scb->when; - skb_get_timestamp(skb, &tv); + last_ackt = skb->tstamp; } tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); @@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } if (acked&FLAG_ACKED) { + const struct tcp_congestion_ops *ca_ops + = inet_csk(sk)->icsk_ca_ops; + tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk); - if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED)) - (*rtt_sample)(sk, tcp_usrtt(&tv)); - if (icsk->icsk_ca_ops->pkts_acked) - icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); + if (ca_ops->pkts_acked) + ca_ops->pkts_acked(sk, pkts_acked, last_ackt); } #if FASTRETRANS_DEBUG > 0 diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index f0ebaf0e21cb..b4e062ab24a1 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk) * 3. calc smoothed OWD (SOWD). * Most ideas come from the original TCP-LP implementation. */ -static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) +static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) { struct lp *lp = inet_csk_ca(sk); s64 mowd = tcp_lp_owd_calculator(sk); @@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) * newReno in increase case. * We work it out by following the idea from TCP-LP's paper directly */ -static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) +static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); + tcp_lp_rtt_sample(sk, ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC); + /* calc inference */ if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); @@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) } static struct tcp_congestion_ops tcp_lp = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_lp_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_lp_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_lp_rtt_sample, .pkts_acked = tcp_lp_pkts_acked, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3a60aea744ae..e70a6840cb64 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, /* If congestion control is doing timestamping, we must * take such a timestamp before we potentially clone/copy. */ - if (icsk->icsk_ca_ops->rtt_sample) + if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP) __net_timestamp(skb); if (likely(clone_it)) { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 87e72bef6d08..f4104eeb5f26 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) @@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, } static struct tcp_congestion_ops tcp_vegas = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_vegas_init, .ssthresh = tcp_reno_ssthresh, .cong_avoid = tcp_vegas_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_vegas_rtt_calc, + .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index ce57bf302f6c..0b50d0607a0e 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk) } /* Do rtt sampling needed for Veno. */ -static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct veno *veno = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < veno->basertt) @@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk) } static struct tcp_congestion_ops tcp_veno = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, .cong_avoid = tcp_veno_cong_avoid, - .rtt_sample = tcp_veno_rtt_calc, + .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, .cwnd_event = tcp_veno_cwnd_event, diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ae1026a67720..e61e09dd513e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. */ -static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct westwood *w = inet_csk_ca(sk); if (cnt > 0) diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 46dd1bee583a..81ef02c1649a 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk) } -static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked) +static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last) { const struct inet_connection_sock *icsk = inet_csk(sk); struct yeah *yeah = inet_csk_ca(sk); if (icsk->icsk_ca_state == TCP_CA_Open) yeah->pkts_acked = pkts_acked; + + tcp_vegas_pkts_acked(sk, pkts_acked, last); } static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, @@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) { } static struct tcp_congestion_ops tcp_yeah = { + .flags = TCP_CONG_RTT_STAMP, .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, .cong_avoid = tcp_yeah_cong_avoid, .min_cwnd = tcp_reno_min_cwnd, - .rtt_sample = tcp_vegas_rtt_calc, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, .get_info = tcp_vegas_get_info, diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h index a62d82038fd0..33ad5385c188 100644 --- a/net/ipv4/tcp_yeah.h +++ b/net/ipv4/tcp_yeah.h @@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) +static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last) { struct vegas *vegas = inet_csk_ca(sk); - u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ + u32 vrtt; + + /* Never allow zero rtt or baseRTT */ + vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1; /* Filter to find propagation delay: */ if (vrtt < vegas->baseRTT) -- cgit v1.2.3 From 34588b4c046c34773e5a1a962da7b78b05c4d1bd Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 30 Apr 2007 00:57:33 -0700 Subject: [TCP]: Catch skb with S+L bugs earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SACKED_ACKED and LOST are mutually exclusive with SACK, thus having their sum larger than packets_out is bug with SACK. Eventually these bugs trigger traps in the tcp_clean_rtx_queue with SACK but it's much more informative to do this here. Non-SACK TCP, however, could get more than packets_out duplicate ACKs which each increment sacked_out, so it makes sense to do this kind of limitting for non-SACK TCP but not for SACK enabled one. Perhaps the author had the opposite in mind but did the logic accidently wrong way around? Anyway, the sacked_out incrementer code for non-SACK already deals this issue before calling sync_left_out so this trapping can be done unconditionally. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index a385797f160a..c6ecd455edab 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -736,9 +736,7 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) static inline void tcp_sync_left_out(struct tcp_sock *tp) { - if (tp->rx_opt.sack_ok && - (tp->sacked_out >= tp->packets_out - tp->lost_out)) - tp->sacked_out = tp->packets_out - tp->lost_out; + BUG_ON(tp->sacked_out + tp->lost_out > tp->packets_out); tp->left_out = tp->sacked_out + tp->lost_out; } -- cgit v1.2.3 From d551e4541dd60ae53459f77a971f2d6043431f5f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 30 Apr 2007 00:42:20 -0700 Subject: [TCP] FRTO: RFC4138 allows Nagle override when new data must be sent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a corner case where less than MSS sized new data thingie is awaiting in the send queue. For F-RTO to work correctly, a new data segment must be sent at certain point or F-RTO cannot be used at all. RFC4138 allows overriding of Nagle at that point. Implementation uses frto_counter states 2 and 3 to distinguish when Nagle override is needed. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp_input.c | 13 ++++++++----- net/ipv4/tcp_output.c | 6 ++++-- 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/net/tcp.h') diff --git a/include/net/tcp.h b/include/net/tcp.h index c6ecd455edab..ef8f9d4dae85 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1199,9 +1199,14 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk) static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); + sk->sk_send_head = skb->next; if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) sk->sk_send_head = NULL; + /* Don't override Nagle indefinately with F-RTO */ + if (tp->frto_counter == 2) + tp->frto_counter = 3; } static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6b669898b197..7641b2761a14 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2637,7 +2637,9 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag) * algorithm is not part of the F-RTO detection algorithm * given in RFC4138 but can be selected separately). * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. + * and TCP falls back to conventional RTO recovery. F-RTO allows overriding + * of Nagle, this is done using frto_counter states 2 and 3, when a new data + * segment of any size sent during F-RTO, state 2 is upgraded to 3. * * Rationale: if the RTO was spurious, new ACKs should arrive from the * original window even after we transmit two new data segments. @@ -2666,7 +2668,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) inet_csk(sk)->icsk_retransmits = 0; if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag); + tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); return 1; } @@ -2692,7 +2694,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) return 1; } - if ((tp->frto_counter == 2) && + if ((tp->frto_counter >= 2) && (!(flag&FLAG_FORWARD_PROGRESS) || ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) { /* RFC4138 shortcoming (see comment above) */ @@ -2709,14 +2711,15 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag) if (!tcp_send_head(sk) || after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tp->snd_una + tp->snd_wnd)) { - tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag); + tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), + flag); return 1; } tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; tp->frto_counter = 2; return 1; - } else /* frto_counter == 2 */ { + } else { switch (sysctl_tcp_frto_response) { case 2: tcp_undo_spur_to_response(sk, flag); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b5fa3c19afee..0faacf9c419d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1035,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, if (nonagle & TCP_NAGLE_PUSH) return 1; - /* Don't use the nagle rule for urgent data (or for the final FIN). */ - if (tp->urg_mode || + /* Don't use the nagle rule for urgent data (or for the final FIN). + * Nagle can be ignored during F-RTO too (see RFC4138). + */ + if (tp->urg_mode || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; -- cgit v1.2.3