From 59a08cba6a604a265e45e9b970e372554cf46627 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 28 Feb 2009 04:44:28 +0000
Subject: tcp: fix lost_cnt_hint miscounts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is possible that lost_cnt_hint gets underflow in
tcp_clean_rtx_queue because the cumulative ACK can cover
the segment where lost_skb_hint points to only partially,
which means that the hint is not cleared, opposite to what
my (earlier) comment claimed.

Also I don't agree what I ended up writing about non-trivial
case there to be what I intented to say. It was not supposed
to happen that the hint won't get cleared and we underflow
in any scenario.

In general, this is quite hard to trigger in practice.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c28976a7e596..3f2f09091bcf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3273,18 +3273,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
 		} else {
+			int delta;
+
 			/* Non-retransmitted hole got filled? That's reordering */
 			if (reord < prior_fackets)
 				tcp_update_reordering(sk, tp->fackets_out - reord, 0);
 
-			/* No need to care for underflows here because
-			 * the lost_skb_hint gets NULLed if we're past it
-			 * (or something non-trivial happened)
-			 */
-			if (tcp_is_fack(tp))
-				tp->lost_cnt_hint -= pkts_acked;
-			else
-				tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
+			delta = tcp_is_fack(tp) ? pkts_acked :
+						  prior_sacked - tp->sacked_out;
+			tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
 		}
 
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
-- 
cgit v1.2.3


From d0af4160d19ff2849386140881e729f9ba86f2aa Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 28 Feb 2009 04:44:32 +0000
Subject: tcp: remove redundant code from tcp_mark_lost_retrans
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Arnd Hannemann <hannemann@nets.rwth-aachen.de> noticed and was
puzzled by the fact that !tcp_is_fack(tp) leads to early return
near the beginning and the later on tcp_is_fack(tp) was still
used in an if condition. The later check was a left-over from
RFC3517 SACK stuff (== !tcp_is_fack(tp) behavior nowadays) as
there wasn't clear way how to handle this particular check
cheaply in the spirit of RFC3517 (using only SACK blocks, not
holes + SACK blocks as with FACK). I sort of left it there as
a reminder but since it's confusing other people just remove
it and comment the missing-feature stuff instead.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Arnd Hannemann <hannemann@nets.rwth-aachen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f2f09091bcf..125b4517f368 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1178,10 +1178,18 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
 			continue;
 
-		if (after(received_upto, ack_seq) &&
-		    (tcp_is_fack(tp) ||
-		     !before(received_upto,
-			     ack_seq + tp->reordering * tp->mss_cache))) {
+		/* TODO: We would like to get rid of tcp_is_fack(tp) only
+		 * constraint here (see above) but figuring out that at
+		 * least tp->reordering SACK blocks reside between ack_seq
+		 * and received_upto is not easy task to do cheaply with
+		 * the available datastructures.
+		 *
+		 * Whether FACK should check here for tp->reordering segs
+		 * in-between one could argue for either way (it would be
+		 * rather simple to implement as we could count fack_count
+		 * during the walk and do tp->fackets_out - fack_count).
+		 */
+		if (after(received_upto, ack_seq)) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 			tp->retrans_out -= tcp_skb_pcount(skb);
 
-- 
cgit v1.2.3


From 7363a5b233734dba339f2874ff6ed6c489d3d865 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 28 Feb 2009 04:44:33 +0000
Subject: tcp: separate timeout marking loop to it's own function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some comment about its current state added. So far I have
seen very few cases where the thing is actually useful,
usually just marginally (though admittedly I don't usually
see top of window losses where it seems possible that there
could be some gain), instead, more often the cases suffer
from L-marking spike which is certainly not desirable
(I'll bury improving it to my todo list, but on a low
prio position).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 63 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 24 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 125b4517f368..03f5ede87224 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2461,6 +2461,44 @@ static int tcp_time_to_recover(struct sock *sk)
 	return 0;
 }
 
+/* New heuristics: it is possible only after we switched to restart timer
+ * each time when something is ACKed. Hence, we can detect timed out packets
+ * during fast retransmit without falling to slow start.
+ *
+ * Usefulness of this as is very questionable, since we should know which of
+ * the segments is the next to timeout which is relatively expensive to find
+ * in general case unless we add some data structure just for that. The
+ * current approach certainly won't find the right one too often and when it
+ * finally does find _something_ it usually marks large part of the window
+ * right away (because a retransmission with a larger timestamp blocks the
+ * loop from advancing). -ij
+ */
+static void tcp_timeout_skbs(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
+		return;
+
+	skb = tp->scoreboard_skb_hint;
+	if (tp->scoreboard_skb_hint == NULL)
+		skb = tcp_write_queue_head(sk);
+
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
+		if (!tcp_skb_timedout(sk, skb))
+			break;
+
+		tcp_skb_mark_lost(tp, skb);
+	}
+
+	tp->scoreboard_skb_hint = skb;
+
+	tcp_verify_left_out(tp);
+}
+
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
@@ -2533,30 +2571,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 		tcp_mark_head_lost(sk, sacked_upto);
 	}
 
-	/* New heuristics: it is possible only after we switched
-	 * to restart timer each time when something is ACKed.
-	 * Hence, we can detect timed out packets during fast
-	 * retransmit without falling to slow start.
-	 */
-	if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
-		struct sk_buff *skb;
-
-		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
-			: tcp_write_queue_head(sk);
-
-		tcp_for_write_queue_from(skb, sk) {
-			if (skb == tcp_send_head(sk))
-				break;
-			if (!tcp_skb_timedout(sk, skb))
-				break;
-
-			tcp_skb_mark_lost(tp, skb);
-		}
-
-		tp->scoreboard_skb_hint = skb;
-
-		tcp_verify_left_out(tp);
-	}
+	tcp_timeout_skbs(sk);
 }
 
 /* CWND moderation, preventing bursts due to too big ACKs
-- 
cgit v1.2.3


From cabeccbd172cc305f4383f5a4808ae254745275f Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 28 Feb 2009 04:44:38 +0000
Subject: tcp: kill eff_sacks "cache", the sole user can calculate itself
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also fixes insignificant bug that would cause sending of stale
SACK block (would occur in some corner cases).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  1 -
 include/net/tcp.h        |  1 -
 net/ipv4/tcp_input.c     | 15 ++-------------
 net/ipv4/tcp_minisocks.c |  3 +--
 net/ipv4/tcp_output.c    | 12 ++++++------
 5 files changed, 9 insertions(+), 23 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0cd99e6baca5..4b86ad71e054 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -218,7 +218,6 @@ struct tcp_options_received {
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
 /*	SACKs data	*/
-	u8	eff_sacks;	/* Size of SACK array to send with next packet */
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0366a559afec..055e4946d4c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -926,7 +926,6 @@ extern void tcp_done(struct sock *sk);
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
 	rx_opt->dsack = 0;
-	rx_opt->eff_sacks = 0;
 	rx_opt->num_sacks = 0;
 }
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 03f5ede87224..e4442a293eb0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4099,7 +4099,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.dsack = 1;
 		tp->duplicate_sack[0].start_seq = seq;
 		tp->duplicate_sack[0].end_seq = end_seq;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
 	}
 }
 
@@ -4154,8 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 * Decrease num_sacks.
 			 */
 			tp->rx_opt.num_sacks--;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
-					       tp->rx_opt.dsack;
 			for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i + 1];
 			continue;
@@ -4218,7 +4215,6 @@ new_sack:
 	sp->start_seq = seq;
 	sp->end_seq = end_seq;
 	tp->rx_opt.num_sacks++;
-	tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
 }
 
 /* RCV.NXT advances, some SACKs should be eaten. */
@@ -4232,7 +4228,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
 	if (skb_queue_empty(&tp->out_of_order_queue)) {
 		tp->rx_opt.num_sacks = 0;
-		tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
 		return;
 	}
 
@@ -4253,11 +4248,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		this_sack++;
 		sp++;
 	}
-	if (num_sacks != tp->rx_opt.num_sacks) {
+	if (num_sacks != tp->rx_opt.num_sacks)
 		tp->rx_opt.num_sacks = num_sacks;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
-				       tp->rx_opt.dsack;
-	}
 }
 
 /* This one checks to see if we can put data from the
@@ -4333,10 +4325,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	TCP_ECN_accept_cwr(tp, skb);
 
-	if (tp->rx_opt.dsack) {
+	if (tp->rx_opt.dsack)
 		tp->rx_opt.dsack = 0;
-		tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-	}
 
 	/*  Queue data for delivery to the user.
 	 *  Packets in sequence go to the receive queue.
@@ -4456,7 +4446,6 @@ drop:
 		if (tcp_is_sack(tp)) {
 			tp->rx_opt.num_sacks = 1;
 			tp->rx_opt.dsack     = 0;
-			tp->rx_opt.eff_sacks = 1;
 			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f67effbb102b..bb3d8b35f19a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->rx_opt.saw_tstamp = 0;
 
 		newtp->rx_opt.dsack = 0;
-		newtp->rx_opt.eff_sacks = 0;
-
 		newtp->rx_opt.num_sacks = 0;
+
 		newtp->urg_data = 0;
 
 		if (sock_flag(newsk, SOCK_KEEPOPEN))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 61445b57610c..1555bb73b638 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
 
-		if (tp->rx_opt.dsack) {
+		if (tp->rx_opt.dsack)
 			tp->rx_opt.dsack = 0;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-		}
 	}
 }
 
@@ -550,6 +548,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
+	unsigned int eff_sacks;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +567,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
-	if (unlikely(tp->rx_opt.eff_sacks)) {
+	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
+	if (unlikely(eff_sacks)) {
 		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
 		opts->num_sack_blocks =
-			min_t(unsigned, tp->rx_opt.eff_sacks,
+			min_t(unsigned, eff_sacks,
 			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
 			      TCPOLEN_SACK_PERBLOCK);
 		size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -1418,7 +1418,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	    icsk->icsk_mtup.probe_size ||
 	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
 	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.eff_sacks)
+	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-- 
cgit v1.2.3


From ee7537b63a28b42b22e48842dfeedc66d96b71f1 Mon Sep 17 00:00:00 2001
From: Hantzis Fotis <xantzis@ceid.upatras.gr>
Date: Mon, 2 Mar 2009 22:42:02 -0800
Subject: tcp: tcp_init_wl / tcp_update_wl argument cleanup

The above functions from include/net/tcp.h have been defined with an
argument that they never use. The argument is 'u32 ack' which is never
used inside the function body, and thus it can be removed. The rest of
the patch involves the necessary changes to the function callers of the
above two functions.

Signed-off-by: Hantzis Fotis <xantzis@ceid.upatras.gr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        | 4 ++--
 net/ipv4/tcp_input.c     | 9 ++++-----
 net/ipv4/tcp_minisocks.c | 2 +-
 net/ipv4/tcp_output.c    | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 055e4946d4c8..d74ac301e6bc 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -825,12 +825,12 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
 }
 
-static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
 {
 	tp->snd_wl1 = seq;
 }
 
-static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
+static inline void tcp_update_wl(struct tcp_sock *tp, u32 seq)
 {
 	tp->snd_wl1 = seq;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e4442a293eb0..5ecd7aa25979 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3416,7 +3416,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
 
 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
 		flag |= FLAG_WIN_UPDATE;
-		tcp_update_wl(tp, ack, ack_seq);
+		tcp_update_wl(tp, ack_seq);
 
 		if (tp->snd_wnd != nwin) {
 			tp->snd_wnd = nwin;
@@ -3621,7 +3621,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		 * No more checks are required.
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
-		tcp_update_wl(tp, ack, ack_seq);
+		tcp_update_wl(tp, ack_seq);
 		tp->snd_una = ack;
 		flag |= FLAG_WIN_UPDATE;
 
@@ -5418,7 +5418,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * never scaled.
 		 */
 		tp->snd_wnd = ntohs(th->window);
-		tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
+		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
 		if (!tp->rx_opt.wscale_ok) {
 			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5679,8 +5679,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
 				tp->snd_wnd = ntohs(th->window) <<
 					      tp->rx_opt.snd_wscale;
-				tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
-					    TCP_SKB_CB(skb)->seq);
+				tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 
 				/* tcp_ack considers this ACK as duplicate
 				 * and does not calculate rtt.
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index bb3d8b35f19a..4b0df3e6b609 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		tcp_prequeue_init(newtp);
 
-		tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
+		tcp_init_wl(newtp, treq->rcv_isn);
 
 		newtp->srtt = 0;
 		newtp->mdev = TCP_TIMEOUT_INIT;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 920c57b90ded..eb285befdf3b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2337,7 +2337,7 @@ static void tcp_connect_init(struct sock *sk)
 	sk->sk_err = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
-	tcp_init_wl(tp, tp->write_seq, 0);
+	tcp_init_wl(tp, 0);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
-- 
cgit v1.2.3


From 5861f8e58dd84fc34b691c2e8d4824dea68c360e Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:01 +0000
Subject: tcp: remove pointless .dsack/.num_sacks code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the pure assignment case, the earlier zeroing is
still in effect.

David S. Miller raised concerns if the ifs are there to avoid
dirtying cachelines. I came to these conclusions:

> We'll be dirty it anyway (now that I check), the first "real" statement
> in tcp_rcv_established is:
>
>       tp->rx_opt.saw_tstamp = 0;
>
> ...that'll land on the same dword. :-/
>
> I suppose the blocks are there just because they had more complexity
> inside when they had to calculate the eff_sacks too (maybe it would
> have been better to just remove them in that drop-patch so you would
> have had less head-ache :-)).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c  | 7 ++-----
 net/ipv4/tcp_output.c | 3 +--
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5ecd7aa25979..cd39d1d02dc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4248,8 +4248,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		this_sack++;
 		sp++;
 	}
-	if (num_sacks != tp->rx_opt.num_sacks)
-		tp->rx_opt.num_sacks = num_sacks;
+	tp->rx_opt.num_sacks = num_sacks;
 }
 
 /* This one checks to see if we can put data from the
@@ -4325,8 +4324,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
 	TCP_ECN_accept_cwr(tp, skb);
 
-	if (tp->rx_opt.dsack)
-		tp->rx_opt.dsack = 0;
+	tp->rx_opt.dsack = 0;
 
 	/*  Queue data for delivery to the user.
 	 *  Packets in sequence go to the receive queue.
@@ -4445,7 +4443,6 @@ drop:
 		/* Initial out of order segment, build 1 SACK. */
 		if (tcp_is_sack(tp)) {
 			tp->rx_opt.num_sacks = 1;
-			tp->rx_opt.dsack     = 0;
 			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
 			tp->selective_acks[0].end_seq =
 						TCP_SKB_CB(skb)->end_seq;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index eb285befdf3b..325658039139 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,8 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
 
-		if (tp->rx_opt.dsack)
-			tp->rx_opt.dsack = 0;
+		tp->rx_opt.dsack = 0;
 	}
 }
 
-- 
cgit v1.2.3


From c43d558a5139a3b22dcac3f19f64ecb39130b02e Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:02 +0000
Subject: tcp: kill dead end_seq variable in clean_rtx_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I've already forgotten what for this was necessary, anyway
it's no longer used (if it ever was).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cd39d1d02dc3..f527a16a7b33 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3201,7 +3201,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
-		u32 end_seq;
 		u32 acked_pcount;
 		u8 sacked = scb->sacked;
 
@@ -3216,10 +3215,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 				break;
 
 			fully_acked = 0;
-			end_seq = tp->snd_una;
 		} else {
 			acked_pcount = tcp_skb_pcount(skb);
-			end_seq = scb->end_seq;
 		}
 
 		/* MTU probing checks */
-- 
cgit v1.2.3


From c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:03 +0000
Subject: tcp: consolidate paws check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wow, it was quite tricky to merge that stream of negations
but I think I finally got it right:

check & replace_ts_recent:
(s32)(rcv_tsval - ts_recent) >= 0                  => 0
(s32)(ts_recent - rcv_tsval) <= 0                  => 0

discard:
(s32)(ts_recent - rcv_tsval)  > TCP_PAWS_WINDOW    => 1
(s32)(ts_recent - rcv_tsval) <= TCP_PAWS_WINDOW    => 0

I toggled the return values of tcp_paws_check around since
the old encoding added yet-another negation making tracking
of truth-values really complicated.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        | 18 ++++++++++++++----
 net/ipv4/tcp_input.c     | 11 +++++------
 net/ipv4/tcp_minisocks.c |  4 ++--
 3 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d74ac301e6bc..255ca35bea05 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -997,11 +997,21 @@ static inline int tcp_fin_time(const struct sock *sk)
 	return fin_timeout;
 }
 
-static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int rst)
+static inline int tcp_paws_check(const struct tcp_options_received *rx_opt,
+				 int paws_win)
 {
-	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
-		return 0;
-	if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
+	if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
+		return 1;
+	if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))
+		return 1;
+
+	return 0;
+}
+
+static inline int tcp_paws_reject(const struct tcp_options_received *rx_opt,
+				  int rst)
+{
+	if (tcp_paws_check(rx_opt, 0))
 		return 0;
 
 	/* RST segments are not recommended to carry timestamp,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f527a16a7b33..b7d02c5dd6da 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3883,8 +3883,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 * Not only, also it occurs for expired timestamps.
 		 */
 
-		if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
-		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+		if (tcp_paws_check(&tp->rx_opt, 0))
 			tcp_store_ts_recent(tp);
 	}
 }
@@ -3936,9 +3935,9 @@ static inline int tcp_paws_discard(const struct sock *sk,
 				   const struct sk_buff *skb)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
-		get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
-		!tcp_disordered_ack(sk, skb));
+
+	return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
+	       !tcp_disordered_ack(sk, skb);
 }
 
 /* Check segment sequence number for validity.
@@ -5513,7 +5512,7 @@ discard:
 
 	/* PAWS check. */
 	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
-	    tcp_paws_check(&tp->rx_opt, 0))
+	    tcp_paws_reject(&tp->rx_opt, 0))
 		goto discard_and_undo;
 
 	if (th->syn) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4b0df3e6b609..43bbba7926ee 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent	= tcptw->tw_ts_recent;
 			tmp_opt.ts_recent_stamp	= tcptw->tw_ts_recent_stamp;
-			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
 
@@ -511,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			 * from another data.
 			 */
 			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
-			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
+			paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
 		}
 	}
 
-- 
cgit v1.2.3


From 72211e90501f954f586481c25521c3724cda3cc7 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:04 +0000
Subject: tcp: don't check mtu probe completion in the loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It seems that no variables clash such that we couldn't do
the check just once later on. Therefore move it.

Also kill dead obvious comment, dead argument and add
unlikely since this mtu probe does not happen too often.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b7d02c5dd6da..311c30f73ee4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2836,7 +2836,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
-static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb)
+static void tcp_mtup_probe_success(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3219,12 +3219,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
-		/* MTU probing checks */
-		if (fully_acked && icsk->icsk_mtup.probe_size &&
-		    !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) {
-			tcp_mtup_probe_success(sk, skb);
-		}
-
 		if (sacked & TCPCB_RETRANS) {
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
@@ -3287,6 +3281,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		const struct tcp_congestion_ops *ca_ops
 			= inet_csk(sk)->icsk_ca_ops;
 
+		if (unlikely(icsk->icsk_mtup.probe_size &&
+			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
+			tcp_mtup_probe_success(sk);
+		}
+
 		tcp_ack_update_rtt(sk, flag, seq_rtt);
 		tcp_rearm_rto(sk);
 
-- 
cgit v1.2.3


From 0c54b85f2828128274f319a1eb3ce7f604fe2a53 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 14 Mar 2009 14:23:05 +0000
Subject: tcp: simplify tcp_current_mss
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's very little need for most of the callsites to get
tp->xmit_goal_size updated. That will cost us divide as is,
so slice the function in two. Also, the only users of the
tp->xmit_goal_size are directly behind tcp_current_mss(),
so there's no need to store that variable into tcp_sock
at all! The drop of xmit_goal_size currently leaves 16-bit
hole and some reorganization would again be necessary to
change that (but I'm aiming to fill that hole with u16
xmit_goal_size_segs to cache the results of the remaining
divide to get that tso on regression).

Bring xmit_goal_size parts into tcp.c

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Evgeniy Polyakov <zbr@ioremap.net>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 -
 include/net/tcp.h     | 13 +++++++++++--
 net/ipv4/tcp.c        | 43 +++++++++++++++++++++++++++++++++++--------
 net/ipv4/tcp_input.c  |  2 +-
 net/ipv4/tcp_output.c | 41 +++++++----------------------------------
 5 files changed, 54 insertions(+), 46 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4b86ad71e054..ad2021ccc55a 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -248,7 +248,6 @@ struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
 	u16	tcp_header_len;	/* Bytes of tcp header to send		*/
-	u16	xmit_size_goal;	/* Goal for segmenting output packets	*/
 
 /*
  *	Header prediction flags
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 255ca35bea05..e54c76d75495 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -481,7 +481,16 @@ static inline void tcp_clear_xmit_timers(struct sock *sk)
 }
 
 extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
-extern unsigned int tcp_current_mss(struct sock *sk, int large);
+extern unsigned int tcp_current_mss(struct sock *sk);
+
+/* Bound MSS / TSO packet size with the half of the window */
+static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+	if (tp->max_window && pktsize > (tp->max_window >> 1))
+		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	else
+		return pktsize;
+}
 
 /* tcp.c */
 extern void tcp_get_info(struct sock *, struct tcp_info *);
@@ -822,7 +831,7 @@ static inline void tcp_push_pending_frames(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d3f9beee74c0..886596ff0aae 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -661,6 +661,37 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 	return NULL;
 }
 
+static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
+				       int large_allowed)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 xmit_size_goal;
+
+	xmit_size_goal = mss_now;
+
+	if (large_allowed && sk_can_gso(sk)) {
+		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
+				  inet_csk(sk)->icsk_af_ops->net_header_len -
+				  inet_csk(sk)->icsk_ext_hdr_len -
+				  tp->tcp_header_len);
+
+		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
+		xmit_size_goal -= (xmit_size_goal % mss_now);
+	}
+
+	return xmit_size_goal;
+}
+
+static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+{
+	int mss_now;
+
+	mss_now = tcp_current_mss(sk);
+	*size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
+
+	return mss_now;
+}
+
 static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
 			 size_t psize, int flags)
 {
@@ -677,8 +708,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-	size_goal = tp->xmit_size_goal;
+	mss_now = tcp_send_mss(sk, &size_goal, flags);
 	copied = 0;
 
 	err = -EPIPE;
@@ -761,8 +791,7 @@ wait_for_memory:
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
 
-		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-		size_goal = tp->xmit_size_goal;
+		mss_now = tcp_send_mss(sk, &size_goal, flags);
 	}
 
 out:
@@ -844,8 +873,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	/* This should be in poll */
 	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-	size_goal = tp->xmit_size_goal;
+	mss_now = tcp_send_mss(sk, &size_goal, flags);
 
 	/* Ok commence sending. */
 	iovlen = msg->msg_iovlen;
@@ -1007,8 +1035,7 @@ wait_for_memory:
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
 
-			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
-			size_goal = tp->xmit_size_goal;
+			mss_now = tcp_send_mss(sk, &size_goal, flags);
 		}
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 311c30f73ee4..fae78e3eccc4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2864,7 +2864,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int mss = tcp_current_mss(sk, 0);
+	unsigned int mss = tcp_current_mss(sk);
 	u32 prior_lost = tp->lost_out;
 
 	tcp_for_write_queue(skb, sk) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 325658039139..c1f259d2d33b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -921,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
+		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
 
 	return 0;
 }
@@ -982,15 +982,6 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
-/* Bound MSS / TSO packet size with the half of the window */
-static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
-{
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
-	else
-		return pktsize;
-}
-
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -1037,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  */
-unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+unsigned int tcp_current_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
-	u16 xmit_size_goal;
-	int doing_tso = 0;
 	unsigned header_len;
 	struct tcp_out_options opts;
 	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
-	if (large_allowed && sk_can_gso(sk))
-		doing_tso = 1;
-
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1070,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 		mss_now -= delta;
 	}
 
-	xmit_size_goal = mss_now;
-
-	if (doing_tso) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-		xmit_size_goal -= (xmit_size_goal % mss_now);
-	}
-	tp->xmit_size_goal = xmit_size_goal;
-
 	return mss_now;
 }
 
@@ -1264,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk)
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
 			      tp->nonagle : TCP_NAGLE_PUSH)));
 }
@@ -1421,7 +1394,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-	mss_now = tcp_current_mss(sk, 0);
+	mss_now = tcp_current_mss(sk);
 	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
@@ -1903,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
-	cur_mss = tcp_current_mss(sk, 0);
+	cur_mss = tcp_current_mss(sk);
 
 	/* If receiver has shrunk his window, and skb is out of
 	 * new window, do not retransmit it. The exception is the
@@ -2111,7 +2084,7 @@ void tcp_send_fin(struct sock *sk)
 	 * unsent frames.  But be careful about outgoing SACKS
 	 * and IP options.
 	 */
-	mss_now = tcp_current_mss(sk, 1);
+	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2523,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk)
 	if ((skb = tcp_send_head(sk)) != NULL &&
 	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 		int err;
-		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int mss = tcp_current_mss(sk);
 		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
-- 
cgit v1.2.3


From a0bffffc148cd8e75a48a89ad2ddb74e4081a20a Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Sat, 21 Mar 2009 13:36:17 -0700
Subject: net/*: use linux/kernel.h swap()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tcp_sack_swap seems unnecessary so I pushed swap to the caller.
Also removed comment that seemed then pointless, and added include
when not already there. Compile tested.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_route.c |  6 +-----
 net/ipv4/tcp_input.c  | 23 +++--------------------
 net/ipv6/addrconf.c   |  7 ++-----
 net/sched/sch_tbf.c   |  9 ++-------
 4 files changed, 8 insertions(+), 37 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5130dee0b384..0cc4394117df 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -380,7 +380,6 @@ static int dn_return_short(struct sk_buff *skb)
 	unsigned char *ptr;
 	__le16 *src;
 	__le16 *dst;
-	__le16 tmp;
 
 	/* Add back headers */
 	skb_push(skb, skb->data - skb_network_header(skb));
@@ -399,10 +398,7 @@ static int dn_return_short(struct sk_buff *skb)
 	ptr += 2;
 	*ptr = 0; /* Zero hop count */
 
-	/* Swap source and destination */
-	tmp  = *src;
-	*src = *dst;
-	*dst = tmp;
+	swap(*src, *dst);
 
 	skb->pkt_type = PACKET_OUTGOING;
 	dn_rt_finish_output(skb, NULL, NULL);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fae78e3eccc4..8ac82b3703ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -64,6 +64,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/kernel.h>
 #include <net/dst.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
@@ -1802,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	for (i = used_sacks - 1; i > 0; i--) {
 		for (j = 0; j < i; j++) {
 			if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
-				struct tcp_sack_block tmp;
-
-				tmp = sp[j];
-				sp[j] = sp[j + 1];
-				sp[j + 1] = tmp;
+				swap(sp[j], sp[j + 1]);
 
 				/* Track where the first SACK block goes to */
 				if (j == first_sack_index)
@@ -4156,20 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 	}
 }
 
-static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
-				 struct tcp_sack_block *sack2)
-{
-	__u32 tmp;
-
-	tmp = sack1->start_seq;
-	sack1->start_seq = sack2->start_seq;
-	sack2->start_seq = tmp;
-
-	tmp = sack1->end_seq;
-	sack1->end_seq = sack2->end_seq;
-	sack2->end_seq = tmp;
-}
-
 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4184,7 +4167,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		if (tcp_sack_extend(sp, seq, end_seq)) {
 			/* Rotate this_sack to the first one. */
 			for (; this_sack > 0; this_sack--, sp--)
-				tcp_sack_swap(sp, sp - 1);
+				swap(*sp, *(sp - 1));
 			if (cur_sacks > 1)
 				tcp_sack_maybe_coalesce(tp);
 			return;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 717584bad02e..8499da9e76a2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -40,6 +40,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
@@ -1215,16 +1216,12 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 					}
 					break;
 				} else if (minihiscore < miniscore) {
-					struct ipv6_saddr_score *tmp;
-
 					if (hiscore->ifa)
 						in6_ifa_put(hiscore->ifa);
 
 					in6_ifa_hold(score->ifa);
 
-					tmp = hiscore;
-					hiscore = score;
-					score = tmp;
+					swap(hiscore, score);
 
 					/* restore our iterator */
 					score->ifa = hiscore->ifa;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a2f93c09f3cc..e22dfe85e43e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -236,7 +236,6 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct tc_tbf_qopt *qopt;
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
-	struct qdisc_rate_table *tmp;
 	struct Qdisc *child = NULL;
 	int max_size,n;
 
@@ -295,13 +294,9 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 
-	tmp = q->R_tab;
-	q->R_tab = rtab;
-	rtab = tmp;
+	swap(q->R_tab, rtab);
+	swap(q->P_tab, ptab);
 
-	tmp = q->P_tab;
-	q->P_tab = ptab;
-	ptab = tmp;
 	sch_tree_unlock(sch);
 	err = 0;
 done:
-- 
cgit v1.2.3


From 96e0bf4b5193d0d97d139f99e2dd128763d55521 Mon Sep 17 00:00:00 2001
From: John Dykstra <john.dykstra1@gmail.com>
Date: Sun, 22 Mar 2009 21:49:57 -0700
Subject: tcp: Discard segments that ack data not yet sent

Discard incoming packets whose ack field iincludes data not yet sent.
This is consistent with RFC 793 Section 3.9.

Change tcp_ack() to distinguish between too-small and too-large ack
field values.  Keep segments with too-large ack fields out of the fast
path, and change slow path to discard them.

Reported-by:  Oliver Zheng <mailinglists+netdev@oliverzheng.com>
Signed-off-by: John Dykstra <john.dykstra1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8ac82b3703ab..2bc8e27a163d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3585,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	int prior_packets;
 	int frto_cwnd = 0;
 
-	/* If the ack is newer than sent or older than previous acks
+	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
 	 */
-	if (after(ack, tp->snd_nxt))
-		goto uninteresting_ack;
-
 	if (before(ack, prior_snd_una))
 		goto old_ack;
 
+	/* If the ack includes data we haven't sent yet, discard
+	 * this segment (RFC793 Section 3.9).
+	 */
+	if (after(ack, tp->snd_nxt))
+		goto invalid_ack;
+
 	if (after(ack, prior_snd_una))
 		flag |= FLAG_SND_UNA_ADVANCED;
 
@@ -3683,6 +3686,10 @@ no_queue:
 		tcp_ack_probe(sk);
 	return 1;
 
+invalid_ack:
+	SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+	return -1;
+
 old_ack:
 	if (TCP_SKB_CB(skb)->sacked) {
 		tcp_sacktag_write_queue(sk, skb, prior_snd_una);
@@ -3690,8 +3697,7 @@ old_ack:
 			tcp_try_keep_open(sk);
 	}
 
-uninteresting_ack:
-	SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+	SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
 	return 0;
 }
 
@@ -5141,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 	 */
 
 	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
-	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
+	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
 		int tcp_header_len = tp->tcp_header_len;
 
 		/* Timestamp header prediction: tcp_header_len
@@ -5294,8 +5301,8 @@ slow_path:
 		return -res;
 
 step5:
-	if (th->ack)
-		tcp_ack(sk, skb, FLAG_SLOWPATH);
+	if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
+		goto discard;
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
 
@@ -5632,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 	/* step 5: check the ACK field */
 	if (th->ack) {
-		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
+		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
 
 		switch (sk->sk_state) {
 		case TCP_SYN_RECV:
-- 
cgit v1.2.3