From 522e7548a9bd40305df41c0beae69448b7620d6b Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:54:52 -0800
Subject: [TCP] FRTO: Incorrectly clears TCPCB_EVER_RETRANS bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO was slightly too brave... Should only clear
TCPCB_SACKED_RETRANS bit.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687ac..b21e232d5d33 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1266,7 +1266,7 @@ void tcp_enter_frto(struct sock *sk)
 	tp->undo_retrans = 0;
 
 	sk_stream_for_retrans_queue(skb, sk) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 	}
 	tcp_sync_left_out(tp);
 
-- 
cgit v1.2.3


From 9ead9a1d385ae2c52a6dcf2828d84ce66be04fc2 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:56:19 -0800
Subject: [TCP] FRTO: Separated response from FRTO detection algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO spurious RTO detection algorithm (RFC4138) does not include response
to a detected spurious RTO but can use different response algorithms.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b21e232d5d33..c5be3d0465f5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2467,6 +2467,15 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 	return flag;
 }
 
+/* A very conservative spurious RTO response algorithm: reduce cwnd and
+ * continue in congestion avoidance.
+ */
+static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
+{
+	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tcp_moderate_cwnd(tp);
+}
+
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2488,12 +2497,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 		 */
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 	} else {
-		/* Also the second ACK after RTO advances the window.
-		 * The RTO was likely spurious. Reduce cwnd and continue
-		 * in congestion avoidance
-		 */
-		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-		tcp_moderate_cwnd(tp);
+		tcp_conservative_spur_to_response(tp);
 	}
 
 	/* F-RTO affects on two new ACKs following RTO.
-- 
cgit v1.2.3


From bdaae17da81db79b9aa4dfbf43305cfeef64f6a8 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:59:58 -0800
Subject: [TCP] FRTO: Moved tcp_use_frto from tcp.h to tcp_input.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In addition, removed inline.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 14 +-------------
 net/ipv4/tcp_input.c | 13 +++++++++++++
 2 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5c472f255b77..572a77bb6907 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -341,6 +341,7 @@ extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
 extern int			tcp_child_process(struct sock *parent,
 						  struct sock *child,
 						  struct sk_buff *skb);
+extern int			tcp_use_frto(const struct sock *sk);
 extern void			tcp_enter_frto(struct sock *sk);
 extern void			tcp_enter_loss(struct sock *sk, int how);
 extern void			tcp_clear_retrans(struct tcp_sock *tp);
@@ -1033,19 +1034,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 
 #define TCP_CHECK_TIMER(sk) do { } while (0)
 
-static inline int tcp_use_frto(const struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	
-	/* F-RTO must be activated in sysctl and there must be some
-	 * unsent new data, and the advertised window should allow
-	 * sending it.
-	 */
-	return (sysctl_tcp_frto && sk->sk_send_head &&
-		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
-		       tp->snd_una + tp->snd_wnd));
-}
-
 static inline void tcp_mib_init(void)
 {
 	/* See RFC 2012 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5be3d0465f5..fe96e176d85a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1236,6 +1236,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
+int tcp_use_frto(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* F-RTO must be activated in sysctl and there must be some
+	 * unsent new data, and the advertised window should allow
+	 * sending it.
+	 */
+	return (sysctl_tcp_frto && sk->sk_send_head &&
+		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+		       tp->snd_una + tp->snd_wnd));
+}
+
 /* RTO occurred, but do not yet enter loss state. Instead, transmit two new
  * segments to see from the next ACKs whether any data was really missing.
  * If the RTO was spurious, new ACKs should arrive.
-- 
cgit v1.2.3


From 30935cf4f915c3178ce63331d6ff4c82163e26af Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:01:36 -0800
Subject: [TCP] FRTO: Comment cleanup & improvement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moved comments out from the body of process_frto() to the head
(preferred way; see Documentation/CodingStyle). Bonus: it's much
easier to read in this compacted form.

FRTO algorithm and implementation are described in greater detail.
For the interested reader, more information is available in RFC4138.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fe96e176d85a..561e5d404988 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1236,22 +1236,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
+/* F-RTO can only be used if these conditions are satisfied:
+ *  - there must be some unsent new data
+ *  - the advertised window should allow sending it
+ */
 int tcp_use_frto(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	/* F-RTO must be activated in sysctl and there must be some
-	 * unsent new data, and the advertised window should allow
-	 * sending it.
-	 */
 	return (sysctl_tcp_frto && sk->sk_send_head &&
 		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
 		       tp->snd_una + tp->snd_wnd));
 }
 
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious.
  */
 void tcp_enter_frto(struct sock *sk)
 {
@@ -2489,6 +2489,30 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 	tcp_moderate_cwnd(tp);
 }
 
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO affects during two new ACKs following RTO. State (ACK number) is kept
+ * in frto_counter. When ACK advances window (but not to or beyond highest
+ * sequence sent before RTO):
+ *   On First ACK,  send two new segments out.
+ *   On Second ACK, RTO was likely spurious. Do spurious response (response
+ *                  algorithm is not part of the F-RTO detection algorithm
+ *                  given in RFC4138 but can be selected separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
+ *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
+ *     called when tcp_use_frto() showed green light
+ *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
+ *   - tcp_enter_frto_loss() is called if there is not enough evidence
+ *     to prove that the RTO is indeed spurious. It transfers the control
+ *     from F-RTO to the conventional RTO recovery
+ */
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2497,25 +2521,16 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
-		/* RTO was caused by loss, start retransmitting in
-		 * go-back-N slow start
-		 */
 		tcp_enter_frto_loss(sk);
 		return;
 	}
 
 	if (tp->frto_counter == 1) {
-		/* First ACK after RTO advances the window: allow two new
-		 * segments out.
-		 */
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
-	} else {
+	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
 	}
 
-	/* F-RTO affects on two new ACKs following RTO.
-	 * At latest on third ACK the TCP behavior is back to normal.
-	 */
 	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
 
-- 
cgit v1.2.3


From 7487c48c4fd15d1e2542be1183b783562cfe10bc Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:02:30 -0800
Subject: [TCP] FRTO: Consecutive RTOs keep prior_ssthresh and ssthresh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case a latency spike causes more than one RTO, the later ones should
not cause the already reduced ssthresh to propagate into prior_ssthresh,
since FRTO declares either all such RTOs spurious at once or none of
them. In the treatment of ssthresh, we mimic what tcp_enter_loss() does.

The previous state (in frto_counter) must be available until we have
checked it in tcp_enter_frto(), and the ACK information flag must also
be available in tcp_process_frto().

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 561e5d404988..194e880af51e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1252,6 +1252,10 @@ int tcp_use_frto(const struct sock *sk)
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
  * recovery a bit and use heuristics in tcp_process_frto() to detect if
  * the RTO was spurious.
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ *  "Reduce ssthresh if it has not yet been made inside this window."
  */
 void tcp_enter_frto(struct sock *sk)
 {
@@ -1259,11 +1263,10 @@ void tcp_enter_frto(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	tp->frto_counter = 1;
-
-	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
 	    tp->snd_una == tp->high_seq ||
-	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+	     !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_FRTO);
@@ -1285,6 +1288,7 @@ void tcp_enter_frto(struct sock *sk)
 
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tp->frto_highmark = tp->snd_nxt;
+	tp->frto_counter = 1;
 }
 
 /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
@@ -2513,12 +2517,16 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  *     to prove that the RTO is indeed spurious. It transfers the control
  *     from F-RTO to the conventional RTO recovery
  */
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_sync_left_out(tp);
 
+	/* Duplicate the behavior from Loss state (fastretrans_alert) */
+	if (flag&FLAG_DATA_ACKED)
+		inet_csk(sk)->icsk_retransmits = 0;
+
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk);
@@ -2607,7 +2615,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
-		tcp_process_frto(sk, prior_snd_una);
+		tcp_process_frto(sk, prior_snd_una, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-- 
cgit v1.2.3


From 7b0eb22b1d3b049306813a4aaa52966650f7491c Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:03:35 -0800
Subject: [TCP] FRTO: Use Disorder state during operation instead of Open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Retransmission counter assumptions are to be changed. A compelling
reason to do this exists: using the sysctl in the check would be racy
as soon as FRTO starts to ignore some ACKs (done in the following
patches). Userspace may disable it at any moment, giving a nice oops
if the timing is right. frto_counter would be inaccessible from
userspace, but with SACK-enhanced FRTO retrans_out can include
segments other than the head, possibly leaving it non-zero after a
spurious RTO; boom again.

Luckily, the solution seems rather simple: never go directly to Open
state but use Disorder instead. This does not really change much,
since TCP could anyway change its state to Disorder during FRTO
using path tcp_fastretrans_alert -> tcp_try_to_open (e.g., when
a SACK block makes ACK dubious). Besides, Disorder seems to be
the state where TCP should be if not recovering (in Recovery or
Loss state) while having some retransmissions in-flight (see
tcp_try_to_open), which is exactly what happens with FRTO.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 194e880af51e..e806839acdd9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1286,7 +1286,8 @@ void tcp_enter_frto(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
-	tcp_set_ca_state(sk, TCP_CA_Open);
+	tcp_set_ca_state(sk, TCP_CA_Disorder);
+	tp->high_seq = tp->snd_nxt;
 	tp->frto_highmark = tp->snd_nxt;
 	tp->frto_counter = 1;
 }
@@ -2014,8 +2015,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	/* E. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		if (!sysctl_tcp_frto)
-			BUG_TRAP(tp->retrans_out == 0);
+		BUG_TRAP(tp->retrans_out == 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
-- 
cgit v1.2.3


From 6408d206c7484615ecae54bf6474a02c94e9e862 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:04:11 -0800
Subject: [TCP] FRTO: Ignore some uninteresting ACKs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Handles an RFC4138 shortcoming (in step 2); it should also have a case
c) which ignores ACKs that are neither duplicates nor advance the window
(opposite dir data, winupdate).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e806839acdd9..e990d562f5e3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2495,9 +2495,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
  *
- * F-RTO affects during two new ACKs following RTO. State (ACK number) is kept
- * in frto_counter. When ACK advances window (but not to or beyond highest
- * sequence sent before RTO):
+ * F-RTO affects during two new ACKs following RTO (well, almost, see inline
+ * comments). State (ACK number) is kept in frto_counter. When ACK advances
+ * window (but not to or beyond highest sequence sent before RTO):
  *   On First ACK,  send two new segments out.
  *   On Second ACK, RTO was likely spurious. Do spurious response (response
  *                  algorithm is not part of the F-RTO detection algorithm
@@ -2527,6 +2527,13 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	if (flag&FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
+	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
+	 * duplicate nor advances window, e.g., opposite dir data, winupdate
+	 */
+	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+	    !(flag&FLAG_FORWARD_PROGRESS))
+		return;
+
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk);
-- 
cgit v1.2.3


From 95c4922bf9330eb2c71b752359dd89c4e166f3c5 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:05:18 -0800
Subject: [TCP] FRTO: fixes fallback to conventional recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The FRTO detection did not care how the ACK pattern affects the cwnd
calculation of the conventional recovery. This caused incorrect
setting of cwnd when the fallback becomes necessary. The
knowledge tcp_process_frto() has about the incoming ACK is now
passed on to tcp_enter_frto_loss() in the allowed_segments parameter,
which gives the number of segments that must be added to
packets-in-flight while calculating the new cwnd.

Instead of snd_una we use FLAG_DATA_ACKED in duplicate ACK
detection because RFC4138 states (in Section 2.2):
  If the first acknowledgment after the RTO retransmission
  does not acknowledge all of the data that was retransmitted
  in step 1, the TCP sender reverts to the conventional RTO
  recovery.  Otherwise, a malicious receiver acknowledging
  partial segments could cause the sender to declare the
  timeout spurious in a case where data was lost.

If the next ACK after RTO is a duplicate, we do not retransmit
anything, which is equal to what conservative conventional
recovery does in such a case.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e990d562f5e3..cc935c8a6aae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1296,7 +1296,7 @@ void tcp_enter_frto(struct sock *sk)
  * which indicates that we should follow the traditional RTO recovery,
  * i.e. mark everything lost and do go-back-N retransmission.
  */
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1326,7 +1326,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
-	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->undo_marker = 0;
@@ -2527,6 +2527,11 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	if (flag&FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
+	if (!before(tp->snd_una, tp->frto_highmark)) {
+		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
+		return;
+	}
+
 	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
 	 * duplicate nor advances window, e.g., opposite dir data, winupdate
 	 */
@@ -2534,9 +2539,8 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	    !(flag&FLAG_FORWARD_PROGRESS))
 		return;
 
-	if (tp->snd_una == prior_snd_una ||
-	    !before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk);
+	if (!(flag&FLAG_DATA_ACKED)) {
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
 		return;
 	}
 
-- 
cgit v1.2.3


From aa8b6a7ad147dfbaaf10368ff15df9418b670d8b Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:06:03 -0800
Subject: [TCP] FRTO: Response should reset also snd_cwnd_cnt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the purpose is to reduce CWND, we prevent immediate growth. This
is not a major issue nor is "the correct way" specified anywhere.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc935c8a6aae..924b2e6d7d15 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2490,6 +2490,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tp->snd_cwnd_cnt = 0;
 	tcp_moderate_cwnd(tp);
 }
 
-- 
cgit v1.2.3


From 52c63f1e86ebb18ef4b710b5b647e552a041e5ca Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:06:52 -0800
Subject: [TCP]: Don't enter to fast recovery while using FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because TCP is not in Loss state during FRTO recovery, fast
recovery could be triggered by accident. Non-SACK FRTO is more
robust than the not yet included SACK-enhanced version (which can
receive a high number of duplicate ACKs with SACK blocks during
FRTO), at least with unidirectional transfers, but under
extraordinary patterns fast recovery can be incorrectly
triggered (e.g., data loss + ACK losses => cumulative ACK with
enough SACK blocks to meet the sacked_out >= dupthresh condition).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 924b2e6d7d15..7213740477ee 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1547,6 +1547,10 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 {
 	__u32 packets_out;
 
+	/* Do not perform any recovery during FRTO algorithm */
+	if (tp->frto_counter)
+		return 0;
+
 	/* Trick#1: The loss is proven. */
 	if (tp->lost_out)
 		return 1;
-- 
cgit v1.2.3


From 94d0ea7786714d78d7cb73144bb850254dd0bb78 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:07:27 -0800
Subject: [TCP] FRTO: frto_counter modulo-op converted to two assignments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7213740477ee..9dc5754141e9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2551,11 +2551,11 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 
 	if (tp->frto_counter == 1) {
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+		tp->frto_counter = 2;
 	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
+		tp->frto_counter = 0;
 	}
-
-	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-- 
cgit v1.2.3


From 7c9a4a5b67926dd186d427bc5b9fce6ccbde154c Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:08:34 -0800
Subject: [TCP]: Prevent unrelated cwnd adjustment while using FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO controls cwnd when it still processes the ACK input or it
has just reverted back to conventional RTO recovery; the normal
rules apply when FRTO has reverted to standard congestion
control.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9dc5754141e9..723cee63791f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2522,7 +2522,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  *     to prove that the RTO is indeed spurious. It transfers the control
  *     from F-RTO to the conventional RTO recovery
  */
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
+static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2534,7 +2534,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
-		return;
+		return 1;
 	}
 
 	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
@@ -2542,20 +2542,22 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	 */
 	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
 	    !(flag&FLAG_FORWARD_PROGRESS))
-		return;
+		return 1;
 
 	if (!(flag&FLAG_DATA_ACKED)) {
 		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
-		return;
+		return 1;
 	}
 
 	if (tp->frto_counter == 1) {
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
+		return 1;
 	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
 		tp->frto_counter = 0;
 	}
+	return 0;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
@@ -2569,6 +2571,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	s32 seq_rtt;
 	int prior_packets;
+	int frto_cwnd = 0;
 
 	/* If the ack is newer than sent or older than previous acks
 	 * then we can probably ignore it.
@@ -2631,15 +2634,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
-		tcp_process_frto(sk, prior_snd_una, flag);
+		frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
+		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
 	} else {
-		if ((flag & FLAG_DATA_ACKED))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
 	}
 
-- 
cgit v1.2.3


From 46d0de4ed92650b95f27acae09914996bbe624e7 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:10:39 -0800
Subject: [TCP] FRTO: Entry is allowed only during (New)Reno like recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This interpretation comes from RFC4138:
    "If the sender implements some loss recovery algorithm other
     than Reno or NewReno [FHG04], the F-RTO algorithm SHOULD
     NOT be entered when earlier fast recovery is underway."

I think the RFC means to say (especially in the light of
Appendix B) that ...recovery is underway (not just fast recovery)
or was underway when it was interrupted by an earlier (F-)RTO
that hasn't yet been resolved (snd_una has not advanced enough).
Thus, my interpretation is that whenever TCP has ever
retransmitted anything other than the head, the basic version cannot
be used, because then the ordering assumptions which FRTO is based on
do not hold.

NewReno has only the head segment retransmitted at a time.
Therefore, walk up to the segment that has not been SACKed; if
neither that segment nor anything before it has been retransmitted,
we know for sure that nothing after the non-SACKed segment should
have been either. This assumption is valid because TCPCB_EVER_RETRANS
does not leave holes but each non-SACKed segment is rexmitted
in-order.

Check for retrans_out > 1 avoids more expensive walk through the
skb list, as we can know the result beforehand: F-RTO will not be
allowed.

A SACKed skb can turn into a non-SACKed one only in the extremely
rare case of SACK reneging; in this case we might fail to detect
retransmissions if there were any for segments other than the head. To
get rid of that feature, whole rexmit queue would have to be
walked (always) or FRTO should be prevented when SACK reneging
happens. Of course RTO should still trigger after reneging which
makes this issue even less likely to show up. And as long as the
response is as conservative as it's now, nothing bad happens even
then.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  2 +-
 net/ipv4/tcp_input.c | 25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 572a77bb6907..7fd6b77519c3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -341,7 +341,7 @@ extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
 extern int			tcp_child_process(struct sock *parent,
 						  struct sock *child,
 						  struct sk_buff *skb);
-extern int			tcp_use_frto(const struct sock *sk);
+extern int			tcp_use_frto(struct sock *sk);
 extern void			tcp_enter_frto(struct sock *sk);
 extern void			tcp_enter_loss(struct sock *sk, int how);
 extern void			tcp_clear_retrans(struct tcp_sock *tp);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 723cee63791f..a283fc12186e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1239,14 +1239,31 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 /* F-RTO can only be used if these conditions are satisfied:
  *  - there must be some unsent new data
  *  - the advertised window should allow sending it
+ *  - TCP has never retransmitted anything other than head
  */
-int tcp_use_frto(const struct sock *sk)
+int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (!sysctl_tcp_frto || !sk->sk_send_head ||
+		after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+		      tp->snd_una + tp->snd_wnd))
+		return 0;
 
-	return (sysctl_tcp_frto && sk->sk_send_head &&
-		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
-		       tp->snd_una + tp->snd_wnd));
+	/* Avoid expensive walking of rexmit queue if possible */
+	if (tp->retrans_out > 1)
+		return 0;
+
+	skb = skb_peek(&sk->sk_write_queue)->next;	/* Skips head */
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+			return 0;
+		/* Short-circuit when first non-SACKed skb has been checked */
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+			break;
+	}
+	return 1;
 }
 
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
-- 
cgit v1.2.3


From d1a54c6a0a3f9c2c4ef71982d89b8571bd9eaa51 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:11:57 -0800
Subject: [TCP] FRTO: Reverse RETRANS bit clearing logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously RETRANS bits were cleared on the entry to FRTO. We
postpone that into tcp_enter_frto_loss, which is really the
place where the clearing should be done anyway. This allows
simplification of the logic from a clearing loop to the head skb
clearing only.

Besides, the other changes made in the previous patches to
tcp_use_frto made it impossible for the non-SACKed FRTO to be
entered if other than the head has been rexmitted.

With SACK-enhanced FRTO (and Appendix B), however, there can be
a number of retransmissions in flight when RTO expires (same thing
could happen before this patchset also with non-SACK FRTO). To
not introduce any jumpiness into the packet counting during FRTO,
instead of clearing RETRANS bits from skbs during entry, do it
later on.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a283fc12186e..3ef7e9e07964 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1268,7 +1268,11 @@ int tcp_use_frto(struct sock *sk)
 
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
  * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious.
+ * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
+ * keep retrans_out counting accurate (with SACK F-RTO, other than head
+ * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
+ * bits are handled if the Loss state is really to be entered (in
+ * tcp_enter_frto_loss).
  *
  * Do like tcp_enter_loss() would; when RTO expires the second time it
  * does:
@@ -1289,17 +1293,13 @@ void tcp_enter_frto(struct sock *sk)
 		tcp_ca_event(sk, CA_EVENT_FRTO);
 	}
 
-	/* Have to clear retransmission markers here to keep the bookkeeping
-	 * in shape, even though we are not yet in Loss state.
-	 * If something was really lost, it is eventually caught up
-	 * in tcp_enter_frto_loss.
-	 */
-	tp->retrans_out = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	skb = skb_peek(&sk->sk_write_queue);
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= tcp_skb_pcount(skb);
 	}
 	tcp_sync_left_out(tp);
 
@@ -1313,7 +1313,7 @@ void tcp_enter_frto(struct sock *sk)
  * which indicates that we should follow the traditional RTO recovery,
  * i.e. mark everything lost and do go-back-N retransmission.
  */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1322,10 +1322,21 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
 	tp->sacked_out = 0;
 	tp->lost_out = 0;
 	tp->fackets_out = 0;
+	tp->retrans_out = 0;
 
 	sk_stream_for_retrans_queue(skb, sk) {
 		cnt += tcp_skb_pcount(skb);
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		/*
+		 * Count the retransmission made on RTO correctly (only when
+		 * waiting for the first ACK and did not get it)...
+		 */
+		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+			tp->retrans_out += tcp_skb_pcount(skb);
+			/* ...enter this if branch just for the first segment */
+			flag |= FLAG_DATA_ACKED;
+		} else {
+			TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+		}
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 
 			/* Do not mark those segments lost that were
@@ -2550,7 +2561,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
+		tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
 		return 1;
 	}
 
@@ -2562,7 +2573,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		return 1;
 
 	if (!(flag&FLAG_DATA_ACKED)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag);
 		return 1;
 	}
 
-- 
cgit v1.2.3


From 66e93e45c09affa407750cc06398492e8b897848 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:13:47 -0800
Subject: [TCP] FRTO: Fake cwnd for ssthresh callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TCP without FRTO would be in Loss state with small cwnd. FRTO,
however, (typically) leaves cwnd at a larger value, which causes
ssthresh to become too large in case RTO is triggered again
compared to what conventional recovery would do. Because
consecutive RTOs result in only a single ssthresh reduction,
RTO+cumulative ACK+RTO pattern is required to trigger this
event.

A large comment is included for congestion control module writers
trying to figure out what CA_EVENT_FRTO handler should do because
there exists a remote possibility of incompatibility between
FRTO and module defined ssthresh functions.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ef7e9e07964..055721d8495e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1289,7 +1289,31 @@ void tcp_enter_frto(struct sock *sk)
 	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
 	     !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		/* Our state is too optimistic in ssthresh() call because cwnd
+		 * is not reduced until tcp_enter_frto_loss() when previous FRTO
+		 * recovery has not yet completed. Pattern would be this: RTO,
+		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
+		 * up here twice).
+		 * RFC4138 should be more specific on what to do, even though
+		 * RTO is quite unlikely to occur after the first Cumulative ACK
+		 * due to back-off and complexity of triggering events ...
+		 */
+		if (tp->frto_counter) {
+			u32 stored_cwnd;
+			stored_cwnd = tp->snd_cwnd;
+			tp->snd_cwnd = 2;
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+			tp->snd_cwnd = stored_cwnd;
+		} else {
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		}
+		/* ... in theory, cong.control module could do "any tricks" in
+		 * ssthresh(), which means that ca_state, lost bits and lost_out
+		 * counter would have to be faked before the call occurs. We
+		 * consider that too expensive, unlikely and hacky, so modules
+		 * using these in ssthresh() must deal these incompatibility
+		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
+		 */
 		tcp_ca_event(sk, CA_EVENT_FRTO);
 	}
 
-- 
cgit v1.2.3


From 288035f915686a9a9e85e0358c5392bb5d7ae58d Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:14:42 -0800
Subject: [TCP]: Prevent reordering adjustments during FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To be honest, I'm not too sure how the reord stuff works in the
first place but this seems necessary.

When FRTO has been active, the one and only retransmission could
be unnecessary but the state and sending order might not be what
the sacktag code expects it to be (to work correctly).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 055721d8495e..df516d4eca96 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1224,7 +1224,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 	tp->left_out = tp->sacked_out + tp->lost_out;
 
-	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+	    (tp->frto_highmark && after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-- 
cgit v1.2.3


From 4dc2665e3634d720a62bd27128fc8781fcdad2dc Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:16:11 -0800
Subject: [TCP]: SACK enhanced FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the SACK-enhanced FRTO given in RFC4138 using the
variant given in Appendix B.

RFC4138, Appendix B:
  "This means that in order to declare timeout spurious, the TCP
   sender must receive an acknowledgment for non-retransmitted
   segment between SND.UNA and RecoveryPoint in algorithm step 3.
   RecoveryPoint is defined in conservative SACK-recovery
   algorithm [RFC3517]"

The basic version of the FRTO algorithm can still be used also
when SACK is enabled. To enable the SACK-enhanced version, set the
tcp_frto sysctl to 2.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 76 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 11 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df516d4eca96..bb3f234668b3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +111,8 @@ int sysctl_tcp_abc __read_mostly;
 #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
 #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
 
+#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
 /* Adapt the MSS value used to make delayed ack decision to the
@@ -1159,6 +1162,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						/* clear lost hint */
 						tp->retransmit_skb_hint = NULL;
 					}
+					/* SACK enhanced F-RTO detection.
+					 * Set flag if and only if non-rexmitted
+					 * segments below frto_highmark are
+					 * SACKed (RFC4138; Appendix B).
+					 * Clearing correct due to in-order walk
+					 */
+					if (after(end_seq, tp->frto_highmark)) {
+						flag &= ~FLAG_ONLY_ORIG_SACKED;
+					} else {
+						if (!(sacked & TCPCB_RETRANS))
+							flag |= FLAG_ONLY_ORIG_SACKED;
+					}
 				}
 
 				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1240,7 +1255,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 /* F-RTO can only be used if these conditions are satisfied:
  *  - there must be some unsent new data
  *  - the advertised window should allow sending it
- *  - TCP has never retransmitted anything other than head
+ *  - TCP has never retransmitted anything other than head (SACK enhanced
+ *    variant from Appendix B of RFC4138 is more robust here)
  */
 int tcp_use_frto(struct sock *sk)
 {
@@ -1252,6 +1268,9 @@ int tcp_use_frto(struct sock *sk)
 		      tp->snd_una + tp->snd_wnd))
 		return 0;
 
+	if (IsSackFrto())
+		return 1;
+
 	/* Avoid expensive walking of rexmit queue if possible */
 	if (tp->retrans_out > 1)
 		return 0;
@@ -1328,9 +1347,18 @@ void tcp_enter_frto(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
+	/* Earlier loss recovery underway (see RFC4138; Appendix B).
+	 * The last condition is necessary at least in tp->frto_counter case.
+	 */
+	if (IsSackFrto() && (tp->frto_counter ||
+	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+	    after(tp->high_seq, tp->snd_una)) {
+		tp->frto_highmark = tp->high_seq;
+	} else {
+		tp->frto_highmark = tp->snd_nxt;
+	}
 	tcp_set_ca_state(sk, TCP_CA_Disorder);
 	tp->high_seq = tp->snd_nxt;
-	tp->frto_highmark = tp->snd_nxt;
 	tp->frto_counter = 1;
 }
 
@@ -2566,6 +2594,10 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  * Rationale: if the RTO was spurious, new ACKs should arrive from the
  * original window even after we transmit two new data segments.
  *
+ * SACK version:
+ *   on first step, wait until first cumulative ACK arrives, then move to
+ *   the second step. In second step, the next ACK decides.
+ *
  * F-RTO is implemented (mainly) in four functions:
  *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
  *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
@@ -2590,16 +2622,38 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		return 1;
 	}
 
-	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
-	 * duplicate nor advances window, e.g., opposite dir data, winupdate
-	 */
-	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
-	    !(flag&FLAG_FORWARD_PROGRESS))
-		return 1;
+	if (!IsSackFrto() || IsReno(tp)) {
+		/* RFC4138 shortcoming in step 2; should also have case c):
+		 * ACK isn't duplicate nor advances window, e.g., opposite dir
+		 * data, winupdate
+		 */
+		if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+		    !(flag&FLAG_FORWARD_PROGRESS))
+			return 1;
 
-	if (!(flag&FLAG_DATA_ACKED)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag);
-		return 1;
+		if (!(flag&FLAG_DATA_ACKED)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
+					    flag);
+			return 1;
+		}
+	} else {
+		if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+			/* Prevent sending of new data. */
+			tp->snd_cwnd = min(tp->snd_cwnd,
+					   tcp_packets_in_flight(tp));
+			return 1;
+		}
+
+		if ((tp->frto_counter == 2) &&
+		    (!(flag&FLAG_FORWARD_PROGRESS) ||
+		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+			/* RFC4138 shortcoming (see comment above) */
+			if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+				return 1;
+
+			tcp_enter_frto_loss(sk, 3, flag);
+			return 1;
+		}
 	}
 
 	if (tp->frto_counter == 1) {
-- 
cgit v1.2.3


From c5e7af0df5d7234afd8596560d9f570cfc6c18bf Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 23 Feb 2007 16:22:06 -0800
Subject: [TCP]: Correct reordering detection change (no FRTO case)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reordering detection must also work when FRTO has not been
used at all. That was my original intention; only the expression
of the idea was flawed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bb3f234668b3..f6ba07f0d816 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1240,7 +1240,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	tp->left_out = tp->sacked_out + tp->lost_out;
 
 	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
-	    (tp->frto_highmark && after(tp->snd_una, tp->frto_highmark)))
+	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-- 
cgit v1.2.3


From 3cfe3baaf07c9e40a75f9a70662de56df1c246a8 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Tue, 27 Feb 2007 10:09:49 -0800
Subject: [TCP]: Add two new spurious RTO responses to FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New sysctl tcp_frto_response is added to select amongst these
responses:
	- Rate halving based; reuses CA_CWR state (default)
	- Very conservative; used to be the only one available (=1)
	- Undo cwr; undoes ssthresh and cwnd reductions (=2)

The response with rate halving requires a new parameter to
tcp_enter_cwr because FRTO has already reduced ssthresh and
doing a second reduction there has to be prevented. In addition,
to keep things nice on 80 cols screen, a local variable was
added.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h     |  1 +
 include/net/tcp.h          |  3 ++-
 net/ipv4/sysctl_net_ipv4.c |  8 ++++++++
 net/ipv4/tcp_input.c       | 36 ++++++++++++++++++++++++++++++++----
 net/ipv4/tcp_output.c      |  2 +-
 5 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 98e0fd241a25..c9ccb550206f 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -439,6 +439,7 @@ enum
 	NET_TCP_AVAIL_CONG_CONTROL=122,
 	NET_TCP_ALLOWED_CONG_CONTROL=123,
 	NET_TCP_MAX_SSTHRESH=124,
+	NET_TCP_FRTO_RESPONSE=125,
 };
 
 enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d09f5085f6a..f0c9e3400a09 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
+extern int sysctl_tcp_frto_response;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
@@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
 	tp->left_out = tp->sacked_out + tp->lost_out;
 }
 
-extern void tcp_enter_cwr(struct sock *sk);
+extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d68effe98e8d..6817d6485df5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -646,6 +646,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_TCP_FRTO_RESPONSE,
+		.procname	= "tcp_frto_response",
+		.data		= &sysctl_tcp_frto_response,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{
 		.ctl_name	= NET_TCP_LOW_LATENCY,
 		.procname	= "tcp_low_latency",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f6ba07f0d816..322e43c56461 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 }
 
 /* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+		if (set_ssthresh)
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tp->snd_cwnd = min(tp->snd_cwnd,
 				   tcp_packets_in_flight(tp) + 1U);
 		tp->snd_cwnd_cnt = 0;
@@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
 		tp->retrans_stamp = 0;
 
 	if (flag&FLAG_ECE)
-		tcp_enter_cwr(sk);
+		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		int state = TCP_CA_Open;
@@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 	tcp_moderate_cwnd(tp);
 }
 
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	tcp_enter_cwr(sk, 0);
+	tp->high_seq = tp->frto_highmark; 	/* Smoother w/o this? - ij */
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk)
+{
+	tcp_undo_cwr(sk, 1);
+}
+
 /* F-RTO spurious RTO detection algorithm (RFC4138)
  *
  * F-RTO affects during two new ACKs following RTO (well, almost, see inline
@@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		tp->frto_counter = 2;
 		return 1;
 	} else /* frto_counter == 2 */ {
-		tcp_conservative_spur_to_response(tp);
+		switch (sysctl_tcp_frto_response) {
+		case 2:
+			tcp_undo_spur_to_response(sk);
+			break;
+		case 1:
+			tcp_conservative_spur_to_response(tp);
+			break;
+		default:
+			tcp_ratehalving_spur_to_response(sk);
+			break;
+		};
 		tp->frto_counter = 0;
 	}
 	return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a65..d19b2f3b70fd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (likely(err <= 0))
 		return err;
 
-	tcp_enter_cwr(sk);
+	tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);
 
-- 
cgit v1.2.3


From e01f9d7793be82e6c252efbd52c399d3eb65abe4 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 2 Mar 2007 13:27:25 -0800
Subject: [TCP]: Complete icsk-to-local-variable change (in tcp_enter_cwr)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A local variable for icsk was created but this change was
missing. Spotted by Jarek Poplawski.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 322e43c56461..cb715eadf8f5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -770,7 +770,7 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+	if (icsk->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
 		if (set_ssthresh)
 			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-- 
cgit v1.2.3


From e317f6f69cb95527799d308a9421b7dc1252989a Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 2 Mar 2007 13:34:19 -0800
Subject: [TCP]: FRTO undo response falls back to ratehalving one if ECEd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Undoing ssthresh is disabled in fastretrans_alert whenever
FLAG_ECE is set by clearing prior_ssthresh. The clearing does
not protect FRTO because FRTO operates before fastretrans_alert.
Moving the clearing of prior_ssthresh earlier seems to be a
suboptimal solution to the FRTO case because then FLAG_ECE will
cause a second ssthresh reduction in try_to_open (the first
occurred when FRTO was entered). So instead, FRTO falls back
immediately to the rate halving response, which switches TCP to
CA_CWR state preventing the latter reduction of ssthresh.

If the first ECE arrived before the ACK after which FRTO is able
to decide RTO as spurious, prior_ssthresh is already cleared.
Thus no undoing for ssthresh occurs. Besides, FLAG_ECE should be
set also in the following ACKs resulting in rate halving response
that sees TCP is already in CA_CWR, which again prevents an extra
ssthresh reduction on that round-trip.

If the first ECE arrived before RTO, ssthresh has already been
adapted and prior_ssthresh remains cleared on entry because TCP
is in CA_CWR (the same applies also to a case where FRTO is
entered more than once and ECE comes in the middle).

High_seq must not be touched after tcp_enter_cwr because CWR
round-trip calculation depends on it.

I believe that after this patch, FRTO should be ECN-safe and
even able to take advantage of synergy benefits.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cb715eadf8f5..d894bbcc1d24 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2587,14 +2587,15 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  */
 static void tcp_ratehalving_spur_to_response(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	tcp_enter_cwr(sk, 0);
-	tp->high_seq = tp->frto_highmark; 	/* Smoother w/o this? - ij */
 }
 
-static void tcp_undo_spur_to_response(struct sock *sk)
+static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
-	tcp_undo_cwr(sk, 1);
+	if (flag&FLAG_ECE)
+		tcp_ratehalving_spur_to_response(sk);
+	else
+		tcp_undo_cwr(sk, 1);
 }
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
@@ -2681,7 +2682,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	} else /* frto_counter == 2 */ {
 		switch (sysctl_tcp_frto_response) {
 		case 2:
-			tcp_undo_spur_to_response(sk);
+			tcp_undo_spur_to_response(sk, flag);
 			break;
 		case 1:
 			tcp_conservative_spur_to_response(tp);
-- 
cgit v1.2.3


From 9d729f72dca9406025bcfa9c1f660d71d9ef0ff5 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Sun, 4 Mar 2007 16:12:44 -0800
Subject: [NET]: Convert xtime.tv_sec to get_seconds()

Where appropriate, convert references to xtime.tv_sec to the
get_seconds() helper function.

Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        |  4 ++--
 net/ipv4/route.c         |  2 +-
 net/ipv4/tcp_input.c     |  6 +++---
 net/ipv4/tcp_ipv4.c      | 10 +++++-----
 net/ipv4/tcp_minisocks.c |  8 ++++----
 net/ipv6/xfrm6_output.c  |  2 +-
 net/rxrpc/main.c         |  2 +-
 net/xfrm/xfrm_policy.c   | 12 ++++++------
 net/xfrm/xfrm_state.c    |  6 +++---
 9 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f0c9e3400a09..181c0600af1c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1014,7 +1014,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 {
 	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
 		return 0;
-	if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
+	if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
 		return 0;
 
 	/* RST segments are not recommended to carry timestamp,
@@ -1029,7 +1029,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
 		return 0;
 	return 1;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf94..0b3d7bf40f4e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		id = rt->peer->ip_id_count;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
-			tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
 		}
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d894bbcc1d24..d0a3630f41a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2933,7 +2933,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+	tp->rx_opt.ts_recent_stamp = get_seconds();
 }
 
 static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2947,7 +2947,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 */
 
 		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
-		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
 }
@@ -2999,7 +2999,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
-		xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+		get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
 		!tcp_disordered_ack(sk, skb));
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f6793b4cc669..addac1110f94 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
-			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * when trying new connection.
 		 */
 		if (peer != NULL &&
-		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 			tp->rx_opt.ts_recent = peer->tcp_ts;
 		}
@@ -1351,7 +1351,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
-			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1770,7 +1770,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
 
 	if (peer) {
 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
 			peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1791,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 
 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
 			peer->tcp_ts	   = tcptw->tw_ts_recent;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 706932726a11..ac4ce48a6599 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -208,7 +208,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		if (newtp->rx_opt.tstamp_ok) {
 			newtp->rx_opt.ts_recent = req->ts_recent;
-			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+			newtp->rx_opt.ts_recent_stamp = get_seconds();
 			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 		} else {
 			newtp->rx_opt.ts_recent_stamp = 0;
@@ -504,7 +504,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
 			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
 		}
 	}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2b..8e4170f9a0da 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -76,7 +76,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
 		if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
-			x->lastused = (u64)xtime.tv_sec;
+			x->lastused = get_seconds();
 
 		spin_unlock_bh(&x->lock);
 
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
index baec1f7fd8b9..cead31b5bdf5 100644
--- a/net/rxrpc/main.c
+++ b/net/rxrpc/main.c
@@ -37,7 +37,7 @@ static int __init rxrpc_initialise(void)
 	int ret;
 
 	/* my epoch value */
-	rxrpc_epoch = htonl(xtime.tv_sec);
+	rxrpc_epoch = htonl(get_seconds());
 
 	/* register the /proc interface */
 #ifdef CONFIG_PROC_FS
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 785c3e39f062..194257554553 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_policy_timer(unsigned long data)
 {
 	struct xfrm_policy *xp = (struct xfrm_policy*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int dir;
@@ -690,7 +690,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
 	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
-	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.add_time = get_seconds();
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
@@ -1133,7 +1133,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
-		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->curlft.add_time = get_seconds();
 		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
@@ -1386,7 +1386,7 @@ restart:
 		return 0;
 
 	family = dst_orig->ops->family;
-	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = get_seconds();
 	pols[0] = policy;
 	npols ++;
 	xfrm_nr += pols[0]->xfrm_nr;
@@ -1682,7 +1682,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 1;
 	}
 
-	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pol->curlft.use_time = get_seconds();
 
 	pols[0] = pol;
 	npols ++;
@@ -1694,7 +1694,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		if (pols[1]) {
 			if (IS_ERR(pols[1]))
 				return 0;
-			pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+			pols[1]->curlft.use_time = get_seconds();
 			npols ++;
 		}
 	}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e3a0bcfa5df1..63a20e818164 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_timer_handler(unsigned long data)
 {
 	struct xfrm_state *x = (struct xfrm_state*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 		init_timer(&x->rtimer);
 		x->rtimer.function = xfrm_replay_timer_handler;
 		x->rtimer.data     = (unsigned long)x;
-		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->curlft.add_time = get_seconds();
 		x->lft.soft_byte_limit = XFRM_INF;
 		x->lft.soft_packet_limit = XFRM_INF;
 		x->lft.hard_byte_limit = XFRM_INF;
@@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(xfrm_state_update);
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
 	if (!x->curlft.use_time)
-		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+		x->curlft.use_time = get_seconds();
 
 	if (x->km.state != XFRM_STATE_VALID)
 		return -EINVAL;
-- 
cgit v1.2.3


From fe067e8ab5e0dc5ca3c54634924c628da92090b4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 7 Mar 2007 12:12:44 -0800
Subject: [TCP]: Abstract out all write queue operations.

This allows the write queue implementation to be changed,
for example, to one which allows fast interval searching.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    |  21 ----------
 include/net/tcp.h     | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp.c        |  32 +++++++-------
 net/ipv4/tcp_input.c  |  64 ++++++++++++++++++----------
 net/ipv4/tcp_ipv4.c   |   2 +-
 net/ipv4/tcp_output.c |  95 +++++++++++++++++++----------------------
 net/ipv4/tcp_timer.c  |  10 ++---
 7 files changed, 221 insertions(+), 117 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/sock.h b/include/net/sock.h
index 9583639090d2..2974bacc8850 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -710,15 +710,6 @@ static inline void sk_stream_mem_reclaim(struct sock *sk)
 		__sk_stream_mem_reclaim(sk);
 }
 
-static inline void sk_stream_writequeue_purge(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
-		sk_stream_free_skb(sk, skb);
-	sk_stream_mem_reclaim(sk);
-}
-
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
 	return (int)skb->truesize <= sk->sk_forward_alloc ||
@@ -1256,18 +1247,6 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 	return page;
 }
 
-#define sk_stream_for_retrans_queue(skb, sk)				\
-		for (skb = (sk)->sk_write_queue.next;			\
-		     (skb != (sk)->sk_send_head) &&			\
-		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
-		     skb = skb->next)
-
-/*from STCP for fast SACK Process*/
-#define sk_stream_for_retrans_queue_from(skb, sk)			\
-		for (; (skb != (sk)->sk_send_head) &&                   \
-		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
-		     skb = skb->next)
-
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 181c0600af1c..6dacc352dcf1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1162,6 +1162,120 @@ static inline void		tcp_put_md5sig_pool(void)
 	put_cpu();
 }
 
+/* write queue abstraction */
+static inline void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
+		sk_stream_free_skb(sk, skb);
+	sk_stream_mem_reclaim(sk);
+}
+
+static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_write_queue.next;
+	if (skb == (struct sk_buff *) &sk->sk_write_queue)
+		return NULL;
+	return skb;
+}
+
+static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_write_queue.prev;
+	if (skb == (struct sk_buff *) &sk->sk_write_queue)
+		return NULL;
+	return skb;
+}
+
+static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
+{
+	return skb->next;
+}
+
+#define tcp_for_write_queue(skb, sk)					\
+		for (skb = (sk)->sk_write_queue.next;			\
+		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
+		     skb = skb->next)
+
+#define tcp_for_write_queue_from(skb, sk)				\
+		for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\
+		     skb = skb->next)
+
+static inline struct sk_buff *tcp_send_head(struct sock *sk)
+{
+	return sk->sk_send_head;
+}
+
+static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
+{
+	sk->sk_send_head = skb->next;
+	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
+		sk->sk_send_head = NULL;
+}
+
+static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
+{
+	if (sk->sk_send_head == skb_unlinked)
+		sk->sk_send_head = NULL;
+}
+
+static inline void tcp_init_send_head(struct sock *sk)
+{
+	sk->sk_send_head = NULL;
+}
+
+static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
+{
+	__skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
+{
+	__tcp_add_write_queue_tail(sk, skb);
+
+	/* Queue it, remembering where we must start sending. */
+	if (sk->sk_send_head == NULL)
+		sk->sk_send_head = skb;
+}
+
+static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
+{
+	__skb_queue_head(&sk->sk_write_queue, skb);
+}
+
+/* Insert buff after skb on the write queue of sk.  */
+static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
+						struct sk_buff *buff,
+						struct sock *sk)
+{
+	__skb_append(skb, buff, &sk->sk_write_queue);
+}
+
+/* Insert new immediately before skb on the write queue of sk.  */
+static inline void tcp_insert_write_queue_before(struct sk_buff *new,
+						  struct sk_buff *skb,
+						  struct sock *sk)
+{
+	__skb_insert(new, skb->prev, skb, &sk->sk_write_queue);
+}
+
+static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
+{
+	__skb_unlink(skb, &sk->sk_write_queue);
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk,
+				  const struct sk_buff *skb)
+{
+	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+static inline int tcp_write_queue_empty(struct sock *sk)
+{
+	return skb_queue_empty(&sk->sk_write_queue);
+}
+
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10b5115..689f9330f1b9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
 	tcb->flags   = TCPCB_FLAG_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-	if (!sk->sk_send_head)
-		sk->sk_send_head = skb;
 	if (tp->nonagle & TCP_NAGLE_PUSH)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
@@ -491,8 +489,8 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
 			    int mss_now, int nonagle)
 {
-	if (sk->sk_send_head) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+	if (tcp_send_head(sk)) {
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		if (!(flags & MSG_MORE) || forced_push(tp))
 			tcp_mark_push(tp, skb);
 		tcp_mark_urg(tp, flags, skb);
@@ -526,13 +524,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		goto do_error;
 
 	while (psize > 0) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		struct page *page = pages[poffset / PAGE_SIZE];
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
 
-		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
@@ -589,7 +587,7 @@ new_segment:
 		if (forced_push(tp)) {
 			tcp_mark_push(tp, skb);
 			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-		} else if (skb == sk->sk_send_head)
+		} else if (skb == tcp_send_head(sk))
 			tcp_push_one(sk, mss_now);
 		continue;
 
@@ -704,9 +702,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		while (seglen > 0) {
 			int copy;
 
-			skb = sk->sk_write_queue.prev;
+			skb = tcp_write_queue_tail(sk);
 
-			if (!sk->sk_send_head ||
+			if (!tcp_send_head(sk) ||
 			    (copy = size_goal - skb->len) <= 0) {
 
 new_segment:
@@ -833,7 +831,7 @@ new_segment:
 			if (forced_push(tp)) {
 				tcp_mark_push(tp, skb);
 				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-			} else if (skb == sk->sk_send_head)
+			} else if (skb == tcp_send_head(sk))
 				tcp_push_one(sk, mss_now);
 			continue;
 
@@ -860,9 +858,11 @@ out:
 
 do_fault:
 	if (!skb->len) {
-		if (sk->sk_send_head == skb)
-			sk->sk_send_head = NULL;
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
+		/* It is the one place in all of TCP, except connection
+		 * reset, where we can be unlinking the send_head.
+		 */
+		tcp_check_send_head(sk, skb);
 		sk_stream_free_skb(sk, skb);
 	}
 
@@ -1732,7 +1732,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
 #ifdef CONFIG_NET_DMA
 	__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,7 +1758,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
-	sk->sk_send_head = NULL;
+	tcp_init_send_head(sk);
 	tp->rx_opt.saw_tstamp = 0;
 	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d0a3630f41a7..22d0bb03c5da 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1044,7 +1044,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	cached_skb = tp->fastpath_skb_hint;
 	cached_fack_count = tp->fastpath_cnt_hint;
 	if (!cached_skb) {
-		cached_skb = sk->sk_write_queue.next;
+		cached_skb = tcp_write_queue_head(sk);
 		cached_fack_count = 0;
 	}
 
@@ -1061,10 +1061,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
+
 			cached_skb = skb;
 			cached_fack_count = fack_count;
 			if (i == first_sack_index) {
@@ -1213,7 +1216,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
 				break;
 			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1266,8 +1271,8 @@ int tcp_use_frto(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!sysctl_tcp_frto || !sk->sk_send_head ||
-		after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+	if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
+		after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 		      tp->snd_una + tp->snd_wnd))
 		return 0;
 
@@ -1278,8 +1283,11 @@ int tcp_use_frto(struct sock *sk)
 	if (tp->retrans_out > 1)
 		return 0;
 
-	skb = skb_peek(&sk->sk_write_queue)->next;	/* Skips head */
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	skb = tcp_write_queue_head(sk);
+	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			return 0;
 		/* Short-circuit when first non-SACKed skb has been checked */
@@ -1343,7 +1351,7 @@ void tcp_enter_frto(struct sock *sk)
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
 
-	skb = skb_peek(&sk->sk_write_queue);
+	skb = tcp_write_queue_head(sk);
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		tp->retrans_out -= tcp_skb_pcount(skb);
@@ -1380,7 +1388,9 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->fackets_out = 0;
 	tp->retrans_out = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		/*
 		 * Count the retransmission made on RTO correctly (only when
@@ -1468,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how)
 	if (!how)
 		tp->undo_marker = tp->snd_una;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			tp->undo_marker = 0;
@@ -1503,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
 	 * receiver _host_ is heavily congested (or buggy).
 	 * Do processing similar to RTO timeout.
 	 */
-	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+	if ((skb = tcp_write_queue_head(sk)) != NULL &&
 	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
 
 		tcp_enter_loss(sk, 1);
 		icsk->icsk_retransmits++;
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  icsk->icsk_rto, TCP_RTO_MAX);
 		return 1;
@@ -1531,7 +1543,7 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
 {
 	return tp->packets_out &&
-	       tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
+	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
 }
 
 /* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1726,11 +1738,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -1777,9 +1791,11 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
-			: sk->sk_write_queue.next;
+			: tcp_write_queue_head(sk);
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (!tcp_skb_timedout(sk, skb))
 				break;
 
@@ -1970,7 +1986,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 {
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 
@@ -2382,8 +2400,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		= icsk->icsk_ca_ops->rtt_sample;
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
-	while ((skb = skb_peek(&sk->sk_write_queue)) &&
-	       skb != sk->sk_send_head) {
+	while ((skb = tcp_write_queue_head(sk)) &&
+	       skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
@@ -2446,7 +2464,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
 		sk_stream_free_skb(sk, skb);
 		clear_all_retrans_hints(tp);
 	}
@@ -2495,7 +2513,7 @@ static void tcp_ack_probe(struct sock *sk)
 
 	/* Was it a usable window open? */
 
-	if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 		   tp->snd_una + tp->snd_wnd)) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2795,7 +2813,7 @@ no_queue:
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (sk->sk_send_head)
+	if (tcp_send_head(sk))
 		tcp_ack_probe(sk);
 	return 1;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index addac1110f94..3326681b8429 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1890,7 +1890,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	tcp_cleanup_congestion_control(sk);
 
 	/* Cleanup up the write buffer. */
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
 	__skb_queue_purge(&tp->out_of_order_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d19b2f3b70fd..2a62b55b15f1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,9 +65,7 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
 {
-	sk->sk_send_head = skb->next;
-	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
-		sk->sk_send_head = NULL;
+	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 	tcp_packets_out_inc(sk, tp, skb);
 }
@@ -567,12 +565,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-
-	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL)
-		sk->sk_send_head = skb;
 }
 
 static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -705,7 +699,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -1056,7 +1050,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
 	return !after(end_seq, tp->snd_una + tp->snd_wnd);
 }
 
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
  * should be put on the wire right now.  If so, it returns the number of
  * packets allowed by the congestion window.
  */
@@ -1079,15 +1073,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	return cwnd_quota;
 }
 
-static inline int tcp_skb_is_last(const struct sock *sk,
-				  const struct sk_buff *skb)
-{
-	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
 int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
 {
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1143,7 +1131,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -1249,10 +1237,10 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	/* Have enough data in the send queue to probe? */
 	len = 0;
-	if ((skb = sk->sk_send_head) == NULL)
+	if ((skb = tcp_send_head(sk)) == NULL)
 		return -1;
 	while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
-		skb = skb->next;
+		skb = tcp_write_queue_next(sk, skb);
 	if (len < probe_size)
 		return -1;
 
@@ -1279,9 +1267,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 	sk_charge_skb(sk, nskb);
 
-	skb = sk->sk_send_head;
-	__skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
-	sk->sk_send_head = nskb;
+	skb = tcp_send_head(sk);
+	tcp_insert_write_queue_before(nskb, skb, sk);
+	sk->sk_send_head = nskb;	/* probe goes out first; advancing past skb would skip it */
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1292,7 +1280,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	len = 0;
 	while (len < probe_size) {
-		next = skb->next;
+		next = tcp_write_queue_next(sk, skb);
 
 		copy = min_t(int, skb->len, probe_size - len);
 		if (nskb->ip_summed)
@@ -1305,7 +1293,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			sk_stream_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1377,7 +1365,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		sent_pkts = 1;
 	}
 
-	while ((skb = sk->sk_send_head)) {
+	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1435,7 +1423,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		tcp_cwnd_validate(sk, tp);
 		return 0;
 	}
-	return !tp->packets_out && sk->sk_send_head;
+	return !tp->packets_out && tcp_send_head(sk);
 }
 
 /* Push out any pending frames which were held back due to
@@ -1445,7 +1433,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 			       unsigned int cur_mss, int nonagle)
 {
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	if (skb) {
 		if (tcp_write_xmit(sk, cur_mss, nonagle))
@@ -1459,7 +1447,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
 	BUG_ON(!skb || skb->len < mss_now);
@@ -1620,7 +1608,7 @@ u32 __tcp_select_window(struct sock *sk)
 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = skb->next;
+	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
@@ -1652,7 +1640,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		clear_all_retrans_hints(tp);
 
 		/* Ok.	We will be able to collapse the packet. */
-		__skb_unlink(next_skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(next_skb, sk);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
@@ -1706,7 +1694,9 @@ void tcp_simple_retransmit(struct sock *sk)
 	unsigned int mss = tcp_current_mss(sk, 0);
 	int lost = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (skb->len > mss &&
 		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1790,10 +1780,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	/* Collapse two adjacent packets if worthwhile and we can. */
 	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
 	   (skb->len < (cur_mss >> 1)) &&
-	   (skb->next != sk->sk_send_head) &&
-	   (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
-	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
-	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
+	   (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+	   (!tcp_skb_is_last(sk, skb)) &&
+	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
 	   (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
@@ -1872,15 +1862,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		skb = tp->retransmit_skb_hint;
 		packet_cnt = tp->retransmit_cnt_hint;
 	}else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
 	/* First pass: retransmit lost packets. */
 	if (tp->lost_out) {
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
 			/* we could do better than to assign each time */
 			tp->retransmit_skb_hint = skb;
 			tp->retransmit_cnt_hint = packet_cnt;
@@ -1906,8 +1898,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 					else
 						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
-					if (skb ==
-					    skb_peek(&sk->sk_write_queue))
+					if (skb == tcp_write_queue_head(sk))
 						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 									  inet_csk(sk)->icsk_rto,
 									  TCP_RTO_MAX);
@@ -1944,11 +1935,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		skb = tp->forward_skb_hint;
 		packet_cnt = tp->forward_cnt_hint;
 	} else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		tp->forward_cnt_hint = packet_cnt;
 		tp->forward_skb_hint = skb;
 
@@ -1973,7 +1966,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		}
 
-		if (skb == skb_peek(&sk->sk_write_queue))
+		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
@@ -1989,7 +1982,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 void tcp_send_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	int mss_now;
 
 	/* Optimization, tack on the FIN if we have a queue of
@@ -1998,7 +1991,7 @@ void tcp_send_fin(struct sock *sk)
 	 */
 	mss_now = tcp_current_mss(sk, 1);
 
-	if (sk->sk_send_head != NULL) {
+	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
@@ -2071,7 +2064,7 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff* skb;
 
-	skb = skb_peek(&sk->sk_write_queue);
+	skb = tcp_write_queue_head(sk);
 	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
@@ -2081,9 +2074,9 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
-			__skb_queue_head(&sk->sk_write_queue, nskb);
+			__tcp_add_write_queue_head(sk, nskb);
 			sk_stream_free_skb(sk, skb);
 			sk_charge_skb(sk, nskb);
 			skb = nskb;
@@ -2285,7 +2278,7 @@ int tcp_connect(struct sock *sk)
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
-	__skb_queue_tail(&sk->sk_write_queue, buff);
+	__tcp_add_write_queue_tail(sk, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2441,7 +2434,7 @@ int tcp_write_wakeup(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *skb;
 
-		if ((skb = sk->sk_send_head) != NULL &&
+		if ((skb = tcp_send_head(sk)) != NULL &&
 		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
 			int err;
 			unsigned int mss = tcp_current_mss(sk, 0);
@@ -2491,7 +2484,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk);
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1bea..2ca97b20929d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+	BUG_TRAP(!tcp_write_queue_empty(sk));
 
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk, 0);
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 		tcp_enter_loss(sk, 0);
 	}
 
-	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || sk->sk_send_head)
+	if (tp->packets_out || tcp_send_head(sk))
 		goto resched;
 
 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
-- 
cgit v1.2.3


From 2de979bd7da9c8b39cc0aabb0ab5aa1516d929eb Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:45:19 -0800
Subject: [TCP]: whitespace cleanup

Add whitespace around keywords.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_hybla.c     |  2 +-
 net/ipv4/tcp_input.c     | 57 ++++++++++++++++++++++++------------------------
 net/ipv4/tcp_minisocks.c |  6 ++---
 net/ipv4/tcp_output.c    | 34 +++++++++++++++--------------
 net/ipv4/tcp_westwood.c  |  2 +-
 5 files changed, 51 insertions(+), 50 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f64..e5be35117223 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	ca->snd_cwnd_cents += odd;
 
 	/* check when fractions goes >=128 and increase cwnd by 1. */
-	while(ca->snd_cwnd_cents >= 128) {
+	while (ca->snd_cwnd_cents >= 128) {
 		tp->snd_cwnd++;
 		ca->snd_cwnd_cents -= 128;
 		tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 22d0bb03c5da..fb0256085948 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -578,7 +578,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if(m == 0)
+	if (m == 0)
 		m = 1;
 	if (tp->srtt != 0) {
 		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
@@ -1758,12 +1758,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 			/* clear xmit_retransmit_queue hints
 			 *  if this is beyond hint */
-			if(tp->retransmit_skb_hint != NULL &&
-			   before(TCP_SKB_CB(skb)->seq,
-				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
-
+			if (tp->retransmit_skb_hint != NULL &&
+			    before(TCP_SKB_CB(skb)->seq,
+				   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 				tp->retransmit_skb_hint = NULL;
-			}
+
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -2441,7 +2440,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 		if (sacked) {
 			if (sacked & TCPCB_RETRANS) {
-				if(sacked & TCPCB_SACKED_RETRANS)
+				if (sacked & TCPCB_SACKED_RETRANS)
 					tp->retrans_out -= tcp_skb_pcount(skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
@@ -2840,7 +2839,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	ptr = (unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
-	while(length>0) {
+	while (length > 0) {
 		int opcode=*ptr++;
 		int opsize;
 
@@ -2856,9 +2855,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					return;
 				if (opsize > length)
 					return;	/* don't parse partial options */
-				switch(opcode) {
+				switch (opcode) {
 				case TCPOPT_MSS:
-					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
 						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
 						if (in_mss) {
 							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2868,12 +2867,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_WINDOW:
-					if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
 						if (sysctl_tcp_window_scaling) {
 							__u8 snd_wscale = *(__u8 *) ptr;
 							opt_rx->wscale_ok = 1;
 							if (snd_wscale > 14) {
-								if(net_ratelimit())
+								if (net_ratelimit())
 									printk(KERN_INFO "tcp_parse_options: Illegal window "
 									       "scaling value %d >14 received.\n",
 									       snd_wscale);
@@ -2883,7 +2882,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 						}
 					break;
 				case TCPOPT_TIMESTAMP:
-					if(opsize==TCPOLEN_TIMESTAMP) {
+					if (opsize==TCPOLEN_TIMESTAMP) {
 						if ((estab && opt_rx->tstamp_ok) ||
 						    (!estab && sysctl_tcp_timestamps)) {
 							opt_rx->saw_tstamp = 1;
@@ -2893,7 +2892,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_SACK_PERM:
-					if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
 						if (sysctl_tcp_sack) {
 							opt_rx->sack_ok = 1;
 							tcp_sack_reset(opt_rx);
@@ -2902,7 +2901,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					break;
 
 				case TCPOPT_SACK:
-					if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
 					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
 					   opt_rx->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2964,7 +2963,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 * Not only, also it occurs for expired timestamps.
 		 */
 
-		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+		if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
 		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
@@ -3223,7 +3222,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 */
 			tp->rx_opt.num_sacks--;
 			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
-			for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+			for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i+1];
 			continue;
 		}
@@ -3276,7 +3275,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.num_sacks--;
 		sp--;
 	}
-	for(; this_sack > 0; this_sack--, sp--)
+	for (; this_sack > 0; this_sack--, sp--)
 		*sp = *(sp-1);
 
 new_sack:
@@ -3302,7 +3301,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		return;
 	}
 
-	for(this_sack = 0; this_sack < num_sacks; ) {
+	for (this_sack = 0; this_sack < num_sacks; ) {
 		/* Check if the start of the sack is covered by RCV.NXT. */
 		if (!before(tp->rcv_nxt, sp->start_seq)) {
 			int i;
@@ -3358,7 +3357,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->h.th->fin)
+		if (skb->h.th->fin)
 			tcp_fin(skb, sk, skb->h.th);
 	}
 }
@@ -3424,9 +3423,9 @@ queue_and_out:
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->len)
+		if (skb->len)
 			tcp_event_data_recv(sk, tp, skb);
-		if(th->fin)
+		if (th->fin)
 			tcp_fin(skb, sk, th);
 
 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -4323,7 +4322,7 @@ slow_path:
 		goto discard;
 	}
 
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4338,7 +4337,7 @@ slow_path:
 	}
 
 step5:
-	if(th->ack)
+	if (th->ack)
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4626,13 +4625,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	case TCP_LISTEN:
-		if(th->ack)
+		if (th->ack)
 			return 1;
 
-		if(th->rst)
+		if (th->rst)
 			goto discard;
 
-		if(th->syn) {
+		if (th->syn) {
 			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
@@ -4688,7 +4687,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* step 2: check RST bit */
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4711,7 +4710,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	if (th->ack) {
 		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
 
-		switch(sk->sk_state) {
+		switch (sk->sk_state) {
 		case TCP_SYN_RECV:
 			if (acceptable) {
 				tp->copied_seq = tp->rcv_nxt;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ac4ce48a6599..463d2b24d2db 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -246,7 +246,7 @@ kill:
 	if (paws_reject)
 		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
 
-	if(!th->rst) {
+	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
 		 *
 		 * If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 				if (tcp_alloc_md5sig_pool() == NULL)
 					BUG();
 			}
-		} while(0);
+		} while (0);
 #endif
 
 		/* Linkage updates. */
@@ -438,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 						       keepalive_time_when(newtp));
 
 		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-		if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
+		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
 				newtp->rx_opt.sack_ok |= 2;
 		}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2a62b55b15f1..f19f5fb361b5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -236,7 +236,7 @@ static u16 tcp_select_window(struct sock *sk)
 	u32 new_win = __tcp_select_window(sk);
 
 	/* Never shrink the offered window */
-	if(new_win < cur_win) {
+	if (new_win < cur_win) {
 		/* Danger Will Robinson!
 		 * Don't update rcv_wup/rcv_wnd here or else
 		 * we will not be able to advertise a zero
@@ -287,10 +287,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 			       (TCPOPT_SACK <<  8) |
 			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
 						     TCPOLEN_SACK_PERBLOCK)));
-		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+
+		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
+
 		if (tp->rx_opt.dsack) {
 			tp->rx_opt.dsack = 0;
 			tp->rx_opt.eff_sacks--;
@@ -335,7 +337,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 	 */
 	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
 	if (ts) {
-		if(sack)
+		if (sack)
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
@@ -347,7 +349,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 				       TCPOLEN_TIMESTAMP);
 		*ptr++ = htonl(tstamp);		/* TSVAL */
 		*ptr++ = htonl(ts_recent);	/* TSECR */
-	} else if(sack)
+	} else if (sack)
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
@@ -428,7 +430,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	sysctl_flags = 0;
 	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
 		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-		if(sysctl_tcp_timestamps) {
+		if (sysctl_tcp_timestamps) {
 			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
 			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
@@ -1618,7 +1620,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		u16 flags = TCP_SKB_CB(skb)->flags;
 
 		/* Also punt if next skb has been SACK'd. */
-		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
 			return;
 
 		/* Next skb is out of window. */
@@ -1778,13 +1780,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
-	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
-	   (skb->len < (cur_mss >> 1)) &&
-	   (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
-	   (!tcp_skb_is_last(sk, skb)) &&
-	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
-	   (sysctl_tcp_retrans_collapse != 0))
+	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+	    (skb->len < (cur_mss >> 1)) &&
+	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+	    (!tcp_skb_is_last(sk, skb)) &&
+	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1794,9 +1796,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * retransmit when old data is attached.  So strip it off
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
-	if(skb->len > 0 &&
-	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+	if (skb->len > 0 &&
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
 			skb_shinfo(skb)->gso_segs = 1;
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9c..1f91aeae10af 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct westwood *w = inet_csk_ca(sk);
 
-	switch(event) {
+	switch (event) {
 	case CA_EVENT_FAST_ACK:
 		westwood_fast_bw(sk);
 		break;
-- 
cgit v1.2.3


From c51957dafa6f960c5c6372aa3da6c8fa71c13730 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:47:22 -0300
Subject: [TCP]: Do the layer header setting in tcp_collapse relative to
 skb->data

skb->data is equal to skb->head before skb_reserve() is called, so this does
not change the result; it helps with the layer header changes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fb0256085948..e5d1c2c8cea7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3632,11 +3632,13 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+
+		nskb->mac.raw = nskb->data + (skb->mac.raw - skb->head);
+		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
+		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
+
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
-		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
-		nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
-		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_insert(nskb, skb->prev, skb, list);
-- 
cgit v1.2.3


From 31713c333ddbb66d694829082620b69b71c4b09a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:48:37 -0300
Subject: [TCP]: Use skb_set_mac_header in tcp_collapse

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e5d1c2c8cea7..1ec05bd673a7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3633,7 +3633,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		if (!nskb)
 			return;
 
-		nskb->mac.raw = nskb->data + (skb->mac.raw - skb->head);
+		skb_set_mac_header(nskb, skb->mac.raw - skb->head);
 		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
-- 
cgit v1.2.3


From 98e399f82ab3a6d863d1d4a7ea48925cc91c830e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:33:04 -0700
Subject: [SK_BUFF]: Introduce skb_mac_header()

For the places where we need a pointer to the mac header, it is still legal to
touch skb->mac.raw directly if just adding to, subtracting from or setting it
to another layer header.

This one also converts some more cases to skb_reset_mac_header() that my
regex missed because they had no spaces before or after the '=', ugh.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/um/drivers/daemon_kern.c              |  2 +-
 arch/um/drivers/mcast_kern.c               |  2 +-
 arch/um/drivers/pcap_kern.c                |  2 +-
 arch/um/drivers/slip_kern.c                |  2 +-
 arch/um/drivers/slirp_kern.c               |  2 +-
 arch/um/os-Linux/drivers/ethertap_kern.c   |  2 +-
 arch/um/os-Linux/drivers/tuntap_kern.c     |  2 +-
 drivers/block/aoe/aoe.h                    |  2 +-
 drivers/ieee1394/eth1394.h                 |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c       |  2 +-
 drivers/message/fusion/mptlan.c            | 26 ++++++++++++++------------
 drivers/net/arcnet/capmode.c               |  4 ++--
 drivers/net/plip.c                         |  2 +-
 drivers/net/slip.c                         |  2 +-
 drivers/net/wan/hostess_sv11.c             |  2 +-
 drivers/net/wan/sealevel.c                 |  2 +-
 drivers/net/wan/syncppp.c                  |  2 +-
 drivers/net/wireless/airo.c                |  2 +-
 drivers/net/wireless/hostap/hostap_main.c  | 14 +++++++-------
 drivers/net/wireless/orinoco.c             |  2 +-
 drivers/net/wireless/wavelan.c             |  5 +++--
 drivers/net/wireless/wavelan_cs.c          |  4 ++--
 drivers/s390/net/claw.c                    |  2 +-
 include/linux/if_ether.h                   |  2 +-
 include/linux/if_tr.h                      |  2 +-
 include/linux/if_vlan.h                    |  2 +-
 include/linux/netfilter_bridge/ebt_802_3.h |  2 +-
 include/linux/skbuff.h                     | 10 ++++++++++
 net/802/hippi.c                            |  2 +-
 net/appletalk/ddp.c                        |  6 +++---
 net/ax25/af_ax25.c                         |  5 +++--
 net/bluetooth/bnep/core.c                  | 11 +++++++----
 net/bridge/br_netfilter.c                  |  5 +++--
 net/core/dev.c                             |  2 +-
 net/core/filter.c                          |  2 +-
 net/core/skbuff.c                          |  2 +-
 net/ipv4/netfilter/ipt_LOG.c               |  4 ++--
 net/ipv4/netfilter/ipt_ULOG.c              |  4 ++--
 net/ipv4/route.c                           |  4 ++--
 net/ipv4/tcp_input.c                       |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c               |  4 ++--
 net/ipv6/ndisc.c                           |  3 ++-
 net/ipv6/netfilter/ip6t_LOG.c              |  5 +++--
 net/ipv6/netfilter/ip6t_eui64.c            |  4 ++--
 net/ipv6/xfrm6_mode_beet.c                 |  4 ++--
 net/ipv6/xfrm6_mode_tunnel.c               |  4 ++--
 net/netfilter/xt_mac.c                     |  4 ++--
 net/packet/af_packet.c                     |  8 ++++----
 net/tipc/eth_media.c                       |  4 ++--
 49 files changed, 108 insertions(+), 88 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index 9c2e7a758f21..adeece11e596 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -46,7 +46,7 @@ static int daemon_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw, 
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index 52ccb7b53cd2..e6b8e0dd72a8 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -50,7 +50,7 @@ static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw, 
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index e67362acf0e7..948849343ca4 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -36,7 +36,7 @@ static int pcap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(pcap_user_read(fd, (*skb)->mac.raw, 
+	return(pcap_user_read(fd, skb_mac_header(*skb),
 			      (*skb)->dev->mtu + ETH_HEADER_OTHER,
 			      (struct pcap_data *) &lp->user));
 }
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 25634bd1f585..125c44f77638 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -49,7 +49,7 @@ static unsigned short slip_protocol(struct sk_buff *skbuff)
 static int slip_read(int fd, struct sk_buff **skb, 
 		       struct uml_net_private *lp)
 {
-	return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, 
+	return(slip_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			      (struct slip_data *) &lp->user));
 }
 
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index b3ed8fb874ab..0a0324a6d290 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -53,7 +53,7 @@ static unsigned short slirp_protocol(struct sk_buff *skbuff)
 static int slirp_read(int fd, struct sk_buff **skb, 
 		       struct uml_net_private *lp)
 {
-	return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, 
+	return(slirp_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			      (struct slirp_data *) &lp->user));
 }
 
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 70541821775f..12689141414d 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -43,7 +43,7 @@ static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP);
 	if(*skb == NULL) return(-ENOMEM);
-	len = net_recvfrom(fd, (*skb)->mac.raw, 
+	len = net_recvfrom(fd, skb_mac_header(*skb),
 			   (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP);
 	if(len <= 0) return(len);
 	skb_pull(*skb, 2);
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index 76570a2c25c3..f1714e7fb1d0 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -43,7 +43,7 @@ static int tuntap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_read(fd, (*skb)->mac.raw, 
+	return(net_read(fd, skb_mac_header(*skb),
 			(*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 4c34f8d31cc9..1d8466817943 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -53,7 +53,7 @@ struct aoe_hdr {
 
 static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
 {
-	return (struct aoe_hdr *)skb->mac.raw;
+	return (struct aoe_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/ieee1394/eth1394.h b/drivers/ieee1394/eth1394.h
index c45cbff9138d..1e8356535149 100644
--- a/drivers/ieee1394/eth1394.h
+++ b/drivers/ieee1394/eth1394.h
@@ -90,7 +90,7 @@ struct eth1394hdr {
 
 static inline struct eth1394hdr *eth1394_hdr(const struct sk_buff *skb)
 {
-	return (struct eth1394hdr *)skb->mac.raw;
+	return (struct eth1394hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 76e9c36597eb..c6b004182d91 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -174,7 +174,7 @@ static unsigned short dvb_net_eth_type_trans(struct sk_buff *skb,
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index d5b878d56280..21fe1b66808c 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -714,6 +714,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	LANSendRequest_t *pSendReq;
 	SGETransaction32_t *pTrans;
 	SGESimple64_t *pSimple;
+	const unsigned char *mac;
 	dma_addr_t dma;
 	unsigned long flags;
 	int ctx;
@@ -784,6 +785,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 //			IOC_AND_NETDEV_NAMES_s_s(dev),
 //			ctx, skb, skb->data));
 
+	mac = skb_mac_header(skb);
 #ifdef QLOGIC_NAA_WORKAROUND
 {
 	struct NAA_Hosed *nh;
@@ -793,12 +795,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	   drops. */
 	read_lock_irq(&bad_naa_lock);
 	for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) {
-		if ((nh->ieee[0] == skb->mac.raw[0]) &&
-		    (nh->ieee[1] == skb->mac.raw[1]) &&
-		    (nh->ieee[2] == skb->mac.raw[2]) &&
-		    (nh->ieee[3] == skb->mac.raw[3]) &&
-		    (nh->ieee[4] == skb->mac.raw[4]) &&
-		    (nh->ieee[5] == skb->mac.raw[5])) {
+		if ((nh->ieee[0] == mac[0]) &&
+		    (nh->ieee[1] == mac[1]) &&
+		    (nh->ieee[2] == mac[2]) &&
+		    (nh->ieee[3] == mac[3]) &&
+		    (nh->ieee[4] == mac[4]) &&
+		    (nh->ieee[5] == mac[5])) {
 			cur_naa = nh->NAA;
 			dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value "
 				  "= %04x.\n", cur_naa));
@@ -810,12 +812,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 #endif
 
 	pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa         << 16) |
-						    (skb->mac.raw[0] <<  8) |
-						    (skb->mac.raw[1] <<  0));
-	pTrans->TransactionDetails[1] = cpu_to_le32((skb->mac.raw[2] << 24) |
-						    (skb->mac.raw[3] << 16) |
-						    (skb->mac.raw[4] <<  8) |
-						    (skb->mac.raw[5] <<  0));
+						    (mac[0] <<  8) |
+						    (mac[1] <<  0));
+	pTrans->TransactionDetails[1] = cpu_to_le32((mac[2] << 24) |
+						    (mac[3] << 16) |
+						    (mac[4] <<  8) |
+						    (mac[5] <<  0));
 
 	pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2];
 
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 6c764b66e9cc..f6a87bd20ff2 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -123,7 +123,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	pkt = (struct archdr *)skb->mac.raw;
+	pkt = (struct archdr *)skb_mac_header(skb);
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
@@ -269,7 +269,7 @@ static int ack_tx(struct net_device *dev, int acked)
   ackskb->dev = dev;
 
   skb_reset_mac_header(ackskb);
-  ackpkt = (struct archdr *)ackskb->mac.raw;
+  ackpkt = (struct archdr *)skb_mac_header(ackskb);
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 6bb085f54437..8754cf3356b0 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -546,7 +546,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 2f4b1de7a2b4..65bd20fac820 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -363,7 +363,7 @@ sl_bump(struct slip *sl)
 	}
 	skb->dev = sl->dev;
 	memcpy(skb_put(skb,count), sl->rbuff, count);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol=htons(ETH_P_IP);
 	netif_rx(skb);
 	sl->dev->last_rx = jiffies;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index a02c5fb40567..9ba3e4ee6ec7 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -59,7 +59,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=__constant_htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 70fb1b98b1dd..131358108c5a 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -61,7 +61,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index 218f7b574ab3..67fc67cfd452 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -227,7 +227,7 @@ static void sppp_input (struct net_device *dev, struct sk_buff *skb)
 	unsigned long flags;
 
 	skb->dev=dev;
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 
 	if (dev->flags & IFF_RUNNING)
 	{
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 692a23f9834d..7fe0a61091a6 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2444,7 +2444,7 @@ static int add_airo_dev( struct net_device *dev );
 
 static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN);
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN);
 	return ETH_ALEN;
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 0e29ff762879..c2616e7b0059 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -590,20 +590,20 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx)
 
 int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); /* addr2 */
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
 	return ETH_ALEN;
 }
 
 
 int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	if (*(u32 *)skb->mac.raw == LWNG_CAP_DID_BASE) {
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_prism_hdr) + 10,
+	const unsigned char *mac = skb_mac_header(skb);
+
+	if (*(u32 *)mac == LWNG_CAP_DID_BASE) {
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_prism_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
-	} else { /* (*(u32 *)skb->mac.raw == htonl(LWNG_CAPHDR_VERSION)) */
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_cap_hdr) + 10,
+	} else { /* (*(u32 *)mac == htonl(LWNG_CAPHDR_VERSION)) */
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_cap_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
 	}
 	return ETH_ALEN;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index f1415bff527f..062286dc8e15 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -689,7 +689,7 @@ static void orinoco_stat_gather(struct net_device *dev,
 	/* Note : gcc will optimise the whole section away if
 	 * WIRELESS_SPY is not defined... - Jean II */
 	if (SPY_NUMBER(priv)) {
-		orinoco_spy_gather(dev, skb->mac.raw + ETH_ALEN,
+		orinoco_spy_gather(dev, skb_mac_header(skb) + ETH_ALEN,
 				   desc->signal, desc->silence);
 	}
 }
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 69cb1471096b..2bf77b1ee531 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2517,7 +2517,8 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 	skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-	wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+	wv_packet_info(skb_mac_header(skb), sksize, dev->name,
+		       "wv_packet_read");
 #endif				/* DEBUG_RX_INFO */
 
 	/* Statistics-gathering and associated stuff.
@@ -2553,7 +2554,7 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 
 		/* Spying stuff */
 #ifdef IW_WIRELESS_SPY
-		wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE,
+		wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE,
 			      stats);
 #endif /* IW_WIRELESS_SPY */
 #ifdef HISTOGRAM
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index 9351ee773314..67b867f837ca 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2889,7 +2889,7 @@ wv_packet_read(struct net_device *		dev,
   skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-  wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+  wv_packet_info(skb_mac_header(skb), sksize, dev->name, "wv_packet_read");
 #endif	/* DEBUG_RX_INFO */
      
   /* Statistics gathering & stuff associated.
@@ -2923,7 +2923,7 @@ wv_packet_read(struct net_device *		dev,
 #endif	/* WAVELAN_ROAMING */
 	  
 #ifdef WIRELESS_SPY
-      wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, stats);
+      wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats);
 #endif	/* WIRELESS_SPY */
 #ifdef HISTOGRAM
       wl_his_gather(dev, stats);
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 7809a79feec7..6dd64d0c8d45 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -3525,8 +3525,8 @@ unpack_next:
                                 memcpy(skb_put(skb,len_of_data),
 					privptr->p_mtc_envelope,
 					len_of_data);
-                                skb->mac.raw=skb->data;
                                 skb->dev=dev;
+				skb_reset_mac_header(skb);
                                 skb->protocol=htons(ETH_P_IP);
                                 skb->ip_summed=CHECKSUM_UNNECESSARY;
                                 privptr->stats.rx_packets++;
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index ab08f35cbc35..f6863fbcf334 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -112,7 +112,7 @@ struct ethhdr {
 
 static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
 {
-	return (struct ethhdr *)skb->mac.raw;
+	return (struct ethhdr *)skb_mac_header(skb);
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 2f94cf2c7abb..046e9d95ba9a 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -47,7 +47,7 @@ struct trh_hdr {
 
 static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
 {
-	return (struct trh_hdr *)skb->mac.raw;
+	return (struct trh_hdr *)skb_mac_header(skb);
 }
 #ifdef CONFIG_SYSCTL
 extern struct ctl_table tr_table[];
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d103580c72d2..544490d9d0bd 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -51,7 +51,7 @@ struct vlan_ethhdr {
 
 static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 {
-	return (struct vlan_ethhdr *)skb->mac.raw;
+	return (struct vlan_ethhdr *)skb_mac_header(skb);
 }
 
 struct vlan_hdr {
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index 07f044ff1a6b..a11b0c2017fd 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -54,7 +54,7 @@ struct ebt_802_3_hdr {
 
 static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 {
-	return (struct ebt_802_3_hdr *)skb->mac.raw;
+	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 43ab6cbf8446..dff81af454b7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
+{
+	return skb->mac.raw;
+}
+
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+	return skb->mac.raw != NULL;
+}
+
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
 	skb->mac.raw = skb->data;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index d87190038edb..87ffc12b6891 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -132,7 +132,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	hip = (struct hippi_hdr *)skb->mac.raw;
+	hip = (struct hippi_hdr *)skb_mac_header(skb);
 	skb_pull(skb, HIPPI_HLEN);
 
 	/*
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 32b82705b685..934f25993ce8 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1484,7 +1484,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	/* Expand any short form frames */
-	if (skb->mac.raw[2] == 1) {
+	if (skb_mac_header(skb)[2] == 1) {
 		struct ddpehdr *ddp;
 		/* Find our address */
 		struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1510,8 +1510,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		 * we write the network numbers !
 		 */
 
-		ddp->deh_dnode = skb->mac.raw[0];     /* From physical header */
-		ddp->deh_snode = skb->mac.raw[1];     /* From physical header */
+		ddp->deh_dnode = skb_mac_header(skb)[0];     /* From physical header */
+		ddp->deh_snode = skb_mac_header(skb)[1];     /* From physical header */
 
 		ddp->deh_dnet  = ap->s_net;	/* Network number */
 		ddp->deh_snet  = ap->s_net;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 62605dc5a2c8..c89e4f6f9025 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1645,9 +1645,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
 		ax25_digi digi;
 		ax25_address src;
+		const unsigned char *mac = skb_mac_header(skb);
 
-		ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
-
+		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
+				&digi, NULL, NULL);
 		sax->sax25_family = AF_AX25;
 		/* We set this correctly, even though we may not let the
 		   application know the digi calls further down (because it
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b1c2fa96c69e..97156c4abc8d 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -364,17 +364,20 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 
 	case BNEP_COMPRESSED_SRC_ONLY:
 		memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 
 	case BNEP_COMPRESSED_DST_ONLY:
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
+		       ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
+		       ETH_ALEN + 2);
 		break;
 
 	case BNEP_GENERAL:
-		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
+		       ETH_ALEN * 2);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 	}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5439a3c46c3e..1163c4f69899 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -753,7 +753,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	/* Be very paranoid. This probably won't happen anymore, but let's
 	 * keep the check just to be sure... */
-	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+	if (skb_mac_header(skb) < skb->head ||
+	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
 		       "bad mac.raw pointer.\n");
 		goto print_error;
@@ -808,7 +809,7 @@ print_error:
 		if (realoutdev)
 			printk("[%s]", realoutdev->name);
 	}
-	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
 	       skb->data);
 	dump_stack();
 	return NF_ACCEPT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2fcaf5bc4a9c..560560fe3064 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	}
 	rcu_read_unlock();
 
-	__skb_push(skb, skb->data - skb->mac.raw);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	return segs;
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c53..1cc128d05422 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -44,7 +44,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	if (k >= SKF_NET_OFF)
 		ptr = skb->nh.raw + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
-		ptr = skb->mac.raw + k - SKF_LL_OFF;
+		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
 	if (ptr >= skb->head && ptr < skb->tail)
 		return ptr;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8f6ebd0d3693..1493c95f633e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1878,7 +1878,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	unsigned int doffset = skb->data - skb->mac.raw;
+	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd94228..c697971fe317 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && skb->dev->hard_header_len
-		    && skb->mac.raw != (void*)skb->nh.iph) {
+		    && skb->mac.raw != skb->nh.raw) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
 				printk("%02x%c", *p,
 				       i==skb->dev->hard_header_len - 1
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9718b666a380..fae2a34d23d0 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -251,9 +251,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		*(pm->prefix) = '\0';
 
 	if (in && in->hard_header_len > 0
-	    && skb->mac.raw != (void *) skb->nh.iph
+	    && skb->mac.raw != skb->nh.raw
 	    && in->hard_header_len <= ULOG_MAC_LEN) {
-		memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
 		pm->mac_len = in->hard_header_len;
 	} else
 		pm->mac_len = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 29ee7be45aa6..486ab93127ce 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
 		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
 			"%u.%u.%u.%u, on dev %s\n",
 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
-		if (dev->hard_header_len && skb->mac.raw) {
+		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			printk(KERN_WARNING "ll header: ");
 			for (i = 0; i < dev->hard_header_len; i++, p++) {
 				printk("%02x", *p);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1ec05bd673a7..f5e019cefc15 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3633,7 +3633,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		if (!nskb)
 			return;
 
-		skb_set_mac_header(nskb, skb->mac.raw - skb->head);
+		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
 		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f09055d3a768..8e123e30cf61 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -126,9 +126,9 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IPV6);
 	}
 #endif
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 053147a0027e..a3e3d9e2f44b 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -828,7 +828,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
 			if (dad) {
 				if (dev->type == ARPHRD_IEEE802_TR) {
-					unsigned char *sadr = skb->mac.raw;
+					const unsigned char *sadr;
+					sadr = skb_mac_header(skb);
 					if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
 					    sadr[9] == dev->dev_addr[1] &&
 					    sadr[10] == dev->dev_addr[2] &&
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7b..fc9e51a77784 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -397,7 +397,7 @@ ip6t_log_packet(unsigned int pf,
 		printk("MAC=");
 		if (skb->dev && (len = skb->dev->hard_header_len) &&
 		    skb->mac.raw != skb->nh.raw) {
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			int i;
 
 			if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
 			printk(" ");
 
 			if (skb->dev->type == ARPHRD_SIT) {
-				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+				const struct iphdr *iph =
+					(struct iphdr *)skb_mac_header(skb);
 				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
 				       NIPQUAD(iph->saddr),
 				       NIPQUAD(iph->daddr));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a8..c2676066a80f 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
 	unsigned char eui64[8];
 	int i = 0;
 
-	if (!(skb->mac.raw >= skb->head &&
-	      (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+	if (!(skb_mac_header(skb) >= skb->head &&
+	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
 	    offset != 0) {
 		*hotdrop = 1;
 		return 0;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 53cfe1a10ccd..79364b1e965a 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -70,9 +70,9 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	memmove(skb->data, skb->nh.raw, size);
 	skb->nh.raw = skb->data;
 
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 
 	ip6h = skb->nh.ipv6h;
 	ip6h->payload_len = htons(skb->len - size);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index d2c560c181a1..5bb0677d3730 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -108,9 +108,9 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 			ip6ip_ecn_decapsulate(skb);
 		skb->protocol = htons(ETH_P_IP);
 	}
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b26..1d3a1d98b885 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
     const struct xt_mac_info *info = matchinfo;
 
     /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
+    return (skb_mac_header(skb) >= skb->head &&
+	    (skb_mac_header(skb) + ETH_HLEN) <= skb->data
 	    /* If so, compare... */
 	    && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
 		^ info->invert));
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6f8c72d2413b..73cb2d3e27d2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -284,7 +284,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	 *	Incoming packets have ll header pulled,
 	 *	push it back.
 	 *
-	 *	For outgoing ones skb->data == skb->mac.raw
+	 *	For outgoing ones skb->data == skb_mac_header(skb)
 	 *	so that this procedure is noop.
 	 */
 
@@ -303,7 +303,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 
 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
-	skb_push(skb, skb->data-skb->mac.raw);
+	skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/*
 	 *	The SOCK_PACKET socket receives _all_ frames.
@@ -488,7 +488,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 		   never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
@@ -592,7 +592,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index f71ba9db611e..03a9db364538 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 
 	if (likely(eb_ptr->bearer)) {
 	       if (likely(!dev->promiscuity) ||
-		   !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
-		   !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+		   !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+		   !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
 			size = msg_size((struct tipc_msg *)buf->data);
 			skb_trim(buf, size);
 			if (likely(buf->len == size)) {
-- 
cgit v1.2.3


From d56f90a7c96da5187f0cdf07ee7434fe6aa78bbc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:50:43 -0700
Subject: [SK_BUFF]: Introduce skb_network_header()

For the places where we need a pointer to the network header, it is still legal
to touch skb->nh.raw directly if just adding to, subtracting from or setting it
to another layer header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c                    |  5 ++-
 drivers/net/bonding/bond_alb.c                 |  2 +-
 drivers/net/loopback.c                         |  7 ++--
 drivers/net/pasemi_mac.c                       |  6 ++-
 drivers/s390/net/qeth_main.c                   |  6 ++-
 include/linux/if_pppox.h                       |  2 +-
 include/linux/skbuff.h                         |  5 +++
 include/net/cipso_ipv4.h                       |  2 +-
 include/net/inet_ecn.h                         |  6 ++-
 include/net/llc_pdu.h                          |  4 +-
 include/net/pkt_cls.h                          |  2 +-
 net/bridge/br_netfilter.c                      | 12 +++---
 net/core/dev.c                                 |  9 +++--
 net/core/filter.c                              |  2 +-
 net/dccp/ipv6.c                                |  8 ++--
 net/decnet/dn_route.c                          |  4 +-
 net/ipv4/af_inet.c                             |  2 +-
 net/ipv4/ah4.c                                 |  5 ++-
 net/ipv4/esp4.c                                |  7 ++--
 net/ipv4/icmp.c                                |  4 +-
 net/ipv4/ip_fragment.c                         |  2 +-
 net/ipv4/ip_options.c                          | 12 +++---
 net/ipv4/ip_output.c                           |  6 +--
 net/ipv4/ip_sockglue.c                         |  8 ++--
 net/ipv4/ipmr.c                                |  2 +-
 net/ipv4/ipvs/ip_vs_app.c                      |  4 +-
 net/ipv4/ipvs/ip_vs_core.c                     |  3 +-
 net/ipv4/netfilter/arpt_mangle.c               |  2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  9 ++---
 net/ipv4/raw.c                                 |  2 +-
 net/ipv4/tcp_input.c                           |  2 +-
 net/ipv4/xfrm4_input.c                         |  2 +-
 net/ipv4/xfrm4_mode_beet.c                     |  4 +-
 net/ipv4/xfrm4_mode_transport.c                |  4 +-
 net/ipv4/xfrm4_policy.c                        |  2 +-
 net/ipv6/af_inet6.c                            |  3 +-
 net/ipv6/ah6.c                                 | 12 +++---
 net/ipv6/datagram.c                            | 31 ++++++++------
 net/ipv6/esp6.c                                |  4 +-
 net/ipv6/exthdrs.c                             | 56 +++++++++++++++-----------
 net/ipv6/icmp.c                                |  3 +-
 net/ipv6/ip6_input.c                           |  4 +-
 net/ipv6/ip6_output.c                          | 23 ++++++-----
 net/ipv6/ip6_tunnel.c                          |  5 ++-
 net/ipv6/ipcomp6.c                             |  4 +-
 net/ipv6/mip6.c                                | 29 +++++++------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 19 +++++----
 net/ipv6/raw.c                                 |  5 ++-
 net/ipv6/reassembly.c                          | 25 ++++++++----
 net/ipv6/tcp_ipv6.c                            |  8 +++-
 net/ipv6/xfrm6_input.c                         |  6 +--
 net/ipv6/xfrm6_mode_beet.c                     |  2 +-
 net/ipv6/xfrm6_mode_transport.c                |  6 ++-
 net/ipv6/xfrm6_mode_tunnel.c                   |  8 ++--
 net/ipv6/xfrm6_policy.c                        | 16 ++++----
 net/netfilter/xt_TCPMSS.c                      |  4 +-
 net/sched/act_pedit.c                          |  2 +-
 net/sched/cls_u32.c                            |  2 +-
 net/sched/em_u32.c                             |  2 +-
 59 files changed, 258 insertions(+), 185 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 0c2b3752e46e..cd3b1fa4a414 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -872,7 +872,8 @@ typedef struct {
 static void
 isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 {
-	u_char *p = skb->nh.raw; /* hopefully, this was set correctly */
+	/* hopefully, this was set correctly */
+	const u_char *p = skb_network_header(skb);
 	unsigned short proto = ntohs(skb->protocol);
 	int data_ofs;
 	ip_ports *ipp;
@@ -880,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 
 	addinfo[0] = '\0';
 	/* This check stolen from 2.1.72 dev_queue_xmit_nit() */
-	if (skb->nh.raw < skb->data || skb->nh.raw >= skb->tail) {
+	if (p < skb->data || p >= skb->tail) {
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 5c2a12c2b997..86cfcb3f8131 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -106,7 +106,7 @@ struct arp_pkt {
 
 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 {
-	return (struct arp_pkt *)skb->nh.raw;
+	return (struct arp_pkt *)skb_network_header(skb);
 }
 
 /* Forward declaration */
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index af476d2a513d..9265c27b13b2 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -76,7 +76,8 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 static void emulate_large_send_offload(struct sk_buff *skb)
 {
 	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
+	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
+					      (iph->ihl * 4));
 	unsigned int doffset = (iph->ihl + th->doff) * 4;
 	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
 	unsigned int offset = 0;
@@ -93,7 +94,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
 		iph = nskb->nh.iph;
-		memcpy(nskb->data, skb->nh.raw, doffset);
+		memcpy(nskb->data, skb_network_header(skb), doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
 				  nskb->data + doffset,
@@ -108,7 +109,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		nskb->pkt_type = skb->pkt_type;
 
-		th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4);
+		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
 		iph->tot_len = htons(frag_size + doffset);
 		iph->id = htons(id);
 		iph->check = 0;
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 3f4213f3d5de..82218720bc3e 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -729,16 +729,18 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		const unsigned char *nh = skb_network_header(skb);
+
 		switch (skb->nh.iph->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		case IPPROTO_UDP:
 			dflags |= XCT_MACTX_CSUM_UDP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		}
 	}
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index c0ee6d94ea38..0ff29e0628b5 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -3778,9 +3778,11 @@ qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 	}
 	/* try something else */
 	if (skb->protocol == ETH_P_IPV6)
-		return (skb->nh.raw[24] == 0xff) ? RTN_MULTICAST : 0;
+		return (skb_network_header(skb)[24] == 0xff) ?
+				RTN_MULTICAST : 0;
 	else if (skb->protocol == ETH_P_IP)
-		return ((skb->nh.raw[16] & 0xf0) == 0xe0) ? RTN_MULTICAST : 0;
+		return ((skb_network_header(skb)[16] & 0xf0) == 0xe0) ?
+				RTN_MULTICAST : 0;
 	/* ... */
 	if (!memcmp(skb->data, skb->dev->broadcast, 6))
 		return RTN_BROADCAST;
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 7044f8ab30a0..29d6579ff1a0 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -116,7 +116,7 @@ struct pppoe_hdr {
 
 static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
 {
-	return (struct pppoe_hdr *)skb->nh.raw;
+	return (struct pppoe_hdr *)skb_network_header(skb);
 }
 
 struct pppoe_opt {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47cc8b07c2b4..76d30f34b986 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_network_header(const struct sk_buff *skb)
+{
+	return skb->nh.raw;
+}
+
 static inline void skb_reset_network_header(struct sk_buff *skb)
 {
 	skb->nh.raw = skb->data;
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 4c9522c5178f..4f90f5554fac 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -120,7 +120,7 @@ extern int cipso_v4_rbm_strictvalid;
  */
 
 #define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)
-#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso)
+#define CIPSO_V4_OPTPTR(x) (skb_network_header(x) + IPCB(x)->opt.cipso)
 
 /*
  * DOI List Functions
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 10117c8503e8..b9ed3898e368 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -114,12 +114,14 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail)
+		if (skb_network_header(skb) + sizeof(struct iphdr) <=
+		    skb->tail)
 			return IP_ECN_set_ce(skb->nh.iph);
 		break;
 
 	case __constant_htons(ETH_P_IPV6):
-		if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail)
+		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+		    skb->tail)
 			return IP6_ECN_set_ce(skb->nh.ipv6h);
 		break;
 	}
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 778f75a40b4d..4a8f58b17e43 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -203,7 +203,7 @@ struct llc_pdu_sn {
 
 static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb)
 {
-	return (struct llc_pdu_sn *)skb->nh.raw;
+	return (struct llc_pdu_sn *)skb_network_header(skb);
 }
 
 /* Un-numbered PDU format (3 bytes in length) */
@@ -215,7 +215,7 @@ struct llc_pdu_un {
 
 static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 {
-	return (struct llc_pdu_un *)skb->nh.raw;
+	return (struct llc_pdu_un *)skb_network_header(skb);
 }
 
 /**
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 02647fe3d74b..8a6b0e7bded5 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -326,7 +326,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 		case TCF_LAYER_LINK:
 			return skb->data;
 		case TCF_LAYER_NETWORK:
-			return skb->nh.raw;
+			return skb_network_header(skb);
 		case TCF_LAYER_TRANSPORT:
 			return skb->h.raw;
 	}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 1163c4f69899..8a56d8963025 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -374,7 +374,8 @@ static int check_hbh_len(struct sk_buff *skb)
 {
 	unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
 	u32 pkt_len;
-	int off = raw - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int off = raw - nh;
 	int len = (raw[1] + 1) << 3;
 
 	if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +385,9 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off + 1] + 2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -395,9 +396,9 @@ static int check_hbh_len(struct sk_buff *skb)
 			break;
 
 		case IPV6_TLV_JUMBO:
-			if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
+			if (nh[off + 1] != 4 || (off & 3) != 2)
 				goto bad;
-			pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
+			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
 			if (pkt_len <= IPV6_MAXPLEN ||
 			    skb->nh.ipv6h->payload_len)
 				goto bad;
@@ -406,6 +407,7 @@ static int check_hbh_len(struct sk_buff *skb)
 			if (pskb_trim_rcsum(skb,
 					    pkt_len + sizeof(struct ipv6hdr)))
 				goto bad;
+			nh = skb_network_header(skb);
 			break;
 		default:
 			if (optlen > len)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b0758254ba0..54ffe9db9b02 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1068,8 +1068,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			 */
 			skb_reset_mac_header(skb2);
 
-			if (skb2->nh.raw < skb2->data ||
-			    skb2->nh.raw > skb2->tail) {
+			if (skb_network_header(skb2) < skb2->data ||
+			    skb_network_header(skb2) > skb2->tail) {
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
@@ -1207,7 +1207,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	BUG_ON(skb_shinfo(skb)->frag_list);
 
 	skb_reset_mac_header(skb);
-	skb->mac_len = skb->nh.raw - skb->data;
+	skb->mac_len = skb->nh.raw - skb->mac.raw;
 	__skb_pull(skb, skb->mac_len);
 
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
@@ -1224,7 +1224,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
-				__skb_push(skb, skb->data - skb->nh.raw);
+				__skb_push(skb, (skb->data -
+						 skb_network_header(skb)));
 			}
 			segs = ptype->gso_segment(skb, features);
 			break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1cc128d05422..d2358a5e6339 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,7 +42,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	u8 *ptr = NULL;
 
 	if (k >= SKF_NET_OFF)
-		ptr = skb->nh.raw + k - SKF_NET_OFF;
+		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db3967..627d0c3c51cf 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk,
-					(struct ipv6_rt_hdr *)(pktopts->nh.raw +
-							       rxopt->srcrt));
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt != NULL && opt->srcrt != NULL) {
@@ -573,8 +573,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		if (rxopt->srcrt)
 			opt = ipv6_invert_rthdr(sk,
-				(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
-						       rxopt->srcrt));
+		   (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 34079b7ba1d3..32a7db36c9e5 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -386,7 +386,7 @@ static int dn_return_short(struct sk_buff *skb)
 	__le16 tmp;
 
 	/* Add back headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -425,7 +425,7 @@ static int dn_return_long(struct sk_buff *skb)
 	unsigned char tmp[ETH_ALEN];
 
 	/* Add back all headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6e5575b0abef..ab552a6098f9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1184,7 +1184,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 		iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
-		iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	} while ((skb = skb->next));
 
 out:
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d0..95ddbbd1552a 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -154,7 +154,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	ah = (struct ip_auth_hdr*)skb->data;
 	iph = skb->nh.iph;
 
-	ihl = skb->data - skb->nh.raw;
+	ihl = skb->data - skb_network_header(skb);
 	memcpy(work_buf, iph, ihl);
 
 	iph->ttl = 0;
@@ -181,7 +181,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+	skb->nh.raw += ah_hlen;
+	skb->h.raw = memcpy(skb_network_header(skb), work_buf, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb8..222d21e5bbeb 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -57,9 +57,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
 	pskb_put(skb, trailer, clen - skb->len);
 
-	__skb_push(skb, skb->data - skb->nh.raw);
+	__skb_push(skb, skb->data - skb_network_header(skb));
 	top_iph = skb->nh.iph;
-	esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
+				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
 	*(u8*)(trailer->tail - 1) = top_iph->protocol;
 
@@ -222,7 +223,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
-		struct udphdr *uh = (void *)(skb->nh.raw + ihl);
+		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
 
 		/*
 		 * 1) if the NAT-T peer's IP or port changed then
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0d..ff124d40c585 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -484,7 +484,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			u8 _inner_type, *itp;
 
 			itp = skb_header_pointer(skb_in,
-						 skb_in->nh.raw +
+						 skb_network_header(skb_in) +
 						 (iph->ihl << 2) +
 						 offsetof(struct icmphdr,
 							  type) -
@@ -536,7 +536,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.data.icmph.un.gateway = info;
 	icmp_param.data.icmph.checksum	 = 0;
 	icmp_param.skb	  = skb_in;
-	icmp_param.offset = skb_in->nh.raw - skb_in->data;
+	icmp_param.offset = skb_network_offset(skb_in);
 	icmp_out_count(icmp_param.data.icmph.type);
 	inet_sk(icmp_socket->sk)->tos = tos;
 	ipc.addr = iph->saddr;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3dfd7581cfc6..268a6c7347f2 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a87..f7e9db612565 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
 void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
 			    __be32 daddr, struct rtable *rt, int is_frag)
 {
-	unsigned char * iph = skb->nh.raw;
+	unsigned char *iph = skb_network_header(skb);
 
 	memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
 	memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,7 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 		return 0;
 	}
 
-	sptr = skb->nh.raw;
+	sptr = skb_network_header(skb);
 	dptr = dopt->__data;
 
 	if (skb->dst)
@@ -217,7 +217,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 
 void ip_options_fragment(struct sk_buff * skb)
 {
-	unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
+	unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
 	struct ip_options * opt = &(IPCB(skb)->opt);
 	int  l = opt->optlen;
 	int  optlen;
@@ -264,7 +264,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 
 	if (!opt) {
 		opt = &(IPCB(skb)->opt);
-		iph = skb->nh.raw;
+		iph = skb_network_header(skb);
 		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 		optptr = iph + sizeof(struct iphdr);
 		opt->is_data = 0;
@@ -563,7 +563,7 @@ void ip_forward_options(struct sk_buff *skb)
 	struct   ip_options * opt	= &(IPCB(skb)->opt);
 	unsigned char * optptr;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	unsigned char *raw = skb->nh.raw;
+	unsigned char *raw = skb_network_header(skb);
 
 	if (opt->rr_needaddr) {
 		optptr = (unsigned char *)raw + opt->rr;
@@ -609,7 +609,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	int srrspace, srrptr;
 	__be32 nexthop;
 	struct iphdr *iph = skb->nh.iph;
-	unsigned char * optptr = skb->nh.raw + opt->srr;
+	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtable *rt2;
 	int err;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 669f5d97c6eb..eae228469627 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -503,7 +503,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 				frag->h.raw = frag->data;
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
-				memcpy(frag->nh.raw, iph, hlen);
+				memcpy(skb_network_header(frag), iph, hlen);
 				iph = frag->nh.iph;
 				iph->tot_len = htons(frag->len);
 				ip_copy_metadata(frag, skb);
@@ -607,7 +607,7 @@ slow_path:
 		 *	Copy the packet header into the new buffer.
 		 */
 
-		memcpy(skb2->nh.raw, skb->data, hlen);
+		memcpy(skb_network_header(skb2), skb->data, hlen);
 
 		/*
 		 *	Copy a block of the IP datagram.
@@ -1198,7 +1198,7 @@ int ip_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
+	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f8ab654b6a35..70888e1ef6b7 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -273,7 +273,8 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct iphdr *)(skb->h.icmph + 1))->daddr) -
+				   skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = payload;
@@ -309,7 +310,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = skb->tail;
@@ -355,7 +356,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	sin = (struct sockaddr_in *)msg->msg_name;
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
+		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
+						   serr->addr_offset);
 		sin->sin_port = serr->port;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 51528d3cc334..4a8d99bca441 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -563,7 +563,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		 */
 		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
 		skb->nh.raw = skb->h.raw = (u8*)msg;
-		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
 		msg->im_vif = reg_vif_num;
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a493..f29d3a27eec6 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -338,7 +338,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -413,7 +413,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 858686d616a2..5d54dd2ce12f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -559,7 +559,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	struct iphdr *iph	 = skb->nh.iph;
 	unsigned int icmp_offset = iph->ihl*4;
-	struct icmphdr *icmph	 = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
+						      icmp_offset);
 	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
 
 	if (inout) {
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48f..af1c8593eb19 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -31,7 +31,7 @@ target(struct sk_buff **pskb,
 	}
 
 	arp = (*pskb)->nh.arph;
-	arpptr = (*pskb)->nh.raw + sizeof(*arp);
+	arpptr = skb_network_header(*pskb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
 	/* We assume that pln and hln were checked in the match */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df8..7cebbff0b0c3 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -105,7 +105,7 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 		return -NF_DROP;
 	}
 
-	*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	*dataoff = skb_network_offset(*pskb) + (*pskb)->nh.iph->ihl * 4;
 	*protonum = (*pskb)->nh.iph->protocol;
 
 	return NF_ACCEPT;
@@ -151,10 +151,9 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	if (!help || !help->helper)
 		return NF_ACCEPT;
 
-	return help->helper->help(pskb,
-			       (*pskb)->nh.raw - (*pskb)->data
-					       + (*pskb)->nh.iph->ihl*4,
-			       ct, ctinfo);
+	return help->helper->help(pskb, (skb_network_offset(*pskb) +
+					 (*pskb)->nh.iph->ihl * 4),
+				  ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 67e5e3c035c1..a3d02fdfc066 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset(skb);
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	raw_rcv_skb(sk, skb);
 	return 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f5e019cefc15..00190835cea1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3634,7 +3634,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
+		nskb->nh.raw = nskb->data + (skb_network_header(skb) - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e89..d89969c502dd 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -146,7 +146,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		return 0;
 	} else {
 #ifdef CONFIG_NETFILTER
-		__skb_push(skb, skb->data - skb->nh.raw);
+		__skb_push(skb, skb->data - skb_network_header(skb));
 		skb->nh.iph->tot_len = htons(skb->len);
 		ip_send_check(skb->nh.iph);
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index eaaf3565f3b2..505fca034a1f 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -98,7 +98,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	skb->nh.raw = skb->data + (phlen - sizeof(*iph));
-	memmove(skb->nh.raw, iph, sizeof(*iph));
+	memmove(skb_network_header(skb), iph, sizeof(*iph));
 	skb->h.raw = skb->data + (phlen + optlen);
 	skb->data = skb->h.raw;
 
@@ -112,7 +112,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	else
 		iph->protocol = protocol;
 	iph->check = 0;
-	iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a820dde2c862..b198087c073e 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -34,7 +34,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
-	memmove(skb->nh.raw, iph, ihl);
+	memmove(skb_network_header(skb), iph, ihl);
 	return 0;
 }
 
@@ -51,7 +51,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 	int ihl = skb->data - skb->h.raw;
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb->nh.raw, ihl);
+		memmove(skb->h.raw, skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	skb->nh.iph->tot_len = htons(skb->len + ihl);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 98a833ce1114..fbb1d3decf02 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -210,7 +210,7 @@ static void
 _decode_session4(struct sk_buff *skb, struct flowi *fl)
 {
 	struct iphdr *iph = skb->nh.iph;
-	u8 *xprth = skb->nh.raw + iph->ihl*4;
+	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 
 	memset(fl, 0, sizeof(struct flowi));
 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2ff070417955..7b917f856e1c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -693,7 +693,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 	if (np->rxopt.all) {
 		if ((opt->hop && (np->rxopt.bits.hopopts ||
 				  np->rxopt.bits.ohopopts)) ||
-		    ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
+		    ((IPV6_FLOWINFO_MASK &
+		      *(__be32 *)skb_network_header(skb)) &&
 		     np->rxopt.bits.rxflow) ||
 		    (opt->srcrt && (np->rxopt.bits.srcrt ||
 		     np->rxopt.bits.osrcrt)) ||
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3c..1c914386982f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)skb->data;
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_AH;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_AH;
 
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
@@ -341,7 +341,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
-	hdr_len = skb->data - skb->nh.raw;
+	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ipv6_auth_hdr*)skb->data;
 	ahp = x->data;
 	nexthdr = ah->nexthdr;
@@ -354,7 +354,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
 	if (!tmp_hdr)
 		goto out;
 	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
@@ -382,7 +382,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+	skb->nh.raw += ah_hlen;
+	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+	skb->h.raw = skb->nh.raw;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 31a20f17c854..7a86db6163ee 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -227,7 +227,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+				  skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = payload;
@@ -264,7 +265,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
 	skb->h.raw = skb->tail;
@@ -310,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
 	sin = (struct sockaddr_in6 *)msg->msg_name;
 	if (sin) {
+		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
 			ipv6_addr_copy(&sin->sin6_addr,
-			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+				  (struct in6_addr *)(nh + serr->addr_offset));
 			if (np->sndflow)
-				sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+				sin->sin6_flowinfo =
+					(*(__be32 *)(nh + serr->addr_offset - 24) &
+					 IPV6_FLOWINFO_MASK);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
 				      htonl(0xffff),
-				      *(__be32*)(skb->nh.raw + serr->addr_offset));
+				      *(__be32 *)(nh + serr->addr_offset));
 		}
 	}
 
@@ -382,6 +386,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	unsigned char *nh = skb_network_header(skb);
 
 	if (np->rxopt.bits.rxinfo) {
 		struct in6_pktinfo src_info;
@@ -401,14 +406,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-		__be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+	if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+		__be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
 		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
 	if (np->rxopt.bits.hopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 
@@ -428,7 +433,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 
 		while (off <= opt->lastopt) {
 			unsigned len;
-			u8 *ptr = skb->nh.raw + off;
+			u8 *ptr = nh + off;
 
 			switch(nexthdr) {
 			case IPPROTO_DSTOPTS:
@@ -470,19 +475,19 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst0) {
-		u8 *ptr = skb->nh.raw + opt->dst0;
+		u8 *ptr = nh + opt->dst0;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.osrcrt && opt->srcrt) {
-		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst1) {
-		u8 *ptr = skb->nh.raw + opt->dst1;
+		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	return 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecca..6e6b57ac8013 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -92,8 +92,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
 	esph = (struct ipv6_esp_hdr *)skb->h.raw;
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ESP;
+	*(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
 	esph->seq_no = htonl(++x->replay.oseq);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fce5abde554f..9ebf120ba6d3 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
 
 int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 {
-	int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb->tail - nh;
 	struct ipv6_opt_hdr *hdr;
 	int len;
 
 	if (offset + 2 > packet_len)
 		goto bad;
-	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
 	len = ((hdr->hdrlen + 1) << 3);
 
 	if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 	len -= 2;
 
 	while (len > 0) {
-		int opttype = skb->nh.raw[offset];
+		int opttype = nh[offset];
 		int optlen;
 
 		if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 			optlen = 1;
 			break;
 		default:
-			optlen = skb->nh.raw[offset + 1] + 2;
+			optlen = nh[offset + 1] + 2;
 			if (optlen > len)
 				goto bad;
 			break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
 
-	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
 
@@ -141,6 +142,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
+	const unsigned char *nh = skb_network_header(skb);
 	int off = skb->h.raw - skb->nh.raw;
 	int len = ((skb->h.raw[1]+1)<<3);
 
@@ -151,9 +153,9 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off+1]+2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 			if (optlen > len)
 				goto bad;
 			for (curr=procs; curr->type >= 0; curr++) {
-				if (curr->type == skb->nh.raw[off]) {
+				if (curr->type == nh[off]) {
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	opt->dsthao = opt->dst1;
 	opt->dst1 = 0;
 
-	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
 	if (hao->length != 16) {
 		LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 
 		/* update all variable using below by copied skbuff */
 		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
-		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+						  optoff);
+		ipv6h = skb2->nh.ipv6h;
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -406,7 +409,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	default:
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  (&hdr->type) - skb_network_header(skb));
 		return -1;
 	}
 
@@ -443,7 +447,7 @@ looped_back:
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
 		return 1;
 	}
 
@@ -452,7 +456,9 @@ looped_back:
 		if (hdr->hdrlen & 0x01) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+					  ((&hdr->hdrlen) -
+					   skb_network_header(skb)));
 			return -1;
 		}
 		break;
@@ -479,7 +485,9 @@ looped_back:
 	if (hdr->segments_left > n) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
 		return -1;
 	}
 
@@ -547,7 +555,7 @@ looped_back:
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
 	if (skb->dst->error) {
-		skb_push(skb, skb->data - skb->nh.raw);
+		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
@@ -565,7 +573,7 @@ looped_back:
 		goto looped_back;
 	}
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
 }
@@ -656,13 +664,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[optoff+1] == 2) {
+	if (nh[optoff + 1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-		       skb->nh.raw[optoff+1]);
+		       nh[optoff + 1]);
 	kfree_skb(skb);
 	return 0;
 }
@@ -672,17 +681,18 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
-	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-			       skb->nh.raw[optoff+1]);
+			       nh[optoff+1]);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
-	pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
@@ -727,7 +737,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
-	 * skb->nh.raw is equal to skb->data, and
+	 * skb_network_header(skb) is equal to skb->data, and
 	 * skb->h.raw - skb->nh.raw is always equal to
 	 * sizeof(struct ipv6hdr) by definition of
 	 * hop-by-hop options.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index aa4a0a59ffac..e5293b34229f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -284,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
 	if (opt->dsthao) {
 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(off >= 0)) {
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + off);
 			ipv6_addr_copy(&tmp, &iph->saddr);
 			ipv6_addr_copy(&iph->saddr, &hao->addr);
 			ipv6_addr_copy(&hao->addr, &tmp);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 61e7a6c8141d..aecc74da0721 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -163,7 +163,7 @@ resubmit:
 	if (!pskb_pull(skb, skb->h.raw - skb->data))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,7 +181,7 @@ resubmit:
 			   indefinitely. */
 			nf_reset(skb);
 
-			skb_postpull_rcsum(skb, skb->nh.raw,
+			skb_postpull_rcsum(skb, skb_network_header(skb),
 					   skb->h.raw - skb->nh.raw);
 			hdr = skb->nh.ipv6h;
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 47d00210cba1..f1dfcc319717 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -323,10 +323,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 	if (nexthdr == IPPROTO_ICMPV6) {
 		struct icmp6hdr *icmp6;
 
-		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+		if (!pskb_may_pull(skb, (skb_network_header(skb) +
+					 offset + 1 - skb->data)))
 			return 0;
 
-		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 
 		switch (icmp6->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
@@ -392,7 +393,7 @@ int ip6_forward(struct sk_buff *skb)
 	 *	that different fragments will go along one path. --ANK
 	 */
 	if (opt->ra) {
-		u8 *ptr = skb->nh.raw + opt->ra;
+		u8 *ptr = skb_network_header(skb) + opt->ra;
 		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 			return 0;
 	}
@@ -527,7 +528,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	unsigned int packet_len = skb->tail - skb_network_header(skb);
 	int found_rhdr = 0;
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
 
@@ -554,7 +555,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+						 offset);
 	}
 
 	return offset;
@@ -620,7 +622,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		/* BUILD HEADER */
 
 		*prevhdr = NEXTHDR_FRAGMENT;
-		tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
+		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
@@ -630,7 +632,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 		__skb_push(skb, hlen);
 		skb_reset_network_header(skb);
-		memcpy(skb->nh.raw, tmp_hdr, hlen);
+		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
 		ipv6_select_ident(skb, fh);
 		fh->nexthdr = nexthdr;
@@ -654,7 +656,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
-				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				memcpy(skb_network_header(frag), tmp_hdr,
+				       hlen);
 				offset += skb->len - hlen - sizeof(struct frag_hdr);
 				fh->nexthdr = nexthdr;
 				fh->reserved = 0;
@@ -753,7 +756,7 @@ slow_path:
 		/*
 		 *	Copy the packet header into the new buffer.
 		 */
-		memcpy(frag->nh.raw, skb->data, hlen);
+		memcpy(skb_network_header(frag), skb->data, hlen);
 
 		/*
 		 *	Build fragment header.
@@ -1329,7 +1332,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
+	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a1e4f39c6793..aafbdfa8d785 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -995,9 +995,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+	offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
 		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
 		if (tel->encap_limit == 0) {
 			icmpv6_send(skb, ICMPV6_PARAMPROB,
 				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75de..3e71d1691b7d 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -166,10 +166,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	ipch = (struct ipv6_comp_hdr *)start;
-	ipch->nexthdr = *skb->nh.raw;
+	ipch->nexthdr = *skb_network_header(skb);
 	ipch->flags = 0;
 	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb->nh.raw = IPPROTO_COMP;
+	*skb_network_header(skb) = IPPROTO_COMP;
 
 out_ok:
 	return 0;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed6..bb4033553f3b 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -99,14 +99,16 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
 			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
-		mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
 			       mh->ip6mh_proto);
-		mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
@@ -152,8 +154,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_DSTOPTS;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_DSTOPTS;
 
 	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
 	dstopt->nexthdr = nexthdr;
@@ -215,7 +217,8 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 	if (likely(opt->dsthao)) {
 		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(offset >= 0))
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + offset);
 	}
 
 	skb_get_timestamp(skb, &stamp);
@@ -254,7 +257,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
@@ -288,7 +292,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
@@ -361,8 +365,8 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ROUTING;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ROUTING;
 
 	rt2 = (struct rt2_hdr *)skb->h.raw;
 	rt2->rt_hdr.nexthdr = nexthdr;
@@ -384,7 +388,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
@@ -397,7 +402,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 		case NEXTHDR_ROUTING:
 			if (offset + 3 <= packet_len) {
 				struct ipv6_rt_hdr *rt;
-				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
 				if (rt->type != 0)
 					return offset;
 			}
@@ -417,7 +422,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c311b9a12ca6..bc1d09584008 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -408,11 +408,12 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		return -1;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw,
-						  (u8*)(fhdr + 1) - skb->nh.raw,
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
 						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -583,7 +584,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		DEBUGP("payload len is too large.\n");
 		goto out_oversize;
@@ -624,7 +627,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
-	head->nh.raw[fq->nhoffset] = head->h.raw[0];
+	skb_network_header(head)[fq->nhoffset] = head->h.raw[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
 	head->mac.raw += sizeof(struct frag_hdr);
@@ -632,7 +635,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	skb_shinfo(head)->frag_list = head->next;
 	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &nf_ct_frag6_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -653,7 +656,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  head->h.raw - head->nh.raw,
+					  head->csum);
 
 	fq->fragments = NULL;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5f26645195dc..9b2bcde73f19 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -361,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		skb_postpull_rcsum(skb, skb->nh.raw,
+		skb_postpull_rcsum(skb, skb_network_header(skb),
 				   skb->h.raw - skb->nh.raw);
 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
 				     &skb->nh.ipv6h->daddr,
@@ -488,7 +488,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
+						skb->data);
 	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1dde449379fb..f85e49acb91a 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -436,13 +436,18 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((u8 *)&fhdr->frag_off -
+				   skb_network_header(skb)));
 		return;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
@@ -639,7 +646,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	head->nh.raw[nhoff] = head->h.raw[0];
+	skb_network_header(head)[nhoff] = head->h.raw[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
 	head->mac.raw += sizeof(struct frag_hdr);
@@ -647,7 +654,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	skb_shinfo(head)->frag_list = head->next;
 	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip6_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -671,7 +678,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  head->h.raw - head->nh.raw,
+					  head->csum);
 
 	rcu_read_lock();
 	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -725,7 +734,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 		skb->h.raw += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
 
-		IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
+		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		return 1;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 92f99927d12d..80a52ab1e384 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+				opt = ipv6_invert_rthdr(sk,
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt && opt->srcrt) {
@@ -1389,7 +1391,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	    opt == NULL && treq->pktopts) {
 		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk,
+		   (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 33a1b9200431..5c929f886129 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,7 +28,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	unsigned int nhoff;
 
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		if (nexthdr <= 0)
 			goto drop_unlock;
 
-		skb->nh.raw[nhoff] = nexthdr;
+		skb_network_header(skb)[nhoff] = nexthdr;
 
 		if (x->props.replay_window)
 			xfrm_replay_advance(x, seq);
@@ -113,7 +113,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	} else {
 #ifdef CONFIG_NETFILTER
 		skb->nh.ipv6h->payload_len = htons(skb->len);
-		__skb_push(skb, skb->data - skb->nh.raw);
+		__skb_push(skb, skb->data - skb_network_header(skb));
 
 		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
 			ip6_rcv_finish);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index c015bfde2b1c..247e2d5d2acf 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -67,7 +67,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 
 	skb_push(skb, size);
-	memmove(skb->data, skb->nh.raw, size);
+	memmove(skb->data, skb_network_header(skb), size);
 	skb_reset_network_header(skb);
 
 	old_mac = skb_mac_header(skb);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12bad..ace0bbf4f25d 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -53,8 +53,10 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ihl = skb->data - skb->h.raw;
 
-	if (skb->h.raw != skb->nh.raw)
-		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
+	if (skb->h.raw != skb->nh.raw) {
+		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		skb->nh.raw = skb->h.raw;
+	}
 	skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
 	skb->h.raw = skb->data;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8ce5ef2d0b1c..498f17b5c42f 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -87,9 +87,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
 	const unsigned char *old_mac;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
-	    && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+	if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
+	    nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
 		goto out;
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
@@ -98,7 +99,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+	nh = skb_network_header(skb);
+	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
 			ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
 		if (!(x->props.flags & XFRM_STATE_NOECN))
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb4..cb5a723d4cb4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -273,14 +273,16 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 	u16 offset = skb->h.raw - skb->nh.raw;
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct ipv6_opt_hdr *exthdr;
-	u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
+	const unsigned char *nh = skb_network_header(skb);
+	u8 nexthdr = nh[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
 	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
 	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
 
-	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+		nh = skb_network_header(skb);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 
 		switch (nexthdr) {
 		case NEXTHDR_ROUTING:
@@ -288,7 +290,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case NEXTHDR_DEST:
 			offset += ipv6_optlen(exthdr);
 			nexthdr = exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 			break;
 
 		case IPPROTO_UDP:
@@ -296,7 +298,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
 		case IPPROTO_DCCP:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)exthdr;
 
 				fl->fl_ip_sport = ports[0];
@@ -306,7 +308,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			return;
 
 		case IPPROTO_ICMPV6:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
 				u8 *icmp = (u8 *)exthdr;
 
 				fl->fl_icmp_type = icmp[0];
@@ -317,7 +319,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 
 #ifdef CONFIG_IPV6_MIP6
 		case IPPROTO_MH:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
 				mh = (struct ip6_mh *)exthdr;
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de2..afc0c60e19d5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 		return -1;
 
 	tcplen = (*pskb)->len - tcphoff;
-	tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+	tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 
 	/* Since it passed flags test in tcp match, we know it is is
 	   not a fragment, and has data >= tcp header length.  SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 			return -1;
 		kfree_skb(*pskb);
 		*pskb = newskb;
-		tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+		tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 	}
 
 	skb_put((*pskb), TCPOLEN_MSS);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce4..20813eee8af4 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -136,7 +136,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 
-	pptr = skb->nh.raw;
+	pptr = skb_network_header(skb);
 
 	spin_lock(&p->tcf_lock);
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d25..695b34051b9f 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -119,7 +119,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 	} stack[TC_U32_MAXDEPTH];
 
 	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
-	u8 *ptr = skb->nh.raw;
+	u8 *ptr = skb_network_header(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
 	int off2 = 0;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c67969..0a2a7fe08de3 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
 			struct tcf_pkt_info *info)
 {
 	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
-	unsigned char *ptr = skb->nh.raw;
+	const unsigned char *ptr = skb_network_header(skb);
 
 	if (info) {
 		if (info->ptr)
-- 
cgit v1.2.3


From c14d2450cb7fe1786e2ec325172baf66922bf597 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 11 Mar 2007 22:39:41 -0300
Subject: [SK_BUFF]: Introduce skb_set_network_header

For the cases where the network header is being set to an offset from skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/ax25/ax25_out.c    | 6 ++++--
 net/ipv4/ip_output.c   | 4 ++--
 net/ipv4/tcp_input.c   | 3 ++-
 net/ipv6/ip6_output.c  | 4 ++--
 5 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 76d30f34b986..870438fba93f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -970,6 +970,11 @@ static inline void skb_reset_network_header(struct sk_buff *skb)
 	skb->nh.raw = skb->data;
 }
 
+static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
+{
+	skb->nh.raw = skb->data + offset;
+}
+
 static inline int skb_network_offset(const struct sk_buff *skb)
 {
 	return skb->nh.raw - skb->data;
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 02dea851a11a..e66953ce53e7 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,7 +148,8 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 
 			if (ka9qfrag == 1) {
 				skb_reserve(skbn, frontlen + 2);
-				skbn->nh.raw = skbn->data + skb_network_offset(skb);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 2);
 
@@ -161,7 +162,8 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				}
 			} else {
 				skb_reserve(skbn, frontlen + 1);
-				skbn->nh.raw = skbn->data + skb_network_offset(skb);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 1);
 				*p = AX25_P_TEXT;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eae228469627..15de9d43950e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -928,9 +928,9 @@ alloc_new_skb:
 			 *	Find where to start putting bytes.
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 00190835cea1..5da823a32250 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3634,7 +3634,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		nskb->nh.raw = nskb->data + (skb_network_header(skb) - skb->head);
+		skb_set_network_header(nskb,
+				       skb_network_header(skb) - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f1dfcc319717..bd25825c0ccd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1206,9 +1206,9 @@ alloc_new_skb:
 			 *	Find where to start putting bytes
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
-- 
cgit v1.2.3


From 967b05f64e27d04a4c8879addd0e1c52137e2c9e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 13:51:52 -0300
Subject: [SK_BUFF]: Introduce skb_set_transport_header

For the cases where the transport header is being set to an offset from
skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |  6 ++++++
 net/ax25/af_ax25.c                      | 20 ++++++++------------
 net/ax25/ax25_in.c                      |  2 +-
 net/ipv4/esp4.c                         |  3 ++-
 net/ipv4/ip_output.c                    | 19 ++++++++-----------
 net/ipv4/tcp_input.c                    |  2 +-
 net/ipv6/ah6.c                          |  2 +-
 net/ipv6/esp6.c                         |  4 ++--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/xfrm6_mode_beet.c              |  2 +-
 net/ipv6/xfrm6_mode_ro.c                |  2 +-
 net/ipv6/xfrm6_mode_transport.c         |  2 +-
 12 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 64c3c1687e49..684292efa823 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -962,6 +962,12 @@ static inline void skb_reset_transport_header(struct sk_buff *skb)
 	skb->h.raw = skb->data;
 }
 
+static inline void skb_set_transport_header(struct sk_buff *skb,
+					    const int offset)
+{
+	skb->h.raw = skb->data + offset;
+}
+
 static inline int skb_transport_offset(const struct sk_buff *skb)
 {
 	return skb->h.raw - skb->data;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 14db01a4ff63..75d4d695edec 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1425,7 +1425,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sockaddr_ax25 sax;
 	struct sk_buff *skb;
 	ax25_digi dtmp, *dp;
-	unsigned char *asmptr;
 	ax25_cb *ax25;
 	size_t size;
 	int lv, err, addr_len = msg->msg_namelen;
@@ -1551,10 +1550,8 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_network_header(skb);
 
 	/* Add the PID if one is not supplied by the user in the skb */
-	if (!ax25->pidincl) {
-		asmptr  = skb_push(skb, 1);
-		*asmptr = sk->sk_protocol;
-	}
+	if (!ax25->pidincl)
+		*skb_push(skb, 1) = sk->sk_protocol;
 
 	SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
 
@@ -1573,7 +1570,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+	skb_push(skb, 1 + ax25_addr_size(dp));
 
 	SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
 
@@ -1581,17 +1578,16 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
 
 	/* Build an AX.25 header */
-	asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
-					&sax.sax25_call, dp,
-					AX25_COMMAND, AX25_MODULUS));
+	lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
+			     dp, AX25_COMMAND, AX25_MODULUS);
 
 	SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
 
-	skb->h.raw = asmptr;
+	skb_set_transport_header(skb, lv);
 
-	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, skb->h.raw);
 
-	*asmptr = AX25_UI;
+	*skb->h.raw = AX25_UI;
 
 	/* Datagram frames go straight out of the door as UI */
 	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 724ad5ce75d4..31c59387a6fc 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -233,7 +233,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	/* UI frame - bypass LAPB processing */
 	if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
-		skb->h.raw = skb->data + 2;		/* skip control and pid */
+		skb_set_transport_header(skb, 2); /* skip control and pid */
 
 		ax25_send_to_raw(&dest, skb, skb->data[1]);
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 957674562801..82543eebfa52 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -261,7 +261,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	iph->protocol = nexthdr[1];
 	pskb_trim(skb, skb->len - alen - padlen - 2);
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -ihl);
 
 	return 0;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 63c05be0764d..6d92358fc513 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -593,7 +593,7 @@ slow_path:
 		skb_reserve(skb2, ll_rs);
 		skb_put(skb2, len + hlen);
 		skb_reset_network_header(skb2);
-		skb2->h.raw = skb2->data + hlen;
+		skb2->h.raw = skb2->nh.raw + hlen;
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -724,7 +724,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->h.raw = skb->nh.raw + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -1099,8 +1099,6 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
-			char *data;
-			struct iphdr *iph;
 			int alloclen;
 
 			skb_prev = skb;
@@ -1123,16 +1121,15 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			/*
 			 *	Find where to start putting bytes.
 			 */
-			data = skb_put(skb, fragheaderlen + fraggap);
+			skb_put(skb, fragheaderlen + fraggap);
 			skb_reset_network_header(skb);
-			iph = ip_hdr(skb);
-			data += fragheaderlen;
-			skb->h.raw = data;
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 
 			if (fraggap) {
-				skb->csum = skb_copy_and_csum_bits(
-					skb_prev, maxfraglen,
-					data, fraggap, 0);
+				skb->csum = skb_copy_and_csum_bits(skb_prev,
+								   maxfraglen,
+								   skb->h.raw,
+								   fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				pskb_trim_unique(skb_prev, maxfraglen);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5da823a32250..2776a8b01339 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3636,7 +3636,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
 		skb_set_network_header(nskb,
 				       skb_network_header(skb) - skb->head);
-		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
+		skb_set_transport_header(nskb, skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index b682d2368c2a..e5ee981d3e10 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(tmp_base, top_iph, sizeof(tmp_base));
 
 	tmp_ext = NULL;
-	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+	extlen = skb_transport_offset(skb) + sizeof(struct ipv6hdr);
 	if (extlen) {
 		extlen += sizeof(*tmp_ext);
 		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 35905867ded1..ad522b7b5771 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -228,8 +228,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = nexthdr[1];
 	}
 
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -hdr_len);
 out:
 	return ret;
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 75138cf1fa61..015950522c8b 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -785,7 +785,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	clone->h.raw = clone->data + fhoff;
+	skb_set_transport_header(clone, fhoff);
 	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)clone->h.raw;
 
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index b5a48c255f07..abac09409ded 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -42,7 +42,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 
 	skb_reset_network_header(skb);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index d01958d921ac..da48ecf3fe96 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -54,7 +54,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index cae6cacd58c4..0134d74ef087 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -36,7 +36,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
-- 
cgit v1.2.3


From aa8223c7bb0b05183e1737881ed21827aa5b9e73 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:04:22 -0700
Subject: [SK_BUFF]: Introduce tcp_hdr(), remove skb->h.th

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c   |  7 ++++---
 drivers/net/bnx2.c             |  8 ++++----
 drivers/net/chelsio/sge.c      |  2 +-
 drivers/net/cxgb3/sge.c        |  2 +-
 drivers/net/e1000/e1000_main.c | 11 ++++++-----
 drivers/net/ioc3-eth.c         |  2 +-
 drivers/net/ixgb/ixgb_main.c   |  7 ++++---
 drivers/net/mv643xx_eth.c      |  2 +-
 drivers/net/tg3.c              | 15 +++++++--------
 drivers/s390/net/qeth_eddp.c   |  2 +-
 drivers/s390/net/qeth_tso.h    |  4 ++--
 include/linux/skbuff.h         |  1 -
 include/linux/tcp.h            |  9 +++++++--
 include/net/tcp.h              |  2 +-
 include/net/tcp_ecn.h          |  6 +++---
 net/ipv4/ip_output.c           |  4 ++--
 net/ipv4/syncookies.c          | 36 ++++++++++++++++++------------------
 net/ipv4/tcp.c                 | 22 +++++++++++-----------
 net/ipv4/tcp_input.c           | 28 +++++++++++++++-------------
 net/ipv4/tcp_ipv4.c            | 32 ++++++++++++++++----------------
 net/ipv4/tcp_minisocks.c       |  9 +++++----
 net/ipv4/tcp_output.c          | 13 ++++++++-----
 net/ipv6/tcp_ipv6.c            | 32 ++++++++++++++++----------------
 23 files changed, 134 insertions(+), 122 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8d5994751e2e..d60c2217332c 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1298,9 +1298,10 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 73512fb16452..7e7b5f344030 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4524,7 +4524,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5)
+		if (tcp_hdr(skb)->doff > 5)
 			tcp_opt_len = tcp_optlen(skb);
 
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
@@ -4532,9 +4532,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		iph = ip_hdr(skb);
 		iph->check = 0;
 		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
-
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		if (tcp_opt_len || (iph->ihl > 5)) {
 			vlan_tag_flags |= ((iph->ihl - 5) +
 					   (tcp_opt_len >> 2)) << 8;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index a4204dff3636..43e92f9f0bcd 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1872,7 +1872,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
 		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
-		hdr->tcp_hdr_words = skb->h.th->doff;
+		hdr->tcp_hdr_words = tcp_hdr(skb)->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 							  skb_shinfo(skb)->gso_size));
 		hdr->len = htonl(skb->len - sizeof(*hdr));
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index d38b1bcd138e..a70fe9145a2e 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -901,7 +901,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
 		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
-		    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
+		    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
 	} else {
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 4572fbba50f9..e86deb2ef823 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2893,14 +2893,15 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			struct iphdr *iph = ip_hdr(skb);
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb_transport_offset(skb) - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			ipv6_hdr(skb)->payload_len = 0;
-			skb->h.th->check =
+			tcp_hdr(skb)->check =
 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 &ipv6_hdr(skb)->daddr,
 						 0, IPPROTO_TCP, 0);
@@ -2909,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index ba012e10d79a..bc62e770a256 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1426,7 +1426,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		if (proto == IPPROTO_TCP) {
 			csoff += offsetof(struct tcphdr, check);
-			skb->h.th->check = csum;
+			tcp_hdr(skb)->check = csum;
 		}
 
 		w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT);
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 96550d681623..e729ced52dc3 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1195,13 +1195,14 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(iph->check) - (void *)skb->data;
 		ipcse = skb_transport_offset(skb) - 1;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		i = adapter->tx_ring.next_to_use;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 43723839e934..ab15ecd4b3d6 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1169,7 +1169,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 			desc->l4i_chk = udp_hdr(skb)->check;
 			break;
 		case IPPROTO_TCP:
-			desc->l4i_chk = skb->h.th->check;
+			desc->l4i_chk = tcp_hdr(skb)->check;
 			break;
 		default:
 			BUG();
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7ca30d76bf6f..414365c3198d 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3922,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->h.th->check = 0;
+		tcp_hdr(skb)->check = 0;
 
 	}
 	else if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -4080,14 +4080,13 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		iph->check = 0;
 		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
-			skb->h.th->check = 0;
+			tcp_hdr(skb)->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
-		}
-		else {
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
-		}
+		} else
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 273f1745a009..b8e84674e170 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -416,7 +416,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                        eddp->skb_offset += VLAN_HLEN;
 #endif /* CONFIG_QETH_VLAN */
        }
-	tcph = eddp->skb->h.th;
+	tcph = tcp_hdr(eddp->skb);
 	while (eddp->skb_offset < eddp->skb->len) {
 		data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
 			       (int)(eddp->skb->len - eddp->skb_offset));
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 4040bdd8c327..c20e923cf9ad 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -41,7 +41,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 
 	hdr  = (struct qeth_hdr_tso *) skb->data;
 	iph  = ip_hdr(skb);
-	tcph = skb->h.th;
+	tcph = tcp_hdr(skb);
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
 	/*set values which are fix for the first approach ...*/
@@ -65,7 +65,7 @@ qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
 	struct iphdr *iph    = ip_hdr(skb);
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct tcphdr *tcph  = skb->h.th;
+	struct tcphdr *tcph  = tcp_hdr(skb);
 
 	tcph->check = 0;
 	if (skb->protocol == ETH_P_IPV6) {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e580416de78a..8f158d66d2a8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -237,7 +237,6 @@ struct sk_buff {
 	/* 4 byte hole on 64 bit*/
 
 	union {
-		struct tcphdr	*th;
 		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 244ae0dacf4a..911d937fb4c1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -178,14 +178,19 @@ struct tcp_md5sig {
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
 
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+	return (struct tcphdr *)skb->h.raw;
+}
+
 static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
 {
-	return skb->h.th->doff * 4;
+	return tcp_hdr(skb)->doff * 4;
 }
 
 static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 {
-	return (skb->h.th->doff - 5) * 4;
+	return (tcp_hdr(skb)->doff - 5) * 4;
 }
 
 /* This defines a selective acknowledgement block. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6dacc352dcf1..af9273204cfd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -984,7 +984,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->wscale_ok = rx_opt->wscale_ok;
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
-	ireq->rmt_port = skb->h.th->source;
+	ireq->rmt_port = tcp_hdr(skb)->source;
 }
 
 extern void tcp_enter_memory_pressure(void);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 4629d77173f2..b5f7c6ac0880 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -54,7 +54,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 			INET_ECN_xmit(sk);
 			if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
 				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
-				skb->h.th->cwr = 1;
+				tcp_hdr(skb)->cwr = 1;
 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 			}
 		} else {
@@ -62,7 +62,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 			INET_ECN_dontxmit(sk);
 		}
 		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
-			skb->h.th->ece = 1;
+			tcp_hdr(skb)->ece = 1;
 	}
 }
 
@@ -70,7 +70,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 
 static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if (skb->h.th->cwr)
+	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6d92358fc513..602268661eb3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1352,8 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } },
+					       { .sport = tcp_hdr(skb)->dest,
+						 .dport = tcp_hdr(skb)->source } },
 				    .proto = sk->sk_protocol };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 261607178491..2da1be0589a9 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
 __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-
 	tp->last_synq_overflow = jiffies;
 
 	/* XXX sort msstab[] by probability?  Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
 
-	return secure_tcp_syn_cookie(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-				     skb->h.th->source, skb->h.th->dest,
-				     ntohl(skb->h.th->seq),
+	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
+				     th->source, th->dest, ntohl(th->seq),
 				     jiffies / (HZ * 60), mssind);
 }
 
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  */
 static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 {
-	__u32 seq;
-	__u32 mssind;
-
-	seq = ntohl(skb->h.th->seq)-1;
-	mssind = check_tcp_syn_cookie(cookie,
-				      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-				      skb->h.th->source, skb->h.th->dest,
-				      seq, jiffies / (HZ * 60), COUNTER_TRIES);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 seq = ntohl(th->seq) - 1;
+	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+					    th->source, th->dest, seq,
+					    jiffies / (HZ * 60),
+					    COUNTER_TRIES);
 
 	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
 }
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 cookie = ntohl(th->ack_seq) - 1;
 	struct sock *ret = sk;
 	struct request_sock *req;
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
 
-	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
 
 	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,10 +220,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->rcv_isn		= ntohl(skb->h.th->seq) - 1;
+	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->rmt_port		= skb->h.th->source;
+	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->opt		= NULL;
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 						.tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } } };
+					       { .sport = th->dest,
+						 .dport = th->source } } };
 		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 689f9330f1b9..f832f3c33ab1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			/* Subtract 1, if FIN is in queue. */
 			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
 				answ -=
-		       ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+		       tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
 		} else
 			answ = tp->urg_seq - tp->copied_seq;
 		release_sock(sk);
@@ -1016,9 +1016,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 
 	skb_queue_walk(&sk->sk_receive_queue, skb) {
 		offset = seq - TCP_SKB_CB(skb)->seq;
-		if (skb->h.th->syn)
+		if (tcp_hdr(skb)->syn)
 			offset--;
-		if (offset < skb->len || skb->h.th->fin) {
+		if (offset < skb->len || tcp_hdr(skb)->fin) {
 			*off = offset;
 			return skb;
 		}
@@ -1070,7 +1070,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			if (offset != skb->len)
 				break;
 		}
-		if (skb->h.th->fin) {
+		if (tcp_hdr(skb)->fin) {
 			sk_eat_skb(sk, skb, 0);
 			++seq;
 			break;
@@ -1174,11 +1174,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				break;
 			}
 			offset = *seq - TCP_SKB_CB(skb)->seq;
-			if (skb->h.th->syn)
+			if (tcp_hdr(skb)->syn)
 				offset--;
 			if (offset < skb->len)
 				goto found_ok_skb;
-			if (skb->h.th->fin)
+			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
 			BUG_TRAP(flags & MSG_PEEK);
 			skb = skb->next;
@@ -1394,7 +1394,7 @@ skip_copy:
 		if (used + offset < skb->len)
 			continue;
 
-		if (skb->h.th->fin)
+		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, copied_early);
@@ -1563,7 +1563,7 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
-			  skb->h.th->fin;
+			  tcp_hdr(skb)->fin;
 		data_was_unread += len;
 		__kfree_skb(skb);
 	}
@@ -2170,7 +2170,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		goto out;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	thlen = th->doff * 4;
 	if (thlen < sizeof(*th))
 		goto out;
@@ -2210,7 +2210,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	delta = htonl(oldlen + (thlen + len));
 
 	skb = segs;
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	seq = ntohl(th->seq);
 
 	do {
@@ -2224,7 +2224,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 		seq += len;
 		skb = skb->next;
-		th = skb->h.th;
+		th = tcp_hdr(skb);
 
 		th->seq = htonl(seq);
 		th->cwr = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2776a8b01339..c1ce36237380 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -148,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		     * to handle super-low mtu links fairly.
 		     */
 		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
-		     !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
 			/* Subtract also invariant (if peer is RFC compliant),
 			 * tcp header plus fixed timestamp option length.
 			 * Resulting "len" is MSS free of SACK jitter.
@@ -2559,9 +2559,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 				 struct sk_buff *skb, u32 ack, u32 ack_seq)
 {
 	int flag = 0;
-	u32 nwin = ntohs(skb->h.th->window);
+	u32 nwin = ntohs(tcp_hdr(skb)->window);
 
-	if (likely(!skb->h.th->syn))
+	if (likely(!tcp_hdr(skb)->syn))
 		nwin <<= tp->rx_opt.snd_wscale;
 
 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2766,7 +2766,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
 
-		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
 
 		tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2833,7 +2833,7 @@ uninteresting_ack:
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
 {
 	unsigned char *ptr;
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length=(th->doff*4)-sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
@@ -2995,7 +2995,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -3357,8 +3357,8 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if (skb->h.th->fin)
-			tcp_fin(skb, sk, skb->h.th);
+		if (tcp_hdr(skb)->fin)
+			tcp_fin(skb, sk, tcp_hdr(skb));
 	}
 }
 
@@ -3366,7 +3366,7 @@ static int tcp_prune_queue(struct sock *sk);
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -3605,7 +3605,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		 * - bloated or contains data before "start" or
 		 *   overlaps to the next one.
 		 */
-		if (!skb->h.th->syn && !skb->h.th->fin &&
+		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
 		    (tcp_win_from_space(skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start) ||
 		     (skb->next != tail &&
@@ -3616,7 +3616,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		start = TCP_SKB_CB(skb)->end_seq;
 		skb = skb->next;
 	}
-	if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+	if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
 		return;
 
 	while (before(start, end)) {
@@ -3665,7 +3665,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 				__kfree_skb(skb);
 				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
 				skb = next;
-				if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+				if (skb == tail ||
+				    tcp_hdr(skb)->syn ||
+				    tcp_hdr(skb)->fin)
 					return;
 			}
 		}
@@ -4072,7 +4074,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 		tcp_rcv_space_adjust(sk);
 
 		if ((tp->ucopy.len == 0) ||
-		    (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+		    (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
 		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
 			tp->ucopy.wakeup = 1;
 			sk->sk_data_ready(sk, 0);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c146a02f8495..617a5e4ca010 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -127,8 +127,8 @@ static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 					  ip_hdr(skb)->saddr,
-					  skb->h.th->dest,
-					  skb->h.th->source);
+					  tcp_hdr(skb)->dest,
+					  tcp_hdr(skb)->source);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -499,7 +499,7 @@ out:
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(len, inet->saddr,
@@ -522,7 +522,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 		return -EINVAL;
 
 	iph = ip_hdr(skb);
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
@@ -546,7 +546,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 #ifdef CONFIG_TCP_MD5SIG
@@ -622,7 +622,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -745,7 +745,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	skb = tcp_make_synack(sk, dst, req);
 
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
@@ -781,7 +781,7 @@ static void syn_flood_warning(struct sk_buff *skb)
 		warntime = jiffies;
 		printk(KERN_INFO
 		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(skb->h.th->dest));
+		       ntohs(tcp_hdr(skb)->dest));
 	}
 }
 #endif
@@ -1134,7 +1134,7 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof(struct tcphdr);
 	int genhash;
 	unsigned char *ptr;
@@ -1327,7 +1327,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->rmt_addr = saddr;
 	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
-		TCP_ECN_create_request(req, skb->h.th);
+		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
 #ifdef CONFIG_SYN_COOKIES
@@ -1375,7 +1375,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
 				       "request from %u.%u.%u.%u/%u\n",
 				       NIPQUAD(saddr),
-				       ntohs(skb->h.th->source));
+				       ntohs(tcp_hdr(skb)->source));
 			dst_release(dst);
 			goto drop_and_free;
 		}
@@ -1481,7 +1481,7 @@ exit:
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
@@ -1556,7 +1556,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
 		}
@@ -1582,7 +1582,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
 		rsk = sk;
 		goto reset;
 	}
@@ -1625,7 +1625,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
 		goto bad_packet;
@@ -1640,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	     tcp_v4_checksum_init(skb)))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	iph = ip_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 463d2b24d2db..a12b08fca5ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -453,7 +453,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
 			newtp->window_clamp = min(newtp->window_clamp, 65535U);
 		}
-		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
+				  newtp->rx_opt.snd_wscale);
 		newtp->max_window = newtp->snd_wnd;
 
 		if (newtp->rx_opt.tstamp_ok) {
@@ -488,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock *req,
 			   struct request_sock **prev)
 {
-	struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -710,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int state = child->sk_state;
 
 	if (!sock_owned_by_user(child)) {
-		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
+		ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
+					    skb->len);
 		/* Wakeup parent, send SIGIO */
 		if (state == TCP_SYN_RECV && child->sk_state != state)
 			parent->sk_data_ready(parent, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f19f5fb361b5..29c53fbb2204 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -465,11 +465,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
-	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-	skb->h.th = th;
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 	skb_set_owner_w(skb, sk);
 
 	/* Build TCP header and checksum it. */
+	th = tcp_hdr(skb);
 	th->source		= inet->sport;
 	th->dest		= inet->dport;
 	th->seq			= htonl(tcb->seq);
@@ -524,7 +525,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       sk, NULL, NULL,
-					       skb->h.th,
+					       tcp_hdr(skb),
 					       sk->sk_protocol,
 					       skb->len);
 	}
@@ -2128,8 +2129,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (md5)
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 
+	th = tcp_hdr(skb);
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
@@ -2183,7 +2186,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       NULL, dst, req,
-					       skb->h.th, sk->sk_protocol,
+					       tcp_hdr(skb), sk->sk_protocol,
 					       skb->len);
 	}
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c573353f21cd..4a55da079f5f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,8 +117,8 @@ static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					    ipv6_hdr(skb)->saddr.s6_addr32,
-					    skb->h.th->dest,
-					    skb->h.th->source);
+					    tcp_hdr(skb)->dest,
+					    tcp_hdr(skb)->source);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	skb = tcp_make_synack(sk, dst, req);
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v6_check(th, skb->len,
 					 &treq->loc_addr, &treq->rmt_addr,
@@ -838,7 +838,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof (*th);
 	int genhash;
 	u8 *ptr;
@@ -946,7 +946,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
@@ -967,7 +967,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 		return -EINVAL;
 
 	ipv6h = ipv6_hdr(skb);
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
@@ -979,7 +979,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(*th);
@@ -1079,7 +1079,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 			    struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(struct tcphdr);
@@ -1195,7 +1195,7 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	struct request_sock *req, **prev;
-	const struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
@@ -1275,7 +1275,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	TCP_ECN_create_request(req, skb->h.th);
+	TCP_ECN_create_request(req, tcp_hdr(skb));
 	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1528,14 +1528,14 @@ out:
 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->h.th, skb->len, &ipv6_hdr(skb)->saddr,
+		if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
 				  &ipv6_hdr(skb)->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th, skb->len,
+	skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
 					      &ipv6_hdr(skb)->saddr,
 					      &ipv6_hdr(skb)->daddr, 0));
 
@@ -1601,7 +1601,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
@@ -1632,7 +1632,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
@@ -1698,7 +1698,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr)/4)
 		goto bad_packet;
@@ -1709,7 +1709,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	     tcp_v6_checksum_init(skb)))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
-- 
cgit v1.2.3


From 9c70220b73908f64792422a2c39c593c4792f2c5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 18:04:18 -0700
Subject: [SK_BUFF]: Introduce skb_transport_header(skb)

Add an accessor for the places where we need a pointer to the
transport header. It is still legal to touch skb->h.raw directly when
just adding to it, subtracting from it, or setting it to another
layer's header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ltpc.c    |  7 +++++--
 drivers/net/cxgb3/sge.c         |  8 +++++---
 drivers/s390/net/qeth_eddp.c    |  4 ++--
 include/linux/atalk.h           |  4 ++--
 include/linux/dccp.h            | 19 ++++++++++++-------
 include/linux/icmp.h            |  2 +-
 include/linux/icmpv6.h          |  2 +-
 include/linux/igmp.h            |  6 +++---
 include/linux/ip.h              |  2 +-
 include/linux/ipv6.h            |  2 +-
 include/linux/sctp.h            |  2 +-
 include/linux/skbuff.h          |  5 +++++
 include/linux/tcp.h             |  2 +-
 include/linux/udp.h             |  2 +-
 include/net/ipx.h               |  2 +-
 include/net/pkt_cls.h           |  2 +-
 include/net/udp.h               |  4 ++--
 net/802/psnap.c                 |  2 +-
 net/ax25/af_ax25.c              |  5 +++--
 net/bluetooth/hci_core.c        |  4 ++--
 net/core/dev.c                  |  6 +++---
 net/econet/af_econet.c          |  2 +-
 net/ipv4/igmp.c                 |  2 +-
 net/ipv4/ip_gre.c               |  2 +-
 net/ipv4/ip_output.c            |  6 ++++--
 net/ipv4/ipconfig.c             |  4 ++--
 net/ipv4/ipmr.c                 |  8 +++++---
 net/ipv4/tcp.c                  | 12 +++++++-----
 net/ipv4/tcp_input.c            | 13 +++++++------
 net/ipv4/xfrm4_mode_beet.c      |  2 +-
 net/ipv4/xfrm4_mode_transport.c |  5 +++--
 net/ipv6/ah6.c                  |  2 +-
 net/ipv6/esp6.c                 |  2 +-
 net/ipv6/exthdrs.c              | 21 ++++++++++-----------
 net/ipv6/ipcomp6.c              |  2 +-
 net/ipv6/mcast.c                | 16 +++++++++-------
 net/ipv6/mip6.c                 |  8 ++++----
 net/ipv6/ndisc.c                | 17 +++++++++--------
 net/ipv6/raw.c                  |  2 +-
 net/ipv6/reassembly.c           |  2 +-
 net/ipv6/xfrm6_mode_transport.c |  5 +++--
 net/xfrm/xfrm_input.c           |  6 +++---
 42 files changed, 129 insertions(+), 102 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index dc3bce992dcf..43c17c85c97b 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -917,6 +917,7 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	int i;
 	struct lt_sendlap cbuf;
+	unsigned char *hdr;
 
 	cbuf.command = LT_SENDLAP;
 	cbuf.dnode = skb->data[0];
@@ -932,11 +933,13 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 		printk("\n");
 	}
 
-	do_write(dev,&cbuf,sizeof(cbuf),skb->h.raw,skb->len);
+	hdr = skb_transport_header(skb);
+	do_write(dev, &cbuf, sizeof(cbuf), hdr, skb->len);
 
 	if(debug & DEBUG_UPPER) {
 		printk("sent %d ddp bytes\n",skb->len);
-		for(i=0;i<skb->len;i++) printk("%02x ",skb->h.raw[i]);
+		for (i = 0; i < skb->len; i++)
+			printk("%02x ", hdr[i]);
 		printk("\n");
 	}
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a70fe9145a2e..610e4769efa4 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1324,12 +1324,14 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
+	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+			     skb->tail - skb_transport_header(skb),
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
-		((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
+		((struct unmap_info *)skb->cb)->len = (skb->tail -
+						       skb_transport_header(skb));
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1351,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
-	if (skb->tail != skb->h.raw)
+	if (skb->tail != skb_transport_header(skb))
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index b8e84674e170..5890bb5ad23e 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -476,13 +476,13 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  ip_hdrlen(skb),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  sizeof(struct ipv6hdr),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 
 	if (eddp == NULL) {
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index d12984ddaa9f..ced8a1ed080c 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -101,7 +101,7 @@ struct ddpehdr {
 
 static __inline__ struct ddpehdr *ddp_hdr(struct sk_buff *skb)
 {
-	return (struct ddpehdr *)skb->h.raw;
+	return (struct ddpehdr *)skb_transport_header(skb);
 }
 
 /* AppleTalk AARP headers */
@@ -129,7 +129,7 @@ struct elapaarp {
 
 static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
 {
-	return (struct elapaarp *)skb->h.raw;
+	return (struct elapaarp *)skb_transport_header(skb);
 }
 
 /* Not specified - how long till we drop a resolved entry */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 1f4df61735f7..fdd4217f1047 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -260,19 +260,20 @@ enum {
 
 static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr *)skb->h.raw;
+	return (struct dccp_hdr *)skb_transport_header(skb);
 }
 
 static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
 {
 	skb_push(skb, headlen);
 	skb_reset_transport_header(skb);
-	return memset(skb->h.raw, 0, headlen);
+	return memset(skb_transport_header(skb), 0, headlen);
 }
 
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
+	return (struct dccp_hdr_ext *)(skb_transport_header(skb) +
+				       sizeof(struct dccp_hdr));
 }
 
 static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
@@ -301,12 +302,14 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_request *)(skb_transport_header(skb) +
+					   dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
@@ -317,12 +320,14 @@ static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_response *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
+					 dccp_basic_hdr_len(skb));
 }
 
 static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index cd3017a15789..474f2a51cf0a 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -87,7 +87,7 @@ struct icmphdr {
 
 static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
 {
-	return (struct icmphdr *)skb->h.raw;
+	return (struct icmphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 0b5ba5eb7ed2..7c5e9817e998 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -80,7 +80,7 @@ struct icmp6hdr {
 
 static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
 {
-	return (struct icmp6hdr *)skb->h.raw;
+	return (struct icmp6hdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index ca285527b879..f510e7e382a8 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -85,19 +85,19 @@ struct igmpv3_query {
 
 static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
 {
-	return (struct igmphdr *)skb->h.raw;
+	return (struct igmphdr *)skb_transport_header(skb);
 }
 
 static inline struct igmpv3_report *
 			igmpv3_report_hdr(const struct sk_buff *skb)
 {
-	return (struct igmpv3_report *)skb->h.raw;
+	return (struct igmpv3_report *)skb_transport_header(skb);
 }
 
 static inline struct igmpv3_query *
 			igmpv3_query_hdr(const struct sk_buff *skb)
 {
-	return (struct igmpv3_query *)skb->h.raw;
+	return (struct igmpv3_query *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 19578440b5fc..bd0a2a8631c6 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -114,7 +114,7 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 
 static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
-	return (struct iphdr *)skb->h.raw;
+	return (struct iphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b768fcc0a4c4..09ea01a8a99c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -230,7 +230,7 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 
 static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 {
-	return (struct ipv6hdr *)skb->h.raw;
+	return (struct ipv6hdr *)skb_transport_header(skb);
 }
 
 /* 
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d76767dfe59e..d70df61a029f 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -68,7 +68,7 @@ typedef struct sctphdr {
 
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
-	return (struct sctphdr *)skb->h.raw;
+	return (struct sctphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d3f186230ee2..39a6da243b24 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -951,6 +951,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
+{
+	return skb->h.raw;
+}
+
 static inline void skb_reset_transport_header(struct sk_buff *skb)
 {
 	skb->h.raw = skb->data;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 911d937fb4c1..c6b9f92e8289 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -180,7 +180,7 @@ struct tcp_md5sig {
 
 static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
 {
-	return (struct tcphdr *)skb->h.raw;
+	return (struct tcphdr *)skb_transport_header(skb);
 }
 
 static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 1f58503af9a6..6de445c31a64 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -31,7 +31,7 @@ struct udphdr {
 
 static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 {
-	return (struct udphdr *)skb->h.raw;
+	return (struct udphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/net/ipx.h b/include/net/ipx.h
index c6b2ee610866..4cc0b4eca948 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -43,7 +43,7 @@ struct ipxhdr {
 
 static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
 {
-	return (struct ipxhdr *)skb->h.raw;
+	return (struct ipxhdr *)skb_transport_header(skb);
 }
 
 struct ipx_interface {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 880eb7b54164..dcb3a91f1364 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -328,7 +328,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 		case TCF_LAYER_NETWORK:
 			return skb_network_header(skb);
 		case TCF_LAYER_TRANSPORT:
-			return skb->h.raw;
+			return skb_transport_header(skb);
 	}
 
 	return NULL;
diff --git a/include/net/udp.h b/include/net/udp.h
index 4a9699f79281..4906ed7113e7 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -89,8 +89,8 @@ static inline int udp_lib_checksum_complete(struct sk_buff *skb)
  */
 static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 {
-	__wsum csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0);
-
+	__wsum csum = csum_partial(skb_transport_header(skb),
+				   sizeof(struct udphdr), 0);
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		csum = csum_add(csum, skb->csum);
 	}
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83f..7cba1f426081 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,7 +56,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	};
 
 	rcu_read_lock();
-	proto = find_snap_client(skb->h.raw);
+	proto = find_snap_client(skb_transport_header(skb));
 	if (proto) {
 		/* Pass the frame on. */
 		skb->h.raw  += 5;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 75d4d695edec..5f28887822e9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1585,9 +1585,10 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_set_transport_header(skb, lv);
 
-	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, skb->h.raw);
+	SOCK_DEBUG(sk, "base=%p pos=%p\n",
+		   skb->data, skb_transport_header(skb));
 
-	*skb->h.raw = AX25_UI;
+	*skb_transport_header(skb) = AX25_UI;
 
 	/* Datagram frames go straight out of the door as UI */
 	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c11ceb6b3f79..c177e75d64a6 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1076,7 +1076,7 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 
 	skb_push(skb, HCI_ACL_HDR_SIZE);
 	skb_reset_transport_header(skb);
-	hdr = (struct hci_acl_hdr *)skb->h.raw;
+	hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
 	hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
 	hdr->dlen   = cpu_to_le16(len);
 }
@@ -1145,7 +1145,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 
 	skb_push(skb, HCI_SCO_HDR_SIZE);
 	skb_reset_transport_header(skb);
-	memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+	memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
 
 	skb->dev = (void *) hdev;
 	bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
diff --git a/net/core/dev.c b/net/core/dev.c
index f7f7e5687e46..30fcc7f9d4ed 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1175,12 +1175,12 @@ int skb_checksum_help(struct sk_buff *skb)
 	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
-	offset = skb->tail - skb->h.raw;
+	offset = skb->tail - skb_transport_header(skb);
 	BUG_ON(offset <= 0);
 	BUG_ON(skb->csum_offset + 2 > offset);
 
-	*(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
-
+	*(__sum16 *)(skb_transport_header(skb) +
+		     skb->csum_offset) = csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dcc2e4b6b2fe..78993dadb53a 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -943,7 +943,7 @@ static void aun_data_available(struct sock *sk, int slen)
 		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
 	}
 
-	data = skb->h.raw + sizeof(struct udphdr);
+	data = skb_transport_header(skb) + sizeof(struct udphdr);
 	ah = (struct aunhdr *)data;
 	len = skb->len - sizeof(struct udphdr);
 	ip = ip_hdr(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 292516bb1eca..8f0df7b4dfe7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -348,7 +348,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = igmp_hdr(skb);
 	const int iplen = skb->tail - skb->nh.raw;
-	const int igmplen = skb->tail - skb->h.raw;
+	const int igmplen = skb->tail - skb_transport_header(skb);
 
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 39216e6a59ed..e6a9e452fd61 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -619,7 +619,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		skb_reset_mac_header(skb);
 		__pskb_pull(skb, offset);
 		skb_reset_network_header(skb);
-		skb_postpull_rcsum(skb, skb->h.raw, offset);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 602268661eb3..11029b9d4cf7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1128,7 +1128,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(skb_prev,
 								   maxfraglen,
-								   skb->h.raw,
+						    skb_transport_header(skb),
 								   fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
@@ -1374,7 +1374,9 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 		       &ipc, rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		if (arg->csumoffset >= 0)
-			*((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+			*((__sum16 *)skb_transport_header(skb) +
+			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
+								arg->csum));
 		skb->ip_summed = CHECKSUM_NONE;
 		ip_push_pending_frames(sk);
 	}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 6b91c9f5d57a..4e19ee0e0102 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* Basic sanity checks can be done without the lock.  */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 
 	/* If this test doesn't pass, it's not IP, or we should
 	 * ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* OK, it is all there and looks valid, process... */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 	rarp_ptr = (unsigned char *) (rarp + 1);
 
 	/* One reply at a time, please. */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 05bc27002def..8f45c95db451 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1437,7 +1437,8 @@ int pim_rcv_v1(struct sk_buff * skb)
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct igmphdr));
 	/*
 	   Check that:
 	   a. packet is really destinted to a multicast group
@@ -1490,7 +1491,7 @@ static int pim_rcv(struct sk_buff * skb)
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
 
-	pim = (struct pimreghdr*)skb->h.raw;
+	pim = (struct pimreghdr *)skb_transport_header(skb);
 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
 	    (pim->flags&PIM_NULL_REGISTER) ||
 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1498,7 +1499,8 @@ static int pim_rcv(struct sk_buff * skb)
 		goto drop;
 
 	/* check if the inner packet is destined to mcast group */
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct pimreghdr));
 	if (!MULTICAST(encap->daddr) ||
 	    encap->tot_len == 0 ||
 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f832f3c33ab1..2b214cc3724c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2219,8 +2219,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				       (__force u32)delta));
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-							   skb->csum));
+			th->check =
+			     csum_fold(csum_partial(skb_transport_header(skb),
+						    thlen, skb->csum));
 
 		seq += len;
 		skb = skb->next;
@@ -2230,12 +2231,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->cwr = 0;
 	} while (skb->next);
 
-	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+	delta = htonl(oldlen + (skb->tail - skb_transport_header(skb)) +
+		      skb->data_len);
 	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				(__force u32)delta));
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-						   skb->csum));
+		th->check = csum_fold(csum_partial(skb_transport_header(skb),
+						   thlen, skb->csum));
 
 out:
 	return segs;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c1ce36237380..9c3b4c7a50ad 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		 *
 		 * "len" is invariant segment length, including TCP header.
 		 */
-		len += skb->data - skb->h.raw;
+		len += skb->data - skb_transport_header(skb);
 		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
 		    /* If PSH is not set, packet should be
 		     * full sized, provided peer TCP is not badly broken.
@@ -940,7 +940,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+	unsigned char *ptr = (skb_transport_header(ack_skb) +
+			      TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
 	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -3634,10 +3635,10 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		skb_set_network_header(nskb,
-				       skb_network_header(skb) - skb->head);
-		skb_set_transport_header(nskb, skb->h.raw - skb->head);
-
+		skb_set_network_header(nskb, (skb_network_header(skb) -
+					      skb->head));
+		skb_set_transport_header(nskb, (skb_transport_header(skb) -
+						skb->head));
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 32fcfc0b5c8c..591f0f1ef87f 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -51,7 +51,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 		BUG_ON(optlen < 0);
 
-		ph = (struct ip_beet_phdr *)skb->h.raw;
+		ph = (struct ip_beet_phdr *)skb_transport_header(skb);
 		ph->padlen = 4 - (optlen & 4);
 		ph->hdrlen = optlen / 8;
 		ph->nexthdr = top_iph->protocol;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 2c46cbb3bbb5..dc8834ea3754 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -45,10 +45,11 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e5ee981d3e10..d2af4fe3725b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 			goto error_free_iph;
 	}
 
-	ah = (struct ip_auth_hdr *)skb->h.raw;
+	ah = (struct ip_auth_hdr *)skb_transport_header(skb);
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority    = 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ad522b7b5771..436eb9e6a6cf 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -87,7 +87,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	pskb_put(skb, trailer, clen - skb->len);
 
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
-	esph = (struct ipv6_esp_hdr *)skb->h.raw;
+	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
 	*(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ESP;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1bda0299890e..f25ee773f52e 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -144,7 +144,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	struct tlvtype_proc *curr;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = skb->h.raw - skb->nh.raw;
-	int len = ((skb->h.raw[1]+1)<<3);
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
 
 	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
@@ -290,15 +290,14 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3)))) {
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	opt->lastopt = skb->h.raw - skb->nh.raw;
-	opt->dst1 = skb->h.raw - skb->nh.raw;
+	opt->lastopt = opt->dst1 = skb->h.raw - skb->nh.raw;
 #ifdef CONFIG_IPV6_MIP6
 	dstbuf = opt->dst1;
 #endif
@@ -307,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		dst_release(dst);
 		skb = *skbp;
-		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
@@ -390,14 +389,14 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3)))) {
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
 	switch (hdr->type) {
 #ifdef CONFIG_IPV6_MIP6
@@ -444,8 +443,7 @@ looped_back:
 			break;
 		}
 
-		opt->lastopt = skb->h.raw - skb->nh.raw;
-		opt->srcrt = skb->h.raw - skb->nh.raw;
+		opt->lastopt = opt->srcrt = skb->h.raw - skb->nh.raw;
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
@@ -745,7 +743,8 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	 * hop-by-hop options.
 	 */
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
-	    !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		kfree_skb(skb);
 		return -1;
 	}
@@ -753,7 +752,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 4a6501695e98..5555c98dea03 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -143,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	/* compression */
 	plen = skb->len - hdr_len;
 	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->h.raw;
+	start = skb_transport_header(skb);
 
 	cpu = get_cpu();
 	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1f2a3be9308a..c6436f5e3e9f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
 			in6_dev_put(idev);
 			return -EINVAL;
 		}
-		mlh2 = (struct mld2_query *) skb->h.raw;
+		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
 		if (!max_delay)
 			max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
-			mlh2 = (struct mld2_query *) skb->h.raw;
+			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
 	} else {
@@ -1460,18 +1460,20 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
 static void mld_sendpack(struct sk_buff *skb)
 {
 	struct ipv6hdr *pip6 = ipv6_hdr(skb);
-	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+	struct mld2_report *pmr =
+			      (struct mld2_report *)skb_transport_header(skb);
 	int payload_len, mldlen;
 	struct inet6_dev *idev = in6_dev_get(skb->dev);
 	int err;
 
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
 	payload_len = skb->tail - skb_network_header(skb) - sizeof(*pip6);
-	mldlen = skb->tail - skb->h.raw;
+	mldlen = skb->tail - skb_transport_header(skb);
 	pip6->payload_len = htons(payload_len);
 
 	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
-		IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
+					     mldlen, 0));
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
 		mld_dev_queue_xmit);
 	if (!err) {
@@ -1505,7 +1507,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
-	pmr = (struct mld2_report *)skb->h.raw;
+	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
 	*ppgr = pgr;
 	return skb;
@@ -1538,7 +1540,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	if (!*psf_list)
 		goto empty_source;
 
-	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 85202891644e..f0288e92fb52 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -92,10 +92,10 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 
 	if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3))))
+				 ((skb_transport_header(skb)[1] + 1) << 3))))
 		return -1;
 
-	mh = (struct ip6_mh *)skb->h.raw;
+	mh = (struct ip6_mh *)skb_transport_header(skb);
 
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
@@ -158,7 +158,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_DSTOPTS;
 
-	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+	dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
 	dstopt->nexthdr = nexthdr;
 
 	hao = mip6_padn((char *)(dstopt + 1),
@@ -370,7 +370,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ROUTING;
 
-	rt2 = (struct rt2_hdr *)skb->h.raw;
+	rt2 = (struct rt2_hdr *)skb_transport_header(skb);
 	rt2->rt_hdr.nexthdr = nexthdr;
 	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
 	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8b946f56287a..f9a85ab594db 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -760,7 +760,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 static void ndisc_recv_ns(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
@@ -938,7 +938,7 @@ out:
 
 static void ndisc_recv_na(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
@@ -1040,7 +1040,7 @@ out:
 
 static void ndisc_recv_rs(struct sk_buff *skb)
 {
-	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
 	struct neighbour *neigh;
 	struct inet6_dev *idev;
@@ -1097,7 +1097,7 @@ out:
 
 static void ndisc_router_discovery(struct sk_buff *skb)
 {
-	struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
 	struct rt6_info *rt = NULL;
@@ -1108,7 +1108,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	__u8 * opt = (__u8 *)(ra_msg + 1);
 
-	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+	optlen = (skb->tail - skb_transport_header(skb)) -
+		  sizeof(struct ra_msg);
 
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
@@ -1357,7 +1358,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	optlen = skb->tail - skb->h.raw;
+	optlen = skb->tail - skb_transport_header(skb);
 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
 
 	if (optlen < 0) {
@@ -1584,9 +1585,9 @@ int ndisc_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb->len))
 		return 0;
 
-	msg = (struct nd_msg *) skb->h.raw;
+	msg = (struct nd_msg *)skb_transport_header(skb);
 
-	__skb_push(skb, skb->data-skb->h.raw);
+	__skb_push(skb, skb->data - skb_transport_header(skb));
 
 	if (ipv6_hdr(skb)->hop_limit != 255) {
 		ND_PRINTK2(KERN_WARNING
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bb049f1c2679..116257d59a36 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1077,7 +1077,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			spin_lock_bh(&sk->sk_receive_queue.lock);
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb != NULL)
-				amount = skb->tail - skb->h.raw;
+				amount = skb->tail - skb_transport_header(skb);
 			spin_unlock_bh(&sk->sk_receive_queue.lock);
 			return put_user(amount, (int __user *)arg);
 		}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ef29a7bb97ce..31d4271ea540 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -726,7 +726,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 	}
 
 	hdr = ipv6_hdr(skb);
-	fhdr = (struct frag_hdr *)skb->h.raw;
+	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 0134d74ef087..d526f4e9c65e 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -51,10 +51,11 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae1419..5c4695840c58 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	case IPPROTO_COMP:
 		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
 			return -EINVAL;
-		*spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
+		*spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
 		*seq = 0;
 		return 0;
 	default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	if (!pskb_may_pull(skb, 16))
 		return -EINVAL;
 
-	*spi = *(__be32*)(skb->h.raw + offset);
-	*seq = *(__be32*)(skb->h.raw + offset_seq);
+	*spi = *(__be32*)(skb_transport_header(skb) + offset);
+	*seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_parse_spi);
-- 
cgit v1.2.3


From 604763722c655c7e3f31ecf6f7b4dafcd26a7a15 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 9 Apr 2007 11:59:39 -0700
Subject: [NET]: Treat CHECKSUM_PARTIAL as CHECKSUM_UNNECESSARY

When a transmitted packet is looped back directly, CHECKSUM_PARTIAL
maps to the semantics of CHECKSUM_UNNECESSARY.  Therefore we should
treat it as such in the stack.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h     | 16 +++++++++++-----
 include/net/tcp.h          |  2 +-
 include/net/udp.h          |  2 +-
 net/core/netpoll.c         |  2 +-
 net/ipv4/ipvs/ip_vs_core.c |  6 ++----
 net/ipv4/tcp_input.c       |  6 +++---
 net/ipv4/tcp_ipv4.c        |  3 +--
 net/ipv4/udp.c             |  4 ++--
 net/ipv6/raw.c             |  4 ++--
 net/ipv6/tcp_ipv6.c        |  3 +--
 net/ipv6/udp.c             |  4 ++--
 net/sctp/input.c           |  3 +--
 net/sunrpc/socklib.c       |  2 +-
 13 files changed, 29 insertions(+), 28 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 910560e85561..c413afbe0b9c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -32,10 +32,11 @@
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
 
+/* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
-#define CHECKSUM_PARTIAL 1
-#define CHECKSUM_UNNECESSARY 2
-#define CHECKSUM_COMPLETE 3
+#define CHECKSUM_UNNECESSARY 1
+#define CHECKSUM_COMPLETE 2
+#define CHECKSUM_PARTIAL 3
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
@@ -1572,6 +1573,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
+static inline int skb_csum_unnecessary(const struct sk_buff *skb)
+{
+	return skb->ip_summed & CHECKSUM_UNNECESSARY;
+}
+
 /**
  *	skb_checksum_complete - Calculate checksum of an entire packet
  *	@skb: packet to process
@@ -1590,8 +1596,8 @@ extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
  */
 static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
-		__skb_checksum_complete(skb);
+	return skb_csum_unnecessary(skb) ?
+	       0 : __skb_checksum_complete(skb);
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index af9273204cfd..07f724e02f84 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -818,7 +818,7 @@ static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
 
 static inline int tcp_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__tcp_checksum_complete(skb);
 }
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 4906ed7113e7..98755ebaf163 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -77,7 +77,7 @@ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
 
 static inline int udp_lib_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__udp_lib_checksum_complete(skb);
 }
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 1fb30c3528bc..b316435b0e2a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 {
 	__wsum psum;
 
-	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (uh->check == 0 || skb_csum_unnecessary(skb))
 		return 0;
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 62cfbed317bf..f005a2f929f4 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -681,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	}
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	verdict = NF_DROP;
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c3b4c7a50ad..d1604f59d77e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4009,7 +4009,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	int err;
 
 	local_bh_enable();
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
 	else
 		err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -4041,7 +4041,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
 
 static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__tcp_checksum_complete_user(sk, skb);
 }
 
@@ -4059,7 +4059,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 		tp->ucopy.dma_chan = get_softnet_dma();
 
-	if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
 
 		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
 			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a091a99ad263..5a3e7f839fc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1638,8 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v4_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ad7a26e3091..cec0f2cc49b7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -848,7 +848,7 @@ try_again:
 			goto csum_copy_err;
 	}
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -1190,7 +1190,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				      proto, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 					       skb->len, proto, 0);
 	/* Probably, we should checksum udp header (it should be in cache
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 2b3be68b70a7..f65fcd7704ca 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -368,7 +368,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 				     skb->len, inet->num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len,
@@ -421,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+	if (skb_csum_unnecessary(skb)) {
 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	} else if (msg->msg_flags&MSG_TRUNC) {
 		if (__skb_checksum_complete(skb))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7e824b97126d..2b668a6ae698 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1707,8 +1707,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v6_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e3dfb20b1cf..b083c09e3d2d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -153,7 +153,7 @@ try_again:
 			goto csum_copy_err;
 	}
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -397,7 +397,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			     skb->len, proto, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len, proto, 0));
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 18b97eedc1fa..885109fb3dda 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -140,8 +140,7 @@ int sctp_rcv(struct sk_buff *skb)
 	__skb_pull(skb, skb_transport_offset(skb));
 	if (skb->len < sizeof(struct sctphdr))
 		goto discard_it;
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
-	    (sctp_rcv_checksum(skb) < 0))
+	if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
 		goto discard_it;
 
 	skb_pull(skb, sizeof(struct sctphdr));
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04d..1d377d1ab7f4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 	desc.offset = sizeof(struct udphdr);
 	desc.count = skb->len - desc.offset;
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		goto no_checksum;
 
 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
-- 
cgit v1.2.3


From 3ff50b7997fe06cd5d276b229967bb52d6b3b6c1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 17:09:22 -0700
Subject: [NET]: cleanup extra semicolons

Spring cleaning time...

There seem to be a lot of places in the network code that have
extra bogus semicolons after conditionals.  The most common case is
a bogus semicolon after: switch() { }

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/tr.c             |  3 ++-
 net/8021q/vlan.c         |  6 +++---
 net/8021q/vlan_dev.c     |  4 ++--
 net/ax25/ax25_ip.c       |  2 +-
 net/bluetooth/hci_core.c |  4 ++--
 net/bluetooth/sco.c      |  2 +-
 net/core/link_watch.c    |  2 +-
 net/core/pktgen.c        |  6 +++---
 net/core/rtnetlink.c     |  2 +-
 net/core/skbuff.c        |  2 +-
 net/ipv4/fib_semantics.c |  2 +-
 net/ipv4/ipconfig.c      |  4 ++--
 net/ipv4/multipath_drr.c |  2 +-
 net/ipv4/tcp.c           |  7 ++++---
 net/ipv4/tcp_input.c     |  9 +++++----
 net/ipv4/xfrm4_policy.c  |  2 +-
 net/ipv6/addrconf.c      |  4 ++--
 net/ipv6/datagram.c      |  2 +-
 net/ipv6/exthdrs.c       |  2 +-
 net/ipv6/icmp.c          |  5 +++--
 net/ipv6/ndisc.c         |  2 +-
 net/ipv6/raw.c           | 14 ++++++++------
 net/ipv6/route.c         |  2 +-
 net/ipv6/xfrm6_policy.c  |  5 +++--
 net/irda/af_irda.c       |  4 ++--
 net/iucv/af_iucv.c       |  2 +-
 net/key/af_key.c         |  2 +-
 net/sched/cls_u32.c      |  2 +-
 net/sched/sch_dsmark.c   |  6 +++---
 net/sched/sch_ingress.c  |  4 ++--
 net/sched/sch_prio.c     |  2 +-
 net/sctp/associola.c     |  2 +-
 net/sctp/debug.c         |  5 +++--
 net/sctp/ipv6.c          |  2 +-
 net/sctp/output.c        |  2 +-
 net/sctp/outqueue.c      | 12 ++++++------
 net/sctp/sm_make_chunk.c |  4 ++--
 net/sctp/sm_sideeffect.c |  5 +++--
 net/sctp/sm_statefuns.c  | 10 +++++-----
 net/sctp/sm_statetable.c |  2 +-
 net/sctp/socket.c        | 11 ++++++-----
 net/sctp/transport.c     |  2 +-
 net/sctp/ulpqueue.c      |  8 ++++----
 net/tipc/link.c          |  2 +-
 net/xfrm/xfrm_policy.c   |  2 +-
 net/xfrm/xfrm_state.c    |  8 ++++----
 net/xfrm/xfrm_user.c     | 22 +++++++++++-----------
 47 files changed, 113 insertions(+), 103 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/802/tr.c b/net/802/tr.c
index eb2de0d16208..0ba1946211c9 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -554,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
 					if(j==1) {
 						segment=ntohs(entry->rseg[j-1])>>4;
 						seq_printf(seq,"  %03X",segment);
-					};
+					}
+
 					segment=ntohs(entry->rseg[j])>>4;
 					brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
 					seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7dfe..c0c7bb8e9f07 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -470,7 +470,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 		 */
 	default:
 		snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
-	};
+	}
 
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
 			       vlan_setup);
@@ -685,7 +685,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				break;
 		}
 		break;
-	};
+	}
 
 out:
 	return NOTIFY_DONE;
@@ -819,7 +819,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
 			__FUNCTION__, args.cmd);
 		return -EINVAL;
-	};
+	}
 out:
 	return err;
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 7ff6b7948485..ec46084f44b4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
-	};
+	}
 
 	return 0;
 }
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		break;
 	default:
 		break;
-	};
+	}
 
 	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
 	 * three protocols care about.
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 4d4ef35e1782..930e4918037f 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 		digipeat = route->digipeat;
 		dev = route->dev;
 		ip_mode = route->ip_mode;
-	};
+	}
 
 	if (dev == NULL)
 		dev = skb->dev;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c177e75d64a6..aa4b56a8c3ea 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 	default:
 		err = -ETIMEDOUT;
 		break;
-	};
+	}
 
 	hdev->req_status = hdev->req_result = 0;
 
@@ -1388,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
 			case HCI_SCODATA_PKT:
 				kfree_skb(skb);
 				continue;
-			};
+			}
 		}
 
 		/* Process frame */
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae4391440950..3f5163e725ed 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b249..e3c26a9ccad6 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
 	case IF_LINK_MODE_DEFAULT:
 	default:
 		break;
-	};
+	}
 
 	dev->operstate = operstate;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f9469ea530cc..b92a322872a8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -786,7 +786,7 @@ static int count_trail_chars(const char __user * user_buffer,
 			break;
 		default:
 			goto done;
-		};
+		}
 	}
 done:
 	return i;
@@ -829,7 +829,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
 			break;
 		default:
 			break;
-		};
+		}
 	}
 done_str:
 	return i;
@@ -1906,7 +1906,7 @@ static int pktgen_device_event(struct notifier_block *unused,
 	case NETDEV_UNREGISTER:
 		pktgen_mark_device(dev->name);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc95fab0b0ce..75cea8ea4cf3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -399,7 +399,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 		    operstate == IF_OPER_UNKNOWN)
 			operstate = IF_OPER_DORMANT;
 		break;
-	};
+	}
 
 	if (dev->operstate != operstate) {
 		write_lock_bh(&dev_base_lock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b50d58cce1e..142257307fa2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -304,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
 		if (atomic_dec_and_test(fclone_ref))
 			kmem_cache_free(skbuff_fclone_cache, other);
 		break;
-	};
+	}
 }
 
 /**
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3dad12ee76c3..406ea7050aed 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
 			default:
 				printk(KERN_DEBUG "impossible 102\n");
 				return -EINVAL;
-			};
+			}
 		}
 		return err;
 	}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4e19ee0e0102..597c800b2fdc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -939,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 					if (opt[1] >= 4)
 						memcpy(&server_id, opt + 2, 4);
 					break;
-				};
+				}
 			}
 
 #ifdef IPCONFIG_DEBUG
@@ -984,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 				ic_myaddr = NONE;
 				ic_servaddr = NONE;
 				goto drop_unlock;
-			};
+			}
 
 			ic_dhcp_msgtype = mt;
 
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836fc..b03c5ca2c823 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
 
 		spin_unlock_bh(&state_lock);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb9d91a7662f..4664733f139c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		break;
 	default:
 		return -ENOIOCTLCMD;
-	};
+	}
 
 	return put_user(answ, (int __user *)arg);
 }
@@ -1954,7 +1954,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	default:
 		err = -ENOPROTOOPT;
 		break;
-	};
+	}
+
 	release_sock(sk);
 	return err;
 }
@@ -2124,7 +2125,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d1604f59d77e..2fbfc2e4209c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2708,7 +2708,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		default:
 			tcp_ratehalving_spur_to_response(sk);
 			break;
-		};
+		}
 		tp->frto_counter = 0;
 	}
 	return 0;
@@ -2915,10 +2915,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					 */
 					break;
 #endif
-				};
+				}
+
 				ptr+=opsize-2;
 				length-=opsize;
-		};
+		}
 	}
 }
 
@@ -3124,7 +3125,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
 			       __FUNCTION__, sk->sk_state);
 			break;
-	};
+	}
 
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
 	 * Probably, we should reset in this case. For now drop them.
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f1c32ff59d16..4ff8ed30024f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		default:
 			fl->fl_ipsec_spi = 0;
 			break;
-		};
+		}
 	}
 	fl->proto = iph->protocol;
 	fl->fl4_dst = iph->daddr;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ba9e92d1934..33ccc95c349b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2288,7 +2288,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		default:
 			addrconf_dev_config(dev);
 			break;
-		};
+		}
 		if (idev) {
 			if (run_pending)
 				addrconf_dad_run(idev);
@@ -2341,7 +2341,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 #endif
 		break;
-	};
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4a355fea4098..403eee66b9c5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -723,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				       cmsg->cmsg_type);
 			err = -EINVAL;
 			break;
-		};
+		}
 	}
 
 exit_f:
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 275d2e812a44..4aa7fb024b3a 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -130,7 +130,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 		return 0;
-	};
+	}
 
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e94992ab92e6..e9bcce9e7bdf 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -732,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 
 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
-	};
+	}
+
 	kfree_skb(skb);
 	return 0;
 
@@ -865,7 +866,7 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_TIME_EXCEED:
 		*err = EHOSTUNREACH;
 		break;
-	};
+	}
 
 	return fatal;
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b1cf70816477..4ee1216f8018 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1630,7 +1630,7 @@ int ndisc_rcv(struct sk_buff *skb)
 	case NDISC_REDIRECT:
 		ndisc_redirect_rcv(skb);
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f65fcd7704ca..009a1047fc3f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -882,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -907,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -961,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_setsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -982,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_setsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 #endif
@@ -1035,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_getsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -1056,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_getsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 #endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 70f760f069b1..6264ec3bb154 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1760,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 		rtnl_unlock();
 
 		return err;
-	};
+	}
 
 	return -EINVAL;
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef746d4f3131..1faa2ea80afc 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		if (!afinfo) {
 			dst = *dst_p;
 			goto error;
-		};
+		}
+
 		dst_prev->output = afinfo->output;
 		xfrm_state_put_afinfo(afinfo);
 		/* Sheit... I remember I did this right. Apparently,
@@ -337,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			fl->fl_ipsec_spi = 0;
 			fl->proto = nexthdr;
 			return;
-		};
+		}
 	}
 }
 
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index bf994c85e45a..baca1565aa11 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -220,7 +220,7 @@ static void irda_connect_confirm(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
@@ -283,7 +283,7 @@ static void irda_connect_indication(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 55632883d17b..e84c924a81ee 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -181,7 +181,7 @@ static void iucv_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 	iucv_sock_kill(sk);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3cd228aacfe8..a99444142dc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
 		 */
 		return -EINVAL;
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 62e1deb27a17..c7a347bd6d70 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -436,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
 			BUG_TRAP(ht->refcnt == 0);
 
 			kfree(ht);
-		};
+		}
 
 		kfree(tp_c);
 	}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index e38e0d00d1e6..3c6fd181263f 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -226,7 +226,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			default:
 				skb->tc_index = 0;
 				break;
-		};
+		}
 	}
 
 	if (TC_H_MAJ(skb->priority) == sch->handle)
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 				if (p->default_index != NO_DEFAULT_INDEX)
 					skb->tc_index = p->default_index;
 				break;
-		};
+		}
 	}
 
 	err = p->q->enqueue(skb,p->q);
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 				       "unsupported protocol %d\n",
 				       ntohs(skb->protocol));
 			break;
-	};
+	}
 
 	return skb;
 }
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index ad22dc6af22a..f8b9f1cdf738 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -170,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			skb->tc_index = TC_H_MIN(res.classid);
 			result = TC_ACT_OK;
 			break;
-	};
+	}
 /* backward compat */
 #else
 #ifdef	CONFIG_NET_CLS_POLICE
@@ -187,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		sch->bstats.bytes += skb->len;
 		result = NF_ACCEPT;
 		break;
-	};
+	}
 
 #else
 	D2PRINTK("Overriding result to ACCEPT\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 5cfe60bf6e25..269a6e17c6c4 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -62,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
-		};
+		}
 
 		if (!q->filter_list ) {
 #else
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 37a343e1ebb7..db73ef97485a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -733,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 
 	default:
 		return;
-	};
+	}
 
 	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
 	 * user.
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c9..e8c0f7435d7f 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
 		return "FWD_TSN";
 
 	default:
-		return "unknown chunk";
-	};
+		break;
+	}
+
 	return "unknown chunk";
 }
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5b0cdda4b449..ca527a27dd05 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -604,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
 	default:
 		retval = SCTP_SCOPE_GLOBAL;
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced54..d85543def754 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 	case SCTP_XMIT_OK:
 	case SCTP_XMIT_NAGLE_DELAY:
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627e..992f361084b7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 				SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
 			q->empty = 0;
 			break;
-		};
+		}
 	} else {
 		list_add_tail(&chunk->list, &q->control_chunk_list);
 		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			/* Retrieve a new chunk to bundle. */
 			lchunk = sctp_list_dequeue(lqueue);
 			break;
-		};
+		}
 
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		default:
 			/* We built a chunk with an illegal type! */
 			BUG();
-		};
+		}
 	}
 
 	/* Is it OK to send data chunks?  */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
 				dbg_prt_state = 0;
 				dbg_ack_tsn = tsn;
-			};
+			}
 
 			dbg_last_ack_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
 				dbg_prt_state = 1;
 				dbg_kept_tsn = tsn;
-			};
+			}
 
 			dbg_last_kept_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 		} else {
 			SCTP_DEBUG_PRINTK("\n");
 		}
-	};
+	}
 #endif /* SCTP_DEBUG */
 	if (transport) {
 		if (bytes_acked) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 759ea3d19976..be783a3761c4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2077,7 +2077,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 
 			default: /* Just ignore anything else.  */
 				break;
-			};
+			}
 		}
 		break;
 
@@ -2118,7 +2118,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 		SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
 				  ntohs(param.p->type), asoc);
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 0a1a197193a2..b37a7adeb150 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1009,7 +1009,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
 		       status, state, event_type, subtype.chunk);
 		BUG();
 		break;
-	};
+	}
 
 bail:
 	return error;
@@ -1489,7 +1489,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			printk(KERN_WARNING "Impossible command: %u, %p\n",
 			       cmd->verb, cmd->obj.ptr);
 			break;
-		};
+		}
+
 		if (error)
 			break;
 	}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e0ec16dd678a..9e28a5d51200 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
 		new_asoc->c.my_ttag   = asoc->c.my_vtag;
 		new_asoc->c.peer_ttag = asoc->c.peer_vtag;
 		break;
-	};
+	}
 
 	/* Other parameters for the endpoint SHOULD be copied from the
 	 * existing parameters of the association (e.g. number of
@@ -1904,7 +1904,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 	/* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1936,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 	default: /* Discard packet for all others. */
 		retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		break;
-	};
+	}
 
 	/* Delete the tempory new association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -4816,7 +4816,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 	default:
 		BUG();
 		break;
-	};
+	}
 
 	if (!reply)
 		goto nomem;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f4..523071c7902f 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	default:
 		/* Yikes!  We got an illegal event type.  */
 		return &bug;
-	};
+	}
 }
 
 #define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f904f2bc0f2c..11938fb20395 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -941,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
 	default:
 		err = -EINVAL;
 		break;
-	};
+	}
 
 out:
 	kfree(kaddrs);
@@ -3048,7 +3048,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 
@@ -4873,7 +4873,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 	return retval;
@@ -5198,7 +5198,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		break;
 	default:
 		break;
-	};
+	}
+
 	if (err)
 		goto cleanup;
 
@@ -5461,7 +5462,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
 
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 	return 0;
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4d8c2ab864fc..961df275d5b9 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 			transport->cwnd = max(transport->cwnd/2,
 						 4*transport->asoc->pathmtu);
 		break;
-	};
+	}
 
 	transport->partial_bytes_acked = 0;
 	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0fa4d4d4df17..34eb977a204d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -391,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
 			break;
 		pos->next = pnext;
 		pos = pnext;
-	};
+	}
 
 	event = sctp_skb2event(f_frag);
 	SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -476,7 +476,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 			else
 				first_frag = NULL;
 			break;
-		};
+		}
 	}
 
 	asoc = ulpq->asoc;
@@ -556,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
 			goto done;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
@@ -648,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
 			break;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f3f99c8ea08a..2124f32ef29f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2570,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
 		if (obuf == NULL) {
 			warn("Link unable to unbundle message(s)\n");
 			break;
-		};
+		}
 		pos += align(msg_size(buf_msg(obuf)));
 		msg_dbg(buf_msg(obuf), "     /");
 		tipc_net_route_msg(obuf);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 194257554553..762926009c04 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1049,7 +1049,7 @@ static inline int policy_to_flow_dir(int dir)
 		return FLOW_DIR_OUT;
 	case XFRM_POLICY_FWD:
 		return FLOW_DIR_FWD;
-	};
+	}
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 69a3600afd9d..743f07e7f698 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -458,7 +458,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 					     x->id.daddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -493,7 +493,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 					     x->props.saddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -722,7 +722,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 					     (struct in6_addr *)saddr))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -755,7 +755,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
 				       (struct in6_addr *)daddr);
 			break;
-		};
+		}
 
 		x->km.state = XFRM_STATE_ACQ;
 		x->id.proto = proto;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 88659edc9b1a..f91521d5f2ab 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
 	return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = -EINVAL;
 	switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
 		goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = 0;
 
@@ -711,7 +711,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	if (p->min > p->max)
 		return -EINVAL;
@@ -789,7 +789,7 @@ static int verify_policy_dir(u8 dir)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -805,7 +805,7 @@ static int verify_policy_type(u8 type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -821,7 +821,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->action) {
 	case XFRM_POLICY_ALLOW:
@@ -830,7 +830,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->sel.family) {
 	case AF_INET:
@@ -845,7 +845,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return verify_policy_dir(p->dir);
 }
@@ -912,7 +912,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 #endif
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 9e412ba7632f71259a53085665d4983b78257b7c Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Fri, 20 Apr 2007 22:18:02 -0700
Subject: [TCP]: Sed magic converts func(sk, tp, ...) -> func(sk, ...)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a (mostly) automated change using magic:

sed -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N'
    -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N'
    -e 's|struct sock \*sk,[\n\t ]*struct tcp_sock \*tp\([^{]*\n{\n\)|
	  struct sock \*sk\1\tstruct tcp_sock *tp = tcp_sk(sk);\n|g'
    -e 's|struct sock \*sk, struct tcp_sock \*tp|
	  struct sock \*sk|g' -e 's|sk, tp\([^-]\)|sk\1|g'

Fixed four unused variable (tp) warnings that were introduced.

In addition, manually added newlines after local variables and
tweaked function arguments positioning.

$ gcc --version
gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
...
$ codiff -fV built-in.o.old built-in.o.new
net/ipv4/route.c:
  rt_cache_flush |  +14
 1 function changed, 14 bytes added

net/ipv4/tcp.c:
  tcp_setsockopt |   -5
  tcp_sendpage   |  -25
  tcp_sendmsg    |  -16
 3 functions changed, 46 bytes removed

net/ipv4/tcp_input.c:
  tcp_try_undo_recovery |   +3
  tcp_try_undo_dsack    |   +2
  tcp_mark_head_lost    |  -12
  tcp_ack               |  -15
  tcp_event_data_recv   |  -32
  tcp_rcv_state_process |  -10
  tcp_rcv_established   |   +1
 7 functions changed, 6 bytes added, 69 bytes removed, diff: -63

net/ipv4/tcp_output.c:
  update_send_head          |   -9
  tcp_transmit_skb          |  +19
  tcp_cwnd_validate         |   +1
  tcp_write_wakeup          |  -17
  __tcp_push_pending_frames |  -25
  tcp_push_one              |   -8
  tcp_send_fin              |   -4
 7 functions changed, 20 bytes added, 63 bytes removed, diff: -43

built-in.o.new:
 18 functions changed, 40 bytes added, 178 bytes removed, diff: -138

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  25 +++++----
 include/net/tcp_ecn.h |  11 ++--
 net/ipv4/tcp.c        |  39 +++++++-------
 net/ipv4/tcp_input.c  | 145 ++++++++++++++++++++++++++++++--------------------
 net/ipv4/tcp_output.c |  54 ++++++++++---------
 5 files changed, 158 insertions(+), 116 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e79803353c83..43910fe3c448 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -420,9 +420,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 
 /* tcp_output.c */
 
-extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
-				      unsigned int cur_mss, int nonagle);
-extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
+extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+				      int nonagle);
+extern int tcp_may_send_now(struct sock *sk);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
@@ -479,8 +479,10 @@ static inline void tcp_fast_path_on(struct tcp_sock *tp)
 	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 }
 
-static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_fast_path_check(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (skb_queue_empty(&tp->out_of_order_queue) &&
 	    tp->rcv_wnd &&
 	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
@@ -591,10 +593,10 @@ static inline void tcp_dec_pcount_approx(__u32 *count,
 	}
 }
 
-static inline void tcp_packets_out_inc(struct sock *sk, 
-				       struct tcp_sock *tp,
+static inline void tcp_packets_out_inc(struct sock *sk,
 				       const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int orig = tp->packets_out;
 
 	tp->packets_out += tcp_skb_pcount(skb);
@@ -778,18 +780,21 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, int mss,
 		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 }
 
-static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_check_probe_timer(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+
 	if (!tp->packets_out && !icsk->icsk_pending)
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
 					  icsk->icsk_rto, TCP_RTO_MAX);
 }
 
-static inline void tcp_push_pending_frames(struct sock *sk,
-					   struct tcp_sock *tp)
+static inline void tcp_push_pending_frames(struct sock *sk)
 {
-	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index b5f7c6ac0880..89eb3e05116d 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -27,9 +27,10 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
 		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
 }
 
-static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp,
-				    struct sk_buff *skb)
+static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
@@ -44,9 +45,11 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
 		th->ece = 1;
 }
 
-static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
-				struct sk_buff *skb, int tcp_header_len)
+static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
+				int tcp_header_len)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->ecn_flags & TCP_ECN_OK) {
 		/* Not-retransmitted data segment: set ECT and inject CWR. */
 		if (skb->len != tcp_header_len &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 99ad52c00c96..2cf9a898ce50 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp)
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
 
-static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
-			      struct sk_buff *skb)
+static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	skb->csum    = 0;
@@ -486,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 	}
 }
 
-static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
-			    int mss_now, int nonagle)
+static inline void tcp_push(struct sock *sk, int flags, int mss_now,
+			    int nonagle)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_send_head(sk)) {
 		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		if (!(flags & MSG_MORE) || forced_push(tp))
 			tcp_mark_push(tp, skb);
 		tcp_mark_urg(tp, flags, skb);
-		__tcp_push_pending_frames(sk, tp, mss_now,
+		__tcp_push_pending_frames(sk, mss_now,
 					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
 	}
 }
@@ -540,7 +542,7 @@ new_segment:
 			if (!skb)
 				goto wait_for_memory;
 
-			skb_entail(sk, tp, skb);
+			skb_entail(sk, skb);
 			copy = size_goal;
 		}
 
@@ -586,7 +588,7 @@ new_segment:
 
 		if (forced_push(tp)) {
 			tcp_mark_push(tp, skb);
-			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
 		} else if (skb == tcp_send_head(sk))
 			tcp_push_one(sk, mss_now);
 		continue;
@@ -595,7 +597,7 @@ wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 		if (copied)
-			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -606,7 +608,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	return copied;
 
 do_error:
@@ -637,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+static inline int select_size(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sk->sk_route_caps & NETIF_F_SG) {
@@ -714,7 +717,7 @@ new_segment:
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+				skb = sk_stream_alloc_pskb(sk, select_size(sk),
 							   0, sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
@@ -725,7 +728,7 @@ new_segment:
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
 					skb->ip_summed = CHECKSUM_PARTIAL;
 
-				skb_entail(sk, tp, skb);
+				skb_entail(sk, skb);
 				copy = size_goal;
 			}
 
@@ -830,7 +833,7 @@ new_segment:
 
 			if (forced_push(tp)) {
 				tcp_mark_push(tp, skb);
-				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
 			} else if (skb == tcp_send_head(sk))
 				tcp_push_one(sk, mss_now);
 			continue;
@@ -839,7 +842,7 @@ wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 			if (copied)
-				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
@@ -851,7 +854,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return copied;
@@ -1389,7 +1392,7 @@ do_prequeue:
 skip_copy:
 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
 			tp->urg_data = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 		}
 		if (used + offset < skb->len)
 			continue;
@@ -1830,7 +1833,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			 * for currently queued segments.
 			 */
 			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		} else {
 			tp->nonagle &= ~TCP_NAGLE_OFF;
 		}
@@ -1854,7 +1857,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->nonagle &= ~TCP_NAGLE_CORK;
 			if (tp->nonagle&TCP_NAGLE_OFF)
 				tp->nonagle |= TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		}
 		break;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fbfc2e4209c..633389390788 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -235,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  */
 
 /* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
-			     const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
 	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -252,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+static void tcp_grow_window(struct sock *sk,
 			    struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -267,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
 		if (tcp_win_from_space(skb->truesize) <= skb->len)
 			incr = 2*tp->advmss;
 		else
-			incr = __tcp_grow_window(sk, tp, skb);
+			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -330,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
-static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+static void tcp_clamp_window(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	icsk->icsk_ack.quick = 0;
@@ -503,8 +506,9 @@ new_measure:
  * each ACK we send, he increments snd_cwnd and transmits more of his
  * queue.  -DaveM
  */
-static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now;
 
@@ -545,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 	TCP_ECN_check_ce(tp, skb);
 
 	if (skb->len >= 128)
-		tcp_grow_window(sk, tp, skb);
+		tcp_grow_window(sk, skb);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -1541,8 +1545,10 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
 }
 
-static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+static inline int tcp_head_timedout(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
 }
@@ -1640,8 +1646,9 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
  * Main question: may we further continue forward transmission
  * with the same cwnd?
  */
-static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+static int tcp_time_to_recover(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out;
 
 	/* Do not perform any recovery during FRTO algorithm */
@@ -1659,7 +1666,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	/* Trick#3 : when we use RFC2988 timer restart, fast
 	 * retransmit can be triggered by timeout of queue head.
 	 */
-	if (tcp_head_timedout(sk, tp))
+	if (tcp_head_timedout(sk))
 		return 1;
 
 	/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1668,7 +1675,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	packets_out = tp->packets_out;
 	if (packets_out <= tp->reordering &&
 	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
-	    !tcp_may_send_now(sk, tp)) {
+	    !tcp_may_send_now(sk)) {
 		/* We have nothing to send. This connection is limited
 		 * either by receiver window or by application.
 		 */
@@ -1708,8 +1715,10 @@ static void tcp_add_reno_sack(struct sock *sk)
 
 /* Account for ACK, ACKing some data in Reno Recovery phase. */
 
-static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
 		if (acked-1 >= tp->sacked_out)
@@ -1728,9 +1737,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 }
 
 /* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+static void tcp_mark_head_lost(struct sock *sk,
 			       int packets, u32 high_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int cnt;
 
@@ -1771,15 +1781,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 /* Account newly detected lost packet(s) */
 
-static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+static void tcp_update_scoreboard(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (IsFack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+		tcp_mark_head_lost(sk, lost, tp->high_seq);
 	} else {
-		tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+		tcp_mark_head_lost(sk, 1, tp->high_seq);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -1787,7 +1799,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
+	if (!IsReno(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -1867,9 +1879,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
 /* Undo procedures. */
 
 #if FASTRETRANS_DEBUG > 1
-static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+static void DBGUNDO(struct sock *sk, const char *msg)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+
 	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
 	       msg,
 	       NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1915,13 +1929,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
 }
 
 /* People celebrate: "We love our President!" */
-static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_recovery(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		/* Happy end! We did not retransmit anything
 		 * or our original transmission succeeded.
 		 */
-		DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
 		tcp_undo_cwr(sk, 1);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1941,10 +1957,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
 }
 
 /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+static void tcp_try_undo_dsack(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->undo_marker && !tp->undo_retrans) {
-		DBGUNDO(sk, tp, "D-SACK");
+		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwr(sk, 1);
 		tp->undo_marker = 0;
 		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1953,9 +1971,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
 
 /* Undo during fast recovery after partial ACK. */
 
-static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
-				int acked)
+static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
 	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
 
@@ -1968,7 +1986,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
-		DBGUNDO(sk, tp, "Hoe");
+		DBGUNDO(sk, "Hoe");
 		tcp_undo_cwr(sk, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
 
@@ -1982,8 +2000,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 }
 
 /* Undo during loss recovery after partial ACK. */
-static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_loss(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
 		tcp_for_write_queue(skb, sk) {
@@ -1994,7 +2014,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 
 		clear_all_retrans_hints(tp);
 
-		DBGUNDO(sk, tp, "partial loss");
+		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
 		tcp_undo_cwr(sk, 1);
@@ -2016,8 +2036,10 @@ static inline void tcp_complete_cwr(struct sock *sk)
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tp->left_out = tp->sacked_out;
 
 	if (tp->retrans_out == 0)
@@ -2111,7 +2133,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2127,7 +2149,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		switch (icsk->icsk_ca_state) {
 		case TCP_CA_Loss:
 			icsk->icsk_retransmits = 0;
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			break;
 
@@ -2141,7 +2163,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			break;
 
 		case TCP_CA_Disorder:
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 			if (!tp->undo_marker ||
 			    /* For SACK case do not Open to allow to undo
 			     * catching for all duplicate ACKs. */
@@ -2154,7 +2176,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		case TCP_CA_Recovery:
 			if (IsReno(tp))
 				tcp_reset_reno_sack(tp);
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			tcp_complete_cwr(sk);
 			break;
@@ -2170,14 +2192,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		} else {
 			int acked = prior_packets - tp->packets_out;
 			if (IsReno(tp))
-				tcp_remove_reno_sacks(sk, tp, acked);
-			is_dupack = tcp_try_undo_partial(sk, tp, acked);
+				tcp_remove_reno_sacks(sk, acked);
+			is_dupack = tcp_try_undo_partial(sk, acked);
 		}
 		break;
 	case TCP_CA_Loss:
 		if (flag&FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
-		if (!tcp_try_undo_loss(sk, tp)) {
+		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
 			tcp_xmit_retransmit_queue(sk);
 			return;
@@ -2194,10 +2216,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		}
 
 		if (icsk->icsk_ca_state == TCP_CA_Disorder)
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 
-		if (!tcp_time_to_recover(sk, tp)) {
-			tcp_try_to_open(sk, tp, flag);
+		if (!tcp_time_to_recover(sk)) {
+			tcp_try_to_open(sk, flag);
 			return;
 		}
 
@@ -2236,8 +2258,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
 
-	if (is_dupack || tcp_head_timedout(sk, tp))
-		tcp_update_scoreboard(sk, tp);
+	if (is_dupack || tcp_head_timedout(sk))
+		tcp_update_scoreboard(sk);
 	tcp_cwnd_down(sk);
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -2313,8 +2335,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * RFC2988 recommends to restart timer to now+rto.
  */
 
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
@@ -2471,7 +2495,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 	if (acked&FLAG_ACKED) {
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
-		tcp_ack_packets_out(sk, tp);
+		tcp_ack_packets_out(sk);
 		if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
 			(*rtt_sample)(sk, tcp_usrtt(&tv));
 
@@ -2556,9 +2580,10 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
-				 struct sk_buff *skb, u32 ack, u32 ack_seq)
+static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+				 u32 ack_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int flag = 0;
 	u32 nwin = ntohs(tcp_hdr(skb)->window);
 
@@ -2576,7 +2601,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			 * fast path is recovered for sending TCP.
 			 */
 			tp->pred_flags = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
@@ -2762,7 +2787,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
@@ -3426,7 +3451,7 @@ queue_and_out:
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (skb->len)
-			tcp_event_data_recv(sk, tp, skb);
+			tcp_event_data_recv(sk, skb);
 		if (th->fin)
 			tcp_fin(skb, sk, th);
 
@@ -3443,7 +3468,7 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk, tp);
+		tcp_fast_path_check(sk);
 
 		if (eaten > 0)
 			__kfree_skb(skb);
@@ -3734,7 +3759,7 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		tcp_clamp_window(sk, tp);
+		tcp_clamp_window(sk);
 	else if (tcp_memory_pressure)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
@@ -3803,8 +3828,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
 	 */
@@ -3836,7 +3863,7 @@ static void tcp_new_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_should_expand_sndbuf(sk, tp)) {
+	if (tcp_should_expand_sndbuf(sk)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3860,9 +3887,9 @@ static void tcp_check_space(struct sock *sk)
 	}
 }
 
-static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk)
 {
-	tcp_push_pending_frames(sk, tp);
+	tcp_push_pending_frames(sk);
 	tcp_check_space(sk);
 }
 
@@ -4196,7 +4223,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				 */
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				return 0;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4267,12 +4294,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			}
 
-			tcp_event_data_recv(sk, tp, skb);
+			tcp_event_data_recv(sk, skb);
 
 			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
 				/* Well, only one small jumplet in fast path... */
 				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				if (!inet_csk_ack_scheduled(sk))
 					goto no_ack;
 			}
@@ -4355,7 +4382,7 @@ step5:
 	/* step 7: process the segment text */
 	tcp_data_queue(sk, skb);
 
-	tcp_data_snd_check(sk, tp);
+	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
 	return 0;
 
@@ -4672,7 +4699,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Do step6 onward by hand. */
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		return 0;
 	}
 
@@ -4864,7 +4891,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 	/* tcp_data could move socket to TIME-WAIT */
 	if (sk->sk_state != TCP_CLOSE) {
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		tcp_ack_snd_check(sk);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 94d9f0c63682..3a60aea744ae 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,12 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-static void update_send_head(struct sock *sk, struct tcp_sock *tp,
-			     struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	tcp_packets_out_inc(sk, tp, skb);
+	tcp_packets_out_inc(sk, skb);
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -76,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
  * invalid. OK, let's make this for now:
  */
-static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+static inline __u32 tcp_acceptable_seq(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
 		return tp->snd_nxt;
 	else
@@ -516,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 					     md5 ? &md5_hash_location :
 #endif
 					     NULL);
-		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+		TCP_ECN_send(sk, skb, tcp_header_size);
 	}
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -927,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 /* Congestion window validation. (RFC2861) */
 
-static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out = tp->packets_out;
 
 	if (packets_out >= tp->snd_cwnd) {
@@ -1076,8 +1080,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	return cwnd_quota;
 }
 
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+int tcp_may_send_now(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
@@ -1144,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
@@ -1324,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		/* Decrement cwnd here because we are sending
 		* effectively two packets. */
 		tp->snd_cwnd--;
-		update_send_head(sk, tp, nskb);
+		update_send_head(sk, nskb);
 
 		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
 		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1387,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (tcp_tso_should_defer(sk, tp, skb))
+			if (tcp_tso_should_defer(sk, skb))
 				break;
 		}
 
@@ -1416,14 +1422,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		/* Advance the send_head.  This one is sent out.
 		 * This call will increment packets_out.
 		 */
-		update_send_head(sk, tp, skb);
+		update_send_head(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
 	}
 
 	if (likely(sent_pkts)) {
-		tcp_cwnd_validate(sk, tp);
+		tcp_cwnd_validate(sk);
 		return 0;
 	}
 	return !tp->packets_out && tcp_send_head(sk);
@@ -1433,14 +1439,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
  * TCP_CORK or attempt at coalescing tiny packets.
  * The socket must be locked by the caller.
  */
-void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
-			       unsigned int cur_mss, int nonagle)
+void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+			       int nonagle)
 {
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	if (skb) {
 		if (tcp_write_xmit(sk, cur_mss, nonagle))
-			tcp_check_probe_timer(sk, tp);
+			tcp_check_probe_timer(sk);
 	}
 }
 
@@ -1484,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			update_send_head(sk, tp, skb);
-			tcp_cwnd_validate(sk, tp);
+			update_send_head(sk, skb);
+			tcp_cwnd_validate(sk);
 			return;
 		}
 	}
@@ -1933,7 +1939,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	 * segments to send.
 	 */
 
-	if (tcp_may_send_now(sk, tp))
+	if (tcp_may_send_now(sk))
 		return;
 
 	if (tp->forward_skb_hint) {
@@ -2023,7 +2029,7 @@ void tcp_send_fin(struct sock *sk)
 		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to
@@ -2033,7 +2039,6 @@ void tcp_send_fin(struct sock *sk)
  */
 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	/* NOTE: No TCP options attached and we never retransmit this. */
@@ -2053,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_shinfo(skb)->gso_type = 0;
 
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, tp, buff);
+	TCP_ECN_send_syn(sk, buff);
 	TCP_SKB_CB(buff)->sacked = 0;
 	skb_shinfo(buff)->gso_segs = 1;
 	skb_shinfo(buff)->gso_size = 0;
@@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk)
 {
 	/* If we have been reset, we may not send again. */
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *buff;
 
 		/* We are not putting this on the write queue, so
@@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk)
 		skb_shinfo(buff)->gso_type = 0;
 
 		/* Send it off, this clears delayed acks for us. */
-		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
 		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
@@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
 			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
-				update_send_head(sk, tp, skb);
+				update_send_head(sk, skb);
 			}
 			return err;
 		} else {
-- 
cgit v1.2.3


From 164891aadf1721fca4dce473bb0e0998181537c6 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 23 Apr 2007 22:26:16 -0700
Subject: [TCP]: Congestion control API update.

Make some simple changes so the congestion control API is faster/cleaner.
* use ktime_t rather than timeval
* merge rtt sampling into the existing ack callback;
  this means one indirect call instead of two per ack.
* use flag bits to store options/settings

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  |  5 +++++
 include/net/tcp.h       |  9 +++++----
 net/ipv4/tcp_bic.c      |  2 +-
 net/ipv4/tcp_cong.c     | 14 +++++++-------
 net/ipv4/tcp_cubic.c    |  2 +-
 net/ipv4/tcp_htcp.c     |  2 +-
 net/ipv4/tcp_illinois.c | 16 +++++++---------
 net/ipv4/tcp_input.c    | 25 ++++++++-----------------
 net/ipv4/tcp_lp.c       |  8 +++++---
 net/ipv4/tcp_output.c   |  2 +-
 net/ipv4/tcp_vegas.c    | 10 +++++++---
 net/ipv4/tcp_veno.c     | 10 +++++++---
 net/ipv4/tcp_westwood.c |  2 +-
 net/ipv4/tcp_yeah.c     |  6 ++++--
 net/ipv4/tcp_yeah.h     |  7 +++++--
 15 files changed, 65 insertions(+), 55 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50f6f6a094cf..2694cb3ca763 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
 	skb->tstamp = ktime_get_real();
 }
 
+static inline ktime_t net_timedelta(ktime_t t)
+{
+	return ktime_sub(ktime_get_real(), t);
+}
+
 
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 43910fe3c448..a385797f160a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -629,9 +629,12 @@ enum tcp_ca_event {
 #define TCP_CA_MAX	128
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
+#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_RTT_STAMP	0x2
+
 struct tcp_congestion_ops {
 	struct list_head	list;
-	int	non_restricted;
+	unsigned long flags;
 
 	/* initialize private data (optional) */
 	void (*init)(struct sock *sk);
@@ -645,8 +648,6 @@ struct tcp_congestion_ops {
 	/* do new cwnd calculation (required) */
 	void (*cong_avoid)(struct sock *sk, u32 ack,
 			   u32 rtt, u32 in_flight, int good_ack);
-	/* round trip time sample per acked packet (optional) */
-	void (*rtt_sample)(struct sock *sk, u32 usrtt);
 	/* call before changing ca_state (optional) */
 	void (*set_state)(struct sock *sk, u8 new_state);
 	/* call when cwnd event occurs (optional) */
@@ -654,7 +655,7 @@ struct tcp_congestion_ops {
 	/* new value of cwnd after loss (optional) */
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
-	void (*pkts_acked)(struct sock *sk, u32 num_acked);
+	void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
 	/* get info for inet_diag (optional) */
 	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0ac..281c9f913257 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ccd88407e0cd..86b26539e54b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
 #endif
 
 	if (ca) {
-		ca->non_restricted = 1;	/* default is always allowed */
+		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
 		list_move(&ca->list, &tcp_cong_list);
 		ret = 0;
 	}
@@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
 	*buf = '\0';
 	rcu_read_lock();
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (!ca->non_restricted)
+		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
 			continue;
 		offs += snprintf(buf + offs, maxlen - offs,
 				 "%s%s",
@@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
 		}
 	}
 
-	/* pass 2 clear */
+	/* pass 2 clear old values */
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
-		ca->non_restricted = 0;
+		ca->flags &= ~TCP_CONG_NON_RESTRICTED;
 
 	/* pass 3 mark as allowed */
 	while ((name = strsep(&val, " ")) && *name) {
 		ca = tcp_ca_find(name);
 		WARN_ON(!ca);
 		if (ca)
-			ca->non_restricted = 1;
+			ca->flags |= TCP_CONG_NON_RESTRICTED;
 	}
 out:
 	spin_unlock(&tcp_cong_list_lock);
@@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	if (!ca)
 		err = -ENOENT;
 
-	else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
 		err = -EPERM;
 
 	else if (!try_module_get(ca->owner))
@@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
 
 struct tcp_congestion_ops tcp_reno = {
+	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
-	.non_restricted = 1,
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 296845be912b..14224487b16b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d1..4ba4a7ae0a85 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
 	}
 }
 
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index ae6298600886..8e3165917f72 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk)
 }
 
 /* Measure RTT for each ack. */
-static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	struct illinois *ca = inet_csk_ca(sk);
+	u32 rtt;
+
+	ca->acked = pkts_acked;
+
+	rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC;
 
 	/* ignore bogus values, this prevents wraparound in alpha math */
 	if (rtt > RTT_MAX)
@@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
 	ca->sum_rtt += rtt;
 }
 
-/* Capture count of packets covered by ack, to adjust for delayed acks */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
-{
-	struct illinois *ca = inet_csk_ca(sk);
-	ca->acked = pkts_acked;
-}
-
 /* Maximum queuing delay */
 static inline u32 max_delay(const struct illinois *ca)
 {
@@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_illinois = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
 	.min_cwnd	= tcp_reno_min_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
-	.rtt_sample	= tcp_illinois_rtt_sample,
 	.get_info	= tcp_illinois_info,
 	.pkts_acked	= tcp_illinois_acked,
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 633389390788..051f0f815f17 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
-static u32 tcp_usrtt(struct timeval *tv)
-{
-	struct timeval now;
-
-	do_gettimeofday(&now);
-	return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
 /* Remove acknowledged frames from the retransmission queue. */
 static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
@@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	int acked = 0;
 	__s32 seq_rtt = -1;
 	u32 pkts_acked = 0;
-	void (*rtt_sample)(struct sock *sk, u32 usrtt)
-		= icsk->icsk_ca_ops->rtt_sample;
-	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+	ktime_t last_ackt = ktime_set(0,0);
 
 	while ((skb = tcp_write_queue_head(sk)) &&
 	       skb != tcp_send_head(sk)) {
@@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 				seq_rtt = -1;
 			} else if (seq_rtt < 0) {
 				seq_rtt = now - scb->when;
-				skb_get_timestamp(skb, &tv);
+				last_ackt = skb->tstamp;
 			}
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 			}
 		} else if (seq_rtt < 0) {
 			seq_rtt = now - scb->when;
-			skb_get_timestamp(skb, &tv);
+			last_ackt = skb->tstamp;
 		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
@@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	}
 
 	if (acked&FLAG_ACKED) {
+		const struct tcp_congestion_ops *ca_ops
+			= inet_csk(sk)->icsk_ca_ops;
+
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk);
-		if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
-			(*rtt_sample)(sk, tcp_usrtt(&tv));
 
-		if (icsk->icsk_ca_ops->pkts_acked)
-			icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
 	}
 
 #if FASTRETRANS_DEBUG > 0
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21cb..b4e062ab24a1 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
  *   3. calc smoothed OWD (SOWD).
  * Most ideas come from the original TCP-LP implementation.
  */
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
 {
 	struct lp *lp = inet_csk_ca(sk);
 	s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
  * newReno in increase case.
  * We work it out by following the idea from TCP-LP's paper directly
  */
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
 
+	tcp_lp_rtt_sample(sk,  ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC);
+
 	/* calc inference */
 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
 		lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
 }
 
 static struct tcp_congestion_ops tcp_lp = {
+	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
 	.min_cwnd = tcp_reno_min_cwnd,
-	.rtt_sample = tcp_lp_rtt_sample,
 	.pkts_acked = tcp_lp_pkts_acked,
 
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3a60aea744ae..e70a6840cb64 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* If congestion control is doing timestamping, we must
 	 * take such a timestamp before we potentially clone/copy.
 	 */
-	if (icsk->icsk_ca_ops->rtt_sample)
+	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
 		__net_timestamp(skb);
 
 	if (likely(clone_it)) {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 87e72bef6d08..f4104eeb5f26 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
@@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_vegas = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
-	.rtt_sample	= tcp_vegas_rtt_calc,
+	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6c..0b50d0607a0e 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
 }
 
 /* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct veno *veno = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1;	/* Never allow zero rtt or basertt */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
-	.rtt_sample	= tcp_veno_rtt_calc,
+	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
 	.cwnd_event	= tcp_veno_cwnd_event,
 
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index ae1026a67720..e61e09dd513e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
  * Called after processing group of packets.
  * but all westwood needs is the last sample of srtt.
  */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct westwood *w = inet_csk_ca(sk);
 	if (cnt > 0)
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 46dd1bee583a..81ef02c1649a 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk)
 }
 
 
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 
 	if (icsk->icsk_ca_state == TCP_CA_Open)
 		yeah->pkts_acked = pkts_acked;
+
+	tcp_vegas_pkts_acked(sk, pkts_acked, last);
 }
 
 static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
@@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
-	.rtt_sample	= tcp_vegas_rtt_calc,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index a62d82038fd0..33ad5385c188 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
-- 
cgit v1.2.3


From 575ee7140dabe9b9c4f66f4f867039b97e548867 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 30 Apr 2007 00:39:55 -0700
Subject: [TCP] FRTO: Delay skb available check until it's mandatory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No new data is needed until the first ACK comes, so no need to check
for application limitedness until then.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 051f0f815f17..6b669898b197 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1265,20 +1265,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
-/* F-RTO can only be used if these conditions are satisfied:
- *  - there must be some unsent new data
- *  - the advertised window should allow sending it
- *  - TCP has never retransmitted anything other than head (SACK enhanced
- *    variant from Appendix B of RFC4138 is more robust here)
+/* F-RTO can only be used if TCP has never retransmitted anything other than
+ * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
  */
 int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
-		after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
-		      tp->snd_una + tp->snd_wnd))
+	if (!sysctl_tcp_frto)
 		return 0;
 
 	if (IsSackFrto())
@@ -2710,6 +2705,14 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	}
 
 	if (tp->frto_counter == 1) {
+		/* Sending of the next skb must be allowed or no FRTO */
+		if (!tcp_send_head(sk) ||
+		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
+				     tp->snd_una + tp->snd_wnd)) {
+			tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
+			return 1;
+		}
+
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
 		return 1;
-- 
cgit v1.2.3


From d551e4541dd60ae53459f77a971f2d6043431f5f Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 30 Apr 2007 00:42:20 -0700
Subject: [TCP] FRTO: RFC4138 allows Nagle override when new data must be sent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a corner case where a new data segment smaller than the MSS
is waiting in the send queue. For F-RTO to work correctly, a
new data segment must be sent at a certain point, or F-RTO cannot
be used at all. RFC4138 allows overriding Nagle at that
point.

The implementation uses frto_counter states 2 and 3 to distinguish
when the Nagle override is needed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  5 +++++
 net/ipv4/tcp_input.c  | 13 ++++++++-----
 net/ipv4/tcp_output.c |  6 ++++--
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'net/ipv4/tcp_input.c')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c6ecd455edab..ef8f9d4dae85 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1199,9 +1199,14 @@ static inline struct sk_buff *tcp_send_head(struct sock *sk)
 
 static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	sk->sk_send_head = skb->next;
 	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
 		sk->sk_send_head = NULL;
+	/* Don't override Nagle indefinitely with F-RTO */
+	if (tp->frto_counter == 2)
+		tp->frto_counter = 3;
 }
 
 static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6b669898b197..7641b2761a14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2637,7 +2637,9 @@ static void tcp_undo_spur_to_response(struct sock *sk, int flag)
  *                  algorithm is not part of the F-RTO detection algorithm
  *                  given in RFC4138 but can be selected separately).
  * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery.
+ * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
+ * of Nagle; this is done using frto_counter states 2 and 3: when a new data
+ * segment of any size is sent during F-RTO, state 2 is upgraded to 3.
  *
  * Rationale: if the RTO was spurious, new ACKs should arrive from the
  * original window even after we transmit two new data segments.
@@ -2666,7 +2668,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
 		return 1;
 	}
 
@@ -2692,7 +2694,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 			return 1;
 		}
 
-		if ((tp->frto_counter == 2) &&
+		if ((tp->frto_counter >= 2) &&
 		    (!(flag&FLAG_FORWARD_PROGRESS) ||
 		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
 			/* RFC4138 shortcoming (see comment above) */
@@ -2709,14 +2711,15 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		if (!tcp_send_head(sk) ||
 		    after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 				     tp->snd_una + tp->snd_wnd)) {
-			tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3),
+					    flag);
 			return 1;
 		}
 
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
 		return 1;
-	} else /* frto_counter == 2 */ {
+	} else {
 		switch (sysctl_tcp_frto_response) {
 		case 2:
 			tcp_undo_spur_to_response(sk, flag);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b5fa3c19afee..0faacf9c419d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1035,8 +1035,10 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	if (nonagle & TCP_NAGLE_PUSH)
 		return 1;
 
-	/* Don't use the nagle rule for urgent data (or for the final FIN).  */
-	if (tp->urg_mode ||
+	/* Don't use the nagle rule for urgent data (or for the final FIN).
+	 * Nagle can be ignored during F-RTO too (see RFC4138).
+	 */
+	if (tp->urg_mode || (tp->frto_counter == 2) ||
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
 		return 1;
 
-- 
cgit v1.2.3