From 540207ae69777b85d167df28f469e77f0fcbb8f9 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 15 Apr 2015 11:48:49 -0700
Subject: fou: avoid missing unlock in failure path

Fixes: 7a6c8c34e5b7 ("fou: implement FOU_CMD_GET")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fou.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index af150b43b214..34968cd5c146 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -711,11 +711,10 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				    cb->nlh->nlmsg_seq, NLM_F_MULTI,
 				    skb, FOU_CMD_GET);
 		if (ret)
-			goto done;
+			break;
 	}
 	mutex_unlock(&fn->fou_lock);
 
-done:
 	cb->args[0] = idx;
 	return skb->len;
 }
-- 
cgit v1.2.3


From 5a950ad58d412d76d33f4f4399d69308d511c1a4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Thu, 16 Apr 2015 21:17:35 +0800
Subject: netns: remove duplicated include from net_namespace.c

Remove duplicated include.

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a3abb719221f..78fc04ad36fc 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -16,7 +16,6 @@
 #include <linux/export.h>
 #include <linux/user_namespace.h>
 #include <linux/net_namespace.h>
-#include <linux/rtnetlink.h>
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <net/net_namespace.h>
-- 
cgit v1.2.3


From a166151cbe33b53221c24259e4a7201064b3ba79 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 15 Apr 2015 12:55:45 -0700
Subject: bpf: fix bpf helpers to use skb->mac_header relative offsets

For the short-term solution, let's fix bpf helper functions to use
skb->mac_header relative offsets instead of skb->data in order to
get the same eBPF programs with cls_bpf and act_bpf work on ingress
and egress qdisc path. We need to ensure that mac_header is set
before calling into programs. This is effectively the first option
from below referenced discussion.

A longer-term solution for LD_ABS|LD_IND instructions will be more
intrusive but also more beneficial than this, and will be implemented
later as it's too risky at this point in time.

I.e., we plan to look into the option of moving skb_pull() out of
eth_type_trans() and into netif_receive_skb() as has been suggested
as second option. Meanwhile, this solution ensures ingress can be
used with eBPF, too, and that we won't run into ABI troubles later.
For dealing with negative offsets inside eBPF helper functions,
we've implemented bpf_skb_clone_unwritable() to test for unwriteable
headers.

Reference: http://thread.gmane.org/gmane.linux.network/359129/focus=359694
Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h    |  2 +-
 include/uapi/linux/filter.h |  7 +++++--
 net/core/filter.c           | 41 ++++++++++++++++++++++++++++++++---------
 net/sched/act_bpf.c         |  3 +++
 net/sched/cls_bpf.c         |  3 +++
 samples/bpf/tcbpf1_kern.c   | 16 ++++++----------
 6 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5c1cee11f777..a9ebdf5701e8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -177,7 +177,7 @@ enum bpf_func_id {
 	/**
 	 * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
 	 * @skb: pointer to skb
-	 * @offset: offset within packet from skb->data
+	 * @offset: offset within packet from skb->mac_header
 	 * @from: pointer where to copy bytes from
 	 * @len: number of bytes to store into packet
 	 * @flags: bit 0 - if true, recompute skb->csum
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 34c7936ca114..c97340e43dd6 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -79,8 +79,11 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_RANDOM	56
 #define SKF_AD_VLAN_TPID	60
 #define SKF_AD_MAX	64
-#define SKF_NET_OFF   (-0x100000)
-#define SKF_LL_OFF    (-0x200000)
 
+#define SKF_NET_OFF	(-0x100000)
+#define SKF_LL_OFF	(-0x200000)
+
+#define BPF_NET_OFF	SKF_NET_OFF
+#define BPF_LL_OFF	SKF_LL_OFF
 
 #endif /* _UAPI__LINUX_FILTER_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index b669e75d2b36..bf831a85c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
+/**
+ *	bpf_skb_clone_unwritable - is the header of a clone not writable
+ *	@skb: buffer to check
+ *	@len: length up to which to write, can be negative
+ *
+ *	Returns true if modifying the header part of the cloned buffer
+ *	does require the data to be copied. I.e. this version works with
+ *	negative lengths needed for eBPF case!
+ */
+static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
+{
+	return skb_header_cloned(skb) ||
+	       (int) skb_headroom(skb) + len > skb->hdr_len;
+}
+
 #define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	unsigned int offset = (unsigned int) r2;
+	int offset = (int) r2;
 	void *from = (void *) (long) r3;
 	unsigned int len = (unsigned int) r4;
 	char buf[16];
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 	 *
 	 * so check for invalid 'offset' and too large 'len'
 	 */
-	if (unlikely(offset > 0xffff || len > sizeof(buf)))
+	if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + len)))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 #define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
 #define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
 
-static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 4d2cede17468..dc6a2d324bd8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return TC_ACT_UNSPEC;
+
 	spin_lock(&prog->tcf_lock);
 
 	prog->tcf_tm.lastuse = jiffies;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5c4171c5d2bd..91bd9c19471d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_bpf_prog *prog;
 	int ret = -1;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return -1;
+
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 7cf3f42a6e39..7c27710f8296 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -4,6 +4,8 @@
 #include <uapi/linux/ip.h>
 #include <uapi/linux/in.h>
 #include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+
 #include "bpf_helpers.h"
 
 /* compiler workaround */
@@ -14,18 +16,12 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
 	bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
 }
 
-/* use 1 below for ingress qdisc and 0 for egress */
-#if 0
-#undef ETH_HLEN
-#define ETH_HLEN 0
-#endif
-
 #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
 #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
 
 static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 {
-	__u8 old_tos = load_byte(skb, TOS_OFF);
+	__u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
 
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
 	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -38,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 
 static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 {
-	__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+	__u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -48,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
 static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 {
-	__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+	__u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
 	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -57,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 SEC("classifier")
 int bpf_prog1(struct __sk_buff *skb)
 {
-	__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+	__u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
 	long *value;
 
 	if (proto == IPPROTO_TCP) {
-- 
cgit v1.2.3


From 4c0ee414e877b899f7fc80aafb98d9425c02797f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 16 Apr 2015 16:12:53 +0800
Subject: Revert "net: Reset secmark when scrubbing packet"

This patch reverts commit b8fb4e0648a2ab3734140342002f68fb0c7d1602
because the secmark must be preserved even when a packet crosses
namespace boundaries.  The reason is that security labels apply to
the system as a whole and are not per-namespace.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3b6e5830256e..f9800f4059b4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4133,7 +4133,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	skb_sender_cpu_clear(skb);
-	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
-- 
cgit v1.2.3


From 213dd74aee765d4e5f3f4b9607fef0cf97faa2af Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 16 Apr 2015 09:03:27 +0800
Subject: skbuff: Do not scrub skb mark within the same name space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Wed, Apr 15, 2015 at 05:41:26PM +0200, Nicolas Dichtel wrote:
> Le 15/04/2015 15:57, Herbert Xu a écrit :
> >On Wed, Apr 15, 2015 at 06:22:29PM +0800, Herbert Xu wrote:
> [snip]
> >Subject: skbuff: Do not scrub skb mark within the same name space
> >
> >The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels:
> Maybe add a Fixes tag?
> Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path")
>
> >harmonize cleanup done on skb on rx path") broke anyone trying to
> >use netfilter marking across IPv4 tunnels.  While most of the
> >fields that are cleared by skb_scrub_packet don't matter, the
> >netfilter mark must be preserved.
> >
> >This patch rearranges skb_scurb_packet to preserve the mark field.
> nit: s/scurb/scrub
>
> Else it's fine for me.

Sure.

PS I used the wrong email for James the first time around.  So
let me repeat the question here.  Should secmark be preserved
or cleared across tunnels within the same name space? In fact,
do our security models even support name spaces?

---8<---
The commit ea23192e8e577dfc51e0f4fc5ca113af334edff9 ("tunnels:
harmonize cleanup done on skb on rx path") broke anyone trying to
use netfilter marking across IPv4 tunnels.  While most of the
fields that are cleared by skb_scrub_packet don't matter, the
netfilter mark must be preserved.

This patch rearranges skb_scrub_packet to preserve the mark field.

Fixes: ea23192e8e57 ("tunnels: harmonize cleanup done on skb on rx path")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f9800f4059b4..d1967dab9cc6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4124,18 +4124,21 @@ EXPORT_SYMBOL(skb_try_coalesce);
  */
 void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 {
-	if (xnet)
-		skb_orphan(skb);
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
-	skb->mark = 0;
 	skb_sender_cpu_clear(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
+
+	if (!xnet)
+		return;
+
+	skb_orphan(skb);
+	skb->mark = 0;
 }
 EXPORT_SYMBOL_GPL(skb_scrub_packet);
 
-- 
cgit v1.2.3


From fad9dfefea6405039491e7e4fc21fb6e59e7d26c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Apr 2015 16:12:28 -0700
Subject: tcp: tcp_get_info() should fetch socket fields once

tcp_get_info() can be called without holding socket lock,
so any socket fields can change under us.

Use READ_ONCE() to fetch sk_pacing_rate and sk_max_pacing_rate

Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 18e3a12eb1b2..59c8a027721b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2595,6 +2595,7 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
+	u32 rate;
 
 	memset(info, 0, sizeof(*info));
 
@@ -2655,10 +2656,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_total_retrans = tp->total_retrans;
 
-	info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
-					sk->sk_pacing_rate : ~0ULL;
-	info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
-					sk->sk_max_pacing_rate : ~0ULL;
+	rate = READ_ONCE(sk->sk_pacing_rate);
+	info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+	rate = READ_ONCE(sk->sk_max_pacing_rate);
+	info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
-- 
cgit v1.2.3


From 521f1cf1dbb9d5ad858dca5dc75d1b45f64b6589 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 16 Apr 2015 18:10:35 -0700
Subject: inet_diag: fix access to tcp cc information

Two different problems are fixed here :

1) inet_sk_diag_fill() might be called without socket lock held.
   icsk->icsk_ca_ops can change under us and module be unloaded.
   -> Access to freed memory.
   Fix this using rcu_read_lock() to prevent module unload.

2) Some TCP Congestion Control modules provide information
   but again this is not safe against icsk->icsk_ca_ops
   change and nla_put() errors were ignored. Some sockets
   could not get the additional info if skb was almost full.

Fix this by returning a status from get_info() handlers and
using rcu protection as well.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h       |  2 +-
 net/ipv4/inet_diag.c    | 28 ++++++++++++++++++++++------
 net/ipv4/tcp_dctcp.c    |  5 +++--
 net/ipv4/tcp_illinois.c |  6 +++---
 net/ipv4/tcp_vegas.c    |  5 +++--
 net/ipv4/tcp_vegas.h    |  2 +-
 net/ipv4/tcp_westwood.c |  6 +++---
 7 files changed, 36 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9598871485ce..051dc5c2802d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -829,7 +829,7 @@ struct tcp_congestion_ops {
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
 	/* get info for inet_diag (optional) */
-	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
+	int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
 
 	char 		name[TCP_CA_NAME_MAX];
 	struct module 	*owner;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 70e8b3c308ec..bb77ebdae3b3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -111,6 +111,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
+	const struct tcp_congestion_ops *ca_ops;
 	const struct inet_diag_handler *handler;
 	int ext = req->idiag_ext;
 	struct inet_diag_msg *r;
@@ -208,16 +209,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		info = nla_data(attr);
 	}
 
-	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops)
-		if (nla_put_string(skb, INET_DIAG_CONG,
-				   icsk->icsk_ca_ops->name) < 0)
+	if (ext & (1 << (INET_DIAG_CONG - 1))) {
+		int err = 0;
+
+		rcu_read_lock();
+		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+		if (ca_ops)
+			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
+		rcu_read_unlock();
+		if (err < 0)
 			goto errout;
+	}
 
 	handler->idiag_get_info(sk, r, info);
 
-	if (sk->sk_state < TCP_TIME_WAIT &&
-	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
-		icsk->icsk_ca_ops->get_info(sk, ext, skb);
+	if (sk->sk_state < TCP_TIME_WAIT) {
+		int err = 0;
+
+		rcu_read_lock();
+		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
+		if (ca_ops && ca_ops->get_info)
+			err = ca_ops->get_info(sk, ext, skb);
+		rcu_read_unlock();
+		if (err < 0)
+			goto errout;
+	}
 
 out:
 	nlmsg_end(skb, nlh);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index b504371af742..4376016f7fa5 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -277,7 +277,7 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	}
 }
 
-static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
 
@@ -297,8 +297,9 @@ static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 			info.dctcp_ab_tot = ca->acked_bytes_total;
 		}
 
-		nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+		return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
 	}
+	return 0;
 }
 
 static struct tcp_congestion_ops dctcp __read_mostly = {
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 1d5a30a90adf..67476f085e48 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -300,8 +300,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_illinois_info(struct sock *sk, u32 ext,
-			      struct sk_buff *skb)
+static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct illinois *ca = inet_csk_ca(sk);
 
@@ -318,8 +317,9 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 			do_div(t, info.tcpv_rttcnt);
 			info.tcpv_rtt = t;
 		}
-		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
+	return 0;
 }
 
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a6afde666ab1..c71a1b8f7bde 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -286,7 +286,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct vegas *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
@@ -297,8 +297,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 			.tcpv_minrtt = ca->minRTT,
 		};
 
-		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index 0531b99d8637..e8a6b33cc61d 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -19,6 +19,6 @@ void tcp_vegas_init(struct sock *sk);
 void tcp_vegas_state(struct sock *sk, u8 ca_state);
 void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
 
 #endif	/* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index bb63fba47d47..b3c57cceb990 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -256,8 +256,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_westwood_info(struct sock *sk, u32 ext,
-			      struct sk_buff *skb)
+static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct westwood *ca = inet_csk_ca(sk);
 
@@ -268,8 +267,9 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
 			.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
 		};
 
-		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
+	return 0;
 }
 
 static struct tcp_congestion_ops tcp_westwood __read_mostly = {
-- 
cgit v1.2.3


From f40ae91307c275fc8b17420fa74145e9937c3c0b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 17 Apr 2015 13:32:09 +0800
Subject: act_mirred: Fix bogus header when redirecting from VLAN

When you redirect a VLAN device to any device, you end up with
crap in af_packet on the xmit path because hard_header_len is
not equal to skb->mac_len.  So the redirected packet contains
four extra bytes at the start which then get interpreted as
part of the MAC address.

This patch fixes this by only pushing skb->mac_len.  We also
need to fix ifb because it tries to undo the pushing done by
act_mirred.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c      | 2 +-
 net/sched/act_mirred.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 34f846b4bd05..94570aace241 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -105,7 +105,7 @@ static void ri_tasklet(unsigned long dev)
 		if (from & AT_EGRESS) {
 			dev_queue_xmit(skb);
 		} else if (from & AT_INGRESS) {
-			skb_pull(skb, skb->dev->hard_header_len);
+			skb_pull(skb, skb->mac_len);
 			netif_receive_skb(skb);
 		} else
 			BUG();
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5953517ec059..3f63ceac8e01 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -157,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
-			skb_push(skb2, skb2->dev->hard_header_len);
+			skb_push(skb2, skb->mac_len);
 	}
 
 	/* mirror is always swallowed */
-- 
cgit v1.2.3


From 8b86a61da37cbbcf4bd6e87fda494a59b1cf16c4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2015 15:45:04 +0200
Subject: net: remove unused 'dev' argument from netif_needs_gso()

In commit 04ffcb255f22 ("net: Add ndo_gso_check") Tom originally
added the 'dev' argument to be able to call ndo_gso_check().

Then later, when generalizing this in commit 5f35227ea34b
("net: Generalize ndo_gso_check to ndo_features_check")
Jesse removed the call to ndo_gso_check() in netif_needs_gso()
by calling the new ndo_features_check() in a different place.
This made the 'dev' argument unused.

Remove the unused argument and go back to the code as before.

Cc: Tom Herbert <therbert@google.com>
Cc: Jesse Gross <jesse@nicira.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c      | 2 +-
 drivers/net/xen-netfront.c | 2 +-
 include/linux/netdevice.h  | 2 +-
 net/core/dev.c             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 8362aef0c15e..58c6ba5746d5 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -313,7 +313,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
 	 */
 	if (q->flags & IFF_VNET_HDR)
 		features |= vlan->tap_features;
-	if (netif_needs_gso(dev, skb, features)) {
+	if (netif_needs_gso(skb, features)) {
 		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
 
 		if (IS_ERR(segs))
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 720aaf6313d2..8dcf31063e9a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -560,7 +560,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (unlikely(!netif_carrier_ok(dev) ||
 		     (slots > 1 && !xennet_can_sg(dev)) ||
-		     netif_needs_gso(dev, skb, netif_skb_features(skb)))) {
+		     netif_needs_gso(skb, netif_skb_features(skb)))) {
 		spin_unlock_irqrestore(&queue->tx_lock, flags);
 		goto drop;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b5679aed660b..bcbde799ec69 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3713,7 +3713,7 @@ static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features)
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline bool netif_needs_gso(struct net_device *dev, struct sk_buff *skb,
+static inline bool netif_needs_gso(struct sk_buff *skb,
 				   netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
diff --git a/net/core/dev.c b/net/core/dev.c
index af4a1b0adc10..1796cef55ab5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2713,7 +2713,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (unlikely(!skb))
 		goto out_null;
 
-	if (netif_needs_gso(dev, skb, features)) {
+	if (netif_needs_gso(skb, features)) {
 		struct sk_buff *segs;
 
 		segs = skb_gso_segment(skb, features);
-- 
cgit v1.2.3


From e3122b7fae7b4e3d1d49fa84f6515bcbe6cbc6fc Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Date: Fri, 17 Apr 2015 15:12:25 -0400
Subject: net: dsa: use DEVICE_ATTR_RW to declare temp1_max

Since commit da4759c (sysfs: Use only return value from is_visible for
the file mode), it is possible to reduce the permissions of a file.

So declare temp1_max with the DEVICE_ATTR_RW macro and remove the write
permission in dsa_hwmon_attrs_visible if set_temp_limit isn't provided.

Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 5eaadabe23a1..079a224471e7 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -124,7 +124,7 @@ static ssize_t temp1_max_store(struct device *dev,
 
 	return count;
 }
-static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store);
+static DEVICE_ATTR_RW(temp1_max);
 
 static ssize_t temp1_max_alarm_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
@@ -159,8 +159,8 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
 	if (index == 1) {
 		if (!drv->get_temp_limit)
 			mode = 0;
-		else if (drv->set_temp_limit)
-			mode |= S_IWUSR;
+		else if (!drv->set_temp_limit)
+			mode &= ~S_IWUSR;
 	} else if (index == 2 && !drv->get_temp_alarm) {
 		mode = 0;
 	}
-- 
cgit v1.2.3