From 7eced5ab5a7366ee7ca5360b3eca9d220c2b2887 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 3 Jun 2018 12:06:57 +0200
Subject: netfilter: nf_tables: add NFT_LOGLEVEL_* enumeration and use it

This is internal, not exposed through uapi, and although it maps with
userspace LOG_*, with the introduction of LOGLEVEL_AUDIT we are
incurring in namespace pollution.

This patch adds the NFT_LOGLEVEL_ enumeration and use it from nft_log.

Fixes: 1a893b44de45 ("netfilter: nf_tables: Add audit support to log statement")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nft_log.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 7eef1cffbf1b..655187bed5d8 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -111,7 +111,7 @@ static void nft_log_eval(const struct nft_expr *expr,
 	const struct nft_log *priv = nft_expr_priv(expr);
 
 	if (priv->loginfo.type == NF_LOG_TYPE_LOG &&
-	    priv->loginfo.u.log.level == LOGLEVEL_AUDIT) {
+	    priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) {
 		nft_log_eval_audit(pkt);
 		return;
 	}
@@ -166,9 +166,9 @@ static int nft_log_init(const struct nft_ctx *ctx,
 			li->u.log.level =
 				ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL]));
 		} else {
-			li->u.log.level = LOGLEVEL_WARNING;
+			li->u.log.level = NFT_LOGLEVEL_WARNING;
 		}
-		if (li->u.log.level > LOGLEVEL_AUDIT) {
+		if (li->u.log.level > NFT_LOGLEVEL_AUDIT) {
 			err = -EINVAL;
 			goto err1;
 		}
@@ -196,7 +196,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
 		break;
 	}
 
-	if (li->u.log.level == LOGLEVEL_AUDIT)
+	if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
 		return 0;
 
 	err = nf_logger_find_get(ctx->family, li->type);
@@ -220,7 +220,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
 	if (priv->prefix != nft_log_null_prefix)
 		kfree(priv->prefix);
 
-	if (li->u.log.level == LOGLEVEL_AUDIT)
+	if (li->u.log.level == NFT_LOGLEVEL_AUDIT)
 		return;
 
 	nf_logger_put(ctx->family, li->type);
-- 
cgit v1.2.3


From fd3a88625844907151737fc3b4201676effa6d27 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 6 Jun 2018 11:23:01 -0400
Subject: net: in virtio_net_hdr only add VLAN_HLEN to csum_start if payload
 holds vlan

Tun, tap, virtio, packet and uml vector all use struct virtio_net_hdr
to communicate packet metadata to userspace.

For skbuffs with vlan, the first two return the packet as it may have
existed on the wire, inserting the VLAN tag in the user buffer.  Then
virtio_net_hdr.csum_start needs to be adjusted by VLAN_HLEN bytes.

Commit f09e2249c4f5 ("macvtap: restore vlan header on user read")
added this feature to macvtap. Commit 3ce9b20f1971 ("macvtap: Fix
csum_start when VLAN tags are present") then fixed up csum_start.

Virtio, packet and uml do not insert the vlan header in the user
buffer.

When introducing virtio_net_hdr_from_skb to deduplicate filling in
the virtio_net_hdr, the variant from macvtap which adds VLAN_HLEN was
applied uniformly, breaking csum offset for packets with vlan on
virtio and packet.

Make insertion of VLAN_HLEN optional. Convert the callers to pass it
when needed.

Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Fixes: 1276f24eeef2 ("packet: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/um/drivers/vector_transports.c |  3 ++-
 drivers/net/tap.c                   |  5 ++++-
 drivers/net/tun.c                   |  3 ++-
 drivers/net/virtio_net.c            |  3 ++-
 include/linux/virtio_net.h          | 11 ++++-------
 net/packet/af_packet.c              |  4 ++--
 6 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
index 9065047f844b..77e4ebc206ae 100644
--- a/arch/um/drivers/vector_transports.c
+++ b/arch/um/drivers/vector_transports.c
@@ -120,7 +120,8 @@ static int raw_form_header(uint8_t *header,
 		skb,
 		vheader,
 		virtio_legacy_is_little_endian(),
-		false
+		false,
+		0
 	);
 
 	return 0;
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 9b6cb780affe..f0f7cd977667 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -774,13 +774,16 @@ static ssize_t tap_put_user(struct tap_queue *q,
 	int total;
 
 	if (q->flags & IFF_VNET_HDR) {
+		int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
 		struct virtio_net_hdr vnet_hdr;
+
 		vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
 		if (iov_iter_count(iter) < vnet_hdr_len)
 			return -EINVAL;
 
 		if (virtio_net_hdr_from_skb(skb, &vnet_hdr,
-					    tap_is_little_endian(q), true))
+					    tap_is_little_endian(q), true,
+					    vlan_hlen))
 			BUG();
 
 		if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) !=
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 85e14adf5207..a192a017cc68 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2089,7 +2089,8 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 			return -EINVAL;
 
 		if (virtio_net_hdr_from_skb(skb, &gso,
-					    tun_is_little_endian(tun), true)) {
+					    tun_is_little_endian(tun), true,
+					    vlan_hlen)) {
 			struct skb_shared_info *sinfo = skb_shinfo(skb);
 			pr_err("unexpected GSO type: "
 			       "0x%x, gso_size %d, hdr_len %d\n",
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 2aaa18ec7d46..1619ee3070b6 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1411,7 +1411,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		hdr = skb_vnet_hdr(skb);
 
 	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
-				    virtio_is_little_endian(vi->vdev), false))
+				    virtio_is_little_endian(vi->vdev), false,
+				    0))
 		BUG();
 
 	if (vi->mergeable_rx_bufs)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index f144216febc6..9397628a1967 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -58,7 +58,8 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 					  struct virtio_net_hdr *hdr,
 					  bool little_endian,
-					  bool has_data_valid)
+					  bool has_data_valid,
+					  int vlan_hlen)
 {
 	memset(hdr, 0, sizeof(*hdr));   /* no info leak */
 
@@ -83,12 +84,8 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		if (skb_vlan_tag_present(skb))
-			hdr->csum_start = __cpu_to_virtio16(little_endian,
-				skb_checksum_start_offset(skb) + VLAN_HLEN);
-		else
-			hdr->csum_start = __cpu_to_virtio16(little_endian,
-				skb_checksum_start_offset(skb));
+		hdr->csum_start = __cpu_to_virtio16(little_endian,
+			skb_checksum_start_offset(skb) + vlan_hlen);
 		hdr->csum_offset = __cpu_to_virtio16(little_endian,
 				skb->csum_offset);
 	} else if (has_data_valid &&
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 54ce66f68482..ee018564b2b4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2005,7 +2005,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
 		return -EINVAL;
 	*len -= sizeof(vnet_hdr);
 
-	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true))
+	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
 		return -EINVAL;
 
 	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2272,7 +2272,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (do_vnet) {
 		if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
 					    sizeof(struct virtio_net_hdr),
-					    vio_le(), true)) {
+					    vio_le(), true, 0)) {
 			spin_lock(&sk->sk_receive_queue.lock);
 			goto drop_n_account;
 		}
-- 
cgit v1.2.3


From 000ade8016400d93b4d7c89970d96b8c14773d45 Mon Sep 17 00:00:00 2001
From: Sultan Alsawaf <sultanxda@gmail.com>
Date: Wed, 6 Jun 2018 15:56:54 -0700
Subject: ip_tunnel: Fix name string concatenate in __ip_tunnel_create()

By passing a limit of 2 bytes to strncat, strncat is limited to writing
fewer bytes than what it's supposed to append to the name here.

Since the bounds are checked on the line above this, just remove the string
bounds checks entirely since they're unneeded.

Signed-off-by: Sultan Alsawaf <sultanxda@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 38d906baf1df..c4f5602308ed 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -261,8 +261,8 @@ static struct net_device *__ip_tunnel_create(struct net *net,
 	} else {
 		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 			goto failed;
-		strlcpy(name, ops->kind, IFNAMSIZ);
-		strncat(name, "%d", 2);
+		strcpy(name, ops->kind);
+		strcat(name, "%d");
 	}
 
 	ASSERT_RTNL();
-- 
cgit v1.2.3


From 8d97ca6b6755bf7ef57d323642ca9ee80d689782 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Thu, 7 Jun 2018 10:29:29 -0700
Subject: bpfilter: fix OUTPUT_FORMAT

CONFIG_OUTPUT_FORMAT is x86 only macro.
Used objdump to extract elf file format.

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bpfilter/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index aafa72001fcd..e0bbe7583e58 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -21,7 +21,7 @@ endif
 # which bpfilter_kern.c passes further into umh blob loader at run-time
 quiet_cmd_copy_umh = GEN $@
       cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
-      $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
+      $(OBJCOPY) -I binary -O `$(OBJDUMP) -f $<|grep format|cut -d' ' -f8` \
       -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
       --rename-section .data=.init.rodata $< $@
 
-- 
cgit v1.2.3


From a5a16e43529b5040760ebf9bd9b056dd34861f93 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 7 Jun 2018 15:37:34 +0200
Subject: xsk: Fix umem fill/completion queue mmap on 32-bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc-4.1.2 on 32-bit:

    net/xdp/xsk.c:663: warning: integer constant is too large for ‘long’ type
    net/xdp/xsk.c:665: warning: integer constant is too large for ‘long’ type

Add the missing "ULL" suffixes to the large XDP_UMEM_PGOFF_*_RING values
to fix this.

    net/xdp/xsk.c:663: warning: comparison is always false due to limited range of data type
    net/xdp/xsk.c:665: warning: comparison is always false due to limited range of data type

"unsigned long" is 32-bit on 32-bit systems, hence the offset is
truncated, and can never be equal to any of the XDP_UMEM_PGOFF_*_RING
values.  Use loff_t (and the required cast) to fix this.

Fixes: 423f38329d267969 ("xsk: add umem fill queue support and mmap")
Fixes: fe2308328cd2f26e ("xsk: add umem completion queue support and mmap")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/if_xdp.h | 4 ++--
 net/xdp/xsk.c               | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 1fa0e977ea8d..caed8b1614ff 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -63,8 +63,8 @@ struct xdp_statistics {
 /* Pgoff for mmaping the rings */
 #define XDP_PGOFF_RX_RING			  0
 #define XDP_PGOFF_TX_RING		 0x80000000
-#define XDP_UMEM_PGOFF_FILL_RING	0x100000000
-#define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000
+#define XDP_UMEM_PGOFF_FILL_RING	0x100000000ULL
+#define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000ULL
 
 /* Rx/Tx descriptor */
 struct xdp_desc {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c6ed2454f7ce..36919a254ba3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -643,7 +643,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
 static int xsk_mmap(struct file *file, struct socket *sock,
 		    struct vm_area_struct *vma)
 {
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+	loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long size = vma->vm_end - vma->vm_start;
 	struct xdp_sock *xs = xdp_sk(sock->sk);
 	struct xsk_queue *q = NULL;
-- 
cgit v1.2.3


From c09290c5637692a9bfe7740e4c5e693efff12810 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 8 Jun 2018 00:06:01 +0200
Subject: bpf, xdp: fix crash in xdp_umem_unaccount_pages

syzkaller was able to trigger the following panic for AF_XDP:

  BUG: KASAN: null-ptr-deref in atomic64_sub include/asm-generic/atomic-instrumented.h:144 [inline]
  BUG: KASAN: null-ptr-deref in atomic_long_sub include/asm-generic/atomic-long.h:199 [inline]
  BUG: KASAN: null-ptr-deref in xdp_umem_unaccount_pages.isra.4+0x3d/0x80 net/xdp/xdp_umem.c:135
  Write of size 8 at addr 0000000000000060 by task syz-executor246/4527

  CPU: 1 PID: 4527 Comm: syz-executor246 Not tainted 4.17.0+ #89
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:77 [inline]
   dump_stack+0x1b9/0x294 lib/dump_stack.c:113
   kasan_report_error mm/kasan/report.c:352 [inline]
   kasan_report.cold.7+0x6d/0x2fe mm/kasan/report.c:412
   check_memory_region_inline mm/kasan/kasan.c:260 [inline]
   check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
   kasan_check_write+0x14/0x20 mm/kasan/kasan.c:278
   atomic64_sub include/asm-generic/atomic-instrumented.h:144 [inline]
   atomic_long_sub include/asm-generic/atomic-long.h:199 [inline]
   xdp_umem_unaccount_pages.isra.4+0x3d/0x80 net/xdp/xdp_umem.c:135
   xdp_umem_reg net/xdp/xdp_umem.c:334 [inline]
   xdp_umem_create+0xd6c/0x10f0 net/xdp/xdp_umem.c:349
   xsk_setsockopt+0x443/0x550 net/xdp/xsk.c:531
   __sys_setsockopt+0x1bd/0x390 net/socket.c:1935
   __do_sys_setsockopt net/socket.c:1946 [inline]
   __se_sys_setsockopt net/socket.c:1943 [inline]
   __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1943
   do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

In xdp_umem_reg() the call to xdp_umem_account_pages() passed
with CAP_IPC_LOCK where we didn't need to end up charging rlimit
on memlock for the current user and therefore umem->user continues
to be NULL. Later on through fault injection syzkaller triggered
a failure in either umem->pgs or umem->pages allocation such that
we bail out and undo accounting in xdp_umem_unaccount_pages()
where we eventually hit the panic since it tries to deref the
umem->user.

The code is pretty close to mm_account_pinned_pages() and
mm_unaccount_pinned_pages() pair and potentially could reuse
it even in a later cleanup, and it appears that the initial
commit c0c77d8fb787 ("xsk: add user memory registration support
sockopt") got this right while later follow-up introduced the
bug via a49049ea2576 ("xsk: simplified umem setup").

Fixes: a49049ea2576 ("xsk: simplified umem setup")
Reported-by: syzbot+979217770b09ebf5c407@syzkaller.appspotmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/xdp/xdp_umem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 7eb4948a38d2..b9ef487c4618 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -132,8 +132,10 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 
 static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 {
-	atomic_long_sub(umem->npgs, &umem->user->locked_vm);
-	free_uid(umem->user);
+	if (umem->user) {
+		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+		free_uid(umem->user);
+	}
 }
 
 static void xdp_umem_release(struct xdp_umem *umem)
-- 
cgit v1.2.3


From 66e58e0ef80a56a1d7857b6ce121141563cdd93e Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 7 Jun 2018 15:31:14 -0700
Subject: bpfilter: fix race in pipe access

syzbot reported the following crash
[  338.293946] bpfilter: read fail -512
[  338.304515] kasan: GPF could be caused by NULL-ptr deref or user memory access
[  338.311863] general protection fault: 0000 [#1] SMP KASAN
[  338.344360] RIP: 0010:__vfs_write+0x4a6/0x960
[  338.426363] Call Trace:
[  338.456967]  __kernel_write+0x10c/0x380
[  338.460928]  __bpfilter_process_sockopt+0x1d8/0x35b
[  338.487103]  bpfilter_mbox_request+0x4d/0xb0
[  338.491492]  bpfilter_ip_get_sockopt+0x6b/0x90

This can happen when multiple cpus trying to talk to user mode process
via bpfilter_mbox_request(). One cpu grabs the mutex while another goes to
sleep on the same mutex. Then former cpu sees that umh pipe is down and
shuts down the pipes. Later cpu finally acquires the mutex and crashes
on freed pipe.
Fix the race by using info.pid as an indicator that umh and pipes are healthy
and check it after acquiring the mutex.

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Reported-by: syzbot+7ade6c94abb2774c0fee@syzkaller.appspotmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bpfilter/bpfilter_kern.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index b13d058f8c34..09522573f611 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -24,17 +24,19 @@ static void shutdown_umh(struct umh_info *info)
 {
 	struct task_struct *tsk;
 
+	if (!info->pid)
+		return;
 	tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
 	if (tsk)
 		force_sig(SIGKILL, tsk);
 	fput(info->pipe_to_umh);
 	fput(info->pipe_from_umh);
+	info->pid = 0;
 }
 
 static void __stop_umh(void)
 {
-	if (IS_ENABLED(CONFIG_INET) &&
-	    bpfilter_process_sockopt) {
+	if (IS_ENABLED(CONFIG_INET)) {
 		bpfilter_process_sockopt = NULL;
 		shutdown_umh(&info);
 	}
@@ -55,7 +57,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	struct mbox_reply reply;
 	loff_t pos;
 	ssize_t n;
-	int ret;
+	int ret = -EFAULT;
 
 	req.is_set = is_set;
 	req.pid = current->pid;
@@ -63,6 +65,8 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname,
 	req.addr = (long)optval;
 	req.len = optlen;
 	mutex_lock(&bpfilter_lock);
+	if (!info.pid)
+		goto out;
 	n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
 	if (n != sizeof(req)) {
 		pr_err("write fail %zd\n", n);
-- 
cgit v1.2.3


From 8d499533e0bc02d44283dbdab03142b599b8ba16 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 8 Jun 2018 05:02:31 +0200
Subject: net/sched: act_simple: fix parsing of TCA_DEF_DATA

use nla_strlcpy() to avoid copying data beyond the length of TCA_DEF_DATA
netlink attribute, in case it is less than SIMP_MAX_DATA and it does not
end with '\0' character.

v2: fix errors in the commit message, thanks Hangbin Liu

Fixes: fa1b1cff3d06 ("net_cls_act: Make act_simple use of netlink policy.")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_simple.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 9618b4a83cee..98c4afe7c15b 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -53,22 +53,22 @@ static void tcf_simp_release(struct tc_action *a)
 	kfree(d->tcfd_defdata);
 }
 
-static int alloc_defdata(struct tcf_defact *d, char *defdata)
+static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
 {
 	d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
 	if (unlikely(!d->tcfd_defdata))
 		return -ENOMEM;
-	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
+	nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	return 0;
 }
 
-static void reset_policy(struct tcf_defact *d, char *defdata,
+static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
 			 struct tc_defact *p)
 {
 	spin_lock_bh(&d->tcf_lock);
 	d->tcf_action = p->action;
 	memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
-	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
+	nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	spin_unlock_bh(&d->tcf_lock);
 }
 
@@ -87,7 +87,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 	struct tcf_defact *d;
 	bool exists = false;
 	int ret = 0, err;
-	char *defdata;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -110,8 +109,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 	}
 
-	defdata = nla_data(tb[TCA_DEF_DATA]);
-
 	if (!exists) {
 		ret = tcf_idr_create(tn, parm->index, est, a,
 				     &act_simp_ops, bind, false);
@@ -119,7 +116,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			return ret;
 
 		d = to_defact(*a);
-		ret = alloc_defdata(d, defdata);
+		ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
 		if (ret < 0) {
 			tcf_idr_release(*a, bind);
 			return ret;
@@ -133,7 +130,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 		if (!ovr)
 			return -EEXIST;
 
-		reset_policy(d, defdata, parm);
+		reset_policy(d, tb[TCA_DEF_DATA], parm);
 	}
 
 	if (ret == ACT_P_CREATED)
-- 
cgit v1.2.3


From 6c206b20092a3623184cff9470dba75d21507874 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 8 Jun 2018 11:35:40 +0200
Subject: udp: fix rx queue len reported by diag and proc interface

After commit 6b229cf77d68 ("udp: add batching to udp_rmem_release()")
the sk_rmem_alloc field does not measure exactly anymore the
receive queue length, because we batch the rmem release. The issue
is really apparent only after commit 0d4a6608f68c ("udp: do rmem bulk
free even if the rx sk queue is empty"): the user space can easily
check for an empty socket with not-0 queue length reported by the 'ss'
tool or the procfs interface.

We need to use a custom UDP helper to report the correct queue length,
taking into account the forward allocation deficit.

Reported-by: trevor.francis@46labs.com
Fixes: 6b229cf77d68 ("UDP: add batching to udp_rmem_release()")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/transp_v6.h | 11 +++++++++--
 include/net/udp.h       |  5 +++++
 net/ipv4/udp.c          |  2 +-
 net/ipv4/udp_diag.c     |  2 +-
 net/ipv6/datagram.c     |  6 +++---
 net/ipv6/udp.c          |  3 ++-
 6 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index c4f5caaf3778..f6a3543e5247 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -45,8 +45,15 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg,
 			  struct flowi6 *fl6, struct ipcm6_cookie *ipc6,
 			  struct sockcm_cookie *sockc);
 
-void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
-			     __u16 srcp, __u16 destp, int bucket);
+void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+			       __u16 srcp, __u16 destp, int rqueue, int bucket);
+static inline void
+ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
+			__u16 destp, int bucket)
+{
+	__ip6_dgram_sock_seq_show(seq, sp, srcp, destp, sk_rmem_alloc_get(sp),
+				  bucket);
+}
 
 #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006)
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 7ba0ed252c52..b1ea8b0f5e6a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -247,6 +247,11 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 	return htons((((u64) hash * (max - min)) >> 32) + min);
 }
 
+static inline int udp_rqueue_get(struct sock *sk)
+{
+	return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit);
+}
+
 /* net/ipv4/udp.c */
 void udp_destruct_sock(struct sock *sk);
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3365362cac88..9bb27df4dac5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2772,7 +2772,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
-		sk_rmem_alloc_get(sp),
+		udp_rqueue_get(sp),
 		0, 0L, 0,
 		from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
 		0, sock_i_ino(sp),
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index d0390d844ac8..d9ad986c7b2c 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -163,7 +163,7 @@ static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
 static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		void *info)
 {
-	r->idiag_rqueue = sk_rmem_alloc_get(sk);
+	r->idiag_rqueue = udp_rqueue_get(sk);
 	r->idiag_wqueue = sk_wmem_alloc_get(sk);
 }
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a02ad100f0d7..2ee08b6a86a4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1019,8 +1019,8 @@ exit_f:
 }
 EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);
 
-void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
-			     __u16 srcp, __u16 destp, int bucket)
+void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
+			       __u16 srcp, __u16 destp, int rqueue, int bucket)
 {
 	const struct in6_addr *dest, *src;
 
@@ -1036,7 +1036,7 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
 		   sp->sk_state,
 		   sk_wmem_alloc_get(sp),
-		   sk_rmem_alloc_get(sp),
+		   rqueue,
 		   0, 0L, 0,
 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
 		   0,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 164afd31aebf..e6645cae403e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1523,7 +1523,8 @@ int udp6_seq_show(struct seq_file *seq, void *v)
 		struct inet_sock *inet = inet_sk(v);
 		__u16 srcp = ntohs(inet->inet_sport);
 		__u16 destp = ntohs(inet->inet_dport);
-		ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket);
+		__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
+					  udp_rqueue_get(v), bucket);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 873aca2ee86e533665a73292c4e308ded1e9bafe Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 8 Jun 2018 15:11:47 +0200
Subject: net: bridge: Fix locking in br_fdb_find_port()

Callers of br_fdb_find() need to hold the hash lock, which
br_fdb_find_port() doesn't do. However, since br_fdb_find_port() is not
doing any actual FDB manipulation, the hash lock is not really needed at
all. So convert to br_fdb_find_rcu(), surrounded by rcu_read_lock() /
_unlock() pair.

The device pointer copied from inside the FDB entry is then kept alive
by the RTNL lock, which br_fdb_find_port() asserts.

Fixes: 4d4fd36126d6 ("net: bridge: Publish bridge accessor functions")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index b19e3104afd6..502f66349530 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -135,9 +135,11 @@ struct net_device *br_fdb_find_port(const struct net_device *br_dev,
 		return NULL;
 
 	br = netdev_priv(br_dev);
-	f = br_fdb_find(br, addr, vid);
+	rcu_read_lock();
+	f = br_fdb_find_rcu(br, addr, vid);
 	if (f && f->dst)
 		dev = f->dst->dev;
+	rcu_read_unlock();
 
 	return dev;
 }
-- 
cgit v1.2.3


From 6d8c50dcb029872b298eea68cc6209c866fd3e14 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 7 Jun 2018 13:39:49 -0700
Subject: socket: close race condition between sock_close() and
 sockfs_setattr()

fchownat() doesn't even hold refcnt of fd until it figures out
fd is really needed (otherwise is ignored) and releases it after
it resolves the path. This means sock_close() could race with
sockfs_setattr(), which leads to a NULL pointer dereference
since typically we set sock->sk to NULL in ->release().

As pointed out by Al, this is unique to sockfs. So we can fix this
in socket layer by acquiring inode_lock in sock_close() and
checking against NULL in sockfs_setattr().

sock_release() is called in many places, only the sock_close()
path matters here. And fortunately, this should not affect normal
sock_close() as it is only called when the last fd refcnt is gone.
It only affects sock_close() with a parallel sockfs_setattr() in
progress, which is not common.

Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.")
Reported-by: shankarapailoor <shankarapailoor@gmail.com>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index af57d85bcb48..8a109012608a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -541,7 +541,10 @@ static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (!err && (iattr->ia_valid & ATTR_UID)) {
 		struct socket *sock = SOCKET_I(d_inode(dentry));
 
-		sock->sk->sk_uid = iattr->ia_uid;
+		if (sock->sk)
+			sock->sk->sk_uid = iattr->ia_uid;
+		else
+			err = -ENOENT;
 	}
 
 	return err;
@@ -590,12 +593,16 @@ EXPORT_SYMBOL(sock_alloc);
  *	an inode not a file.
  */
 
-void sock_release(struct socket *sock)
+static void __sock_release(struct socket *sock, struct inode *inode)
 {
 	if (sock->ops) {
 		struct module *owner = sock->ops->owner;
 
+		if (inode)
+			inode_lock(inode);
 		sock->ops->release(sock);
+		if (inode)
+			inode_unlock(inode);
 		sock->ops = NULL;
 		module_put(owner);
 	}
@@ -609,6 +616,11 @@ void sock_release(struct socket *sock)
 	}
 	sock->file = NULL;
 }
+
+void sock_release(struct socket *sock)
+{
+	__sock_release(sock, NULL);
+}
 EXPORT_SYMBOL(sock_release);
 
 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
@@ -1171,7 +1183,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int sock_close(struct inode *inode, struct file *filp)
 {
-	sock_release(SOCKET_I(inode));
+	__sock_release(SOCKET_I(inode), inode);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 867f816badc01e6da655028810d468c9f935b37c Mon Sep 17 00:00:00 2001
From: Soheil Hassas Yeganeh <soheil@google.com>
Date: Fri, 8 Jun 2018 22:47:10 -0400
Subject: tcp: limit sk_rcvlowat by the maximum receive buffer

The user-provided value to setsockopt(SO_RCVLOWAT) can be
larger than the maximum possible receive buffer. Such values
mute POLLIN signals on the socket which can stall progress
on the socket.

Limit the user-provided value to half of the maximum receive
buffer, i.e., half of sk_rcvbuf when the receive buffer size
is set by the user, or otherwise half of sysctl_tcp_rmem[2].

Fixes: d1361840f8c5 ("tcp: fix SO_RCVLOWAT and RCVBUF autotuning")
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2741953adaba..141acd92e58a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1694,6 +1694,13 @@ EXPORT_SYMBOL(tcp_peek_len);
 /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
 int tcp_set_rcvlowat(struct sock *sk, int val)
 {
+	int cap;
+
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
+		cap = sk->sk_rcvbuf >> 1;
+	else
+		cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+	val = min(val, cap);
 	sk->sk_rcvlowat = val ? : 1;
 
 	/* Check if we need to signal EPOLLIN right now */
@@ -1702,12 +1709,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
 	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
 		return 0;
 
-	/* val comes from user space and might be close to INT_MAX */
 	val <<= 1;
-	if (val < 0)
-		val = INT_MAX;
-
-	val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
 	if (val > sk->sk_rcvbuf) {
 		sk->sk_rcvbuf = val;
 		tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
-- 
cgit v1.2.3