From 4cc4a1708903f404d2ca0dfde30e71e052c6cbc9 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 28 Nov 2019 12:25:45 +0100 Subject: batman-adv: Fix DAT candidate selection on little endian systems The distributed arp table is using a DHT to store and retrieve MAC address information for an IP address. This is done using unicast messages to selected peers. The potential peers are looked up using the IP address and the VID. While the IP address is always stored in big endian byte order, this is not the case of the VID. It can (depending on the host system) either be big endian or little endian. The host must therefore always convert it to big endian to ensure that all devices calculate the same peers for the same lookup data. Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b0af3a11d406..ec7bf5a4a9fc 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -285,6 +285,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) u32 hash = 0; const struct batadv_dat_entry *dat = data; const unsigned char *key; + __be16 vid; u32 i; key = (const unsigned char *)&dat->ip; @@ -294,7 +295,8 @@ static u32 batadv_hash_dat(const void *data, u32 size) hash ^= (hash >> 6); } - key = (const unsigned char *)&dat->vid; + vid = htons(dat->vid); + key = (__force const unsigned char *)&vid; for (i = 0; i < sizeof(dat->vid); i++) { hash += key[i]; hash += (hash << 10); -- cgit v1.2.3 From 02a614499600af836137c3fbc4404cd96365fff2 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Tue, 3 Dec 2019 19:06:44 +0100 Subject: mac80211: mesh: restrict airtime metric to peered established plinks The following warning is triggered every time an unestablished mesh peer gets dumped. Checks if a peer link is established before retrieving the airtime link metric. 
[ 9563.022567] WARNING: CPU: 0 PID: 6287 at net/mac80211/mesh_hwmp.c:345 airtime_link_metric_get+0xa2/0xb0 [mac80211] [ 9563.022697] Hardware name: PC Engines apu2/apu2, BIOS v4.10.0.3 [ 9563.022756] RIP: 0010:airtime_link_metric_get+0xa2/0xb0 [mac80211] [ 9563.022838] Call Trace: [ 9563.022897] sta_set_sinfo+0x936/0xa10 [mac80211] [ 9563.022964] ieee80211_dump_station+0x6d/0x90 [mac80211] [ 9563.023062] nl80211_dump_station+0x154/0x2a0 [cfg80211] [ 9563.023120] netlink_dump+0x17b/0x370 [ 9563.023130] netlink_recvmsg+0x2a4/0x480 [ 9563.023140] ____sys_recvmsg+0xa6/0x160 [ 9563.023154] ___sys_recvmsg+0x93/0xe0 [ 9563.023169] __sys_recvmsg+0x7e/0xd0 [ 9563.023210] do_syscall_64+0x4e/0x140 [ 9563.023217] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20191203180644.70653-1-markus.theil@tu-ilmenau.de [rewrite commit message] Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 68af62306385..d69983370381 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -328,6 +328,9 @@ u32 airtime_link_metric_get(struct ieee80211_local *local, unsigned long fail_avg = ewma_mesh_fail_avg_read(&sta->mesh->fail_avg); + if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) + return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. * Rate is returned in units of Kbps, correct this * to comply with airtime calculation units -- cgit v1.2.3 From e7a5f1f1cd0008e5ad379270a8657e121eedb669 Mon Sep 17 00:00:00 2001 From: Lingpeng Chen Date: Thu, 9 Jan 2020 09:48:33 +0800 Subject: bpf/sockmap: Read psock ingress_msg before sk_receive_queue Right now in tcp_bpf_recvmsg, the socket reads data from sk_receive_queue first if it is not empty, and from psock->ingress_msg otherwise. If a FIN packet arrives and there's also some data in psock->ingress_msg, the data in psock->ingress_msg will be purged. This always happens on requests to an HTTP/1.0 server like python SimpleHTTPServer, since the server sends a FIN packet right after the data is sent out. 
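For illustration only, here is a minimal userspace model of the ordering problem described above; the struct and helper names are invented and only mirror the description, not the actual kernel functions:

#include <stdio.h>

/* Hypothetical model: two receive sources per socket. */
struct queues {
	int sk_receive_queue;   /* bytes (or a FIN) queued on the plain TCP receive queue */
	int psock_ingress_msg;  /* bytes redirected by the BPF verdict program */
};

/* old order: anything on sk_receive_queue bypasses the psock path */
static const char *recv_old(const struct queues *q)
{
	if (q->sk_receive_queue)
		return "tcp_recvmsg() - buffered ingress_msg data is lost";
	return "read psock ingress_msg";
}

/* fixed order: only bypass the psock path when ingress_msg is empty too */
static const char *recv_fixed(const struct queues *q)
{
	if (q->sk_receive_queue && !q->psock_ingress_msg)
		return "tcp_recvmsg()";
	return "read psock ingress_msg first";
}

int main(void)
{
	/* FIN already queued on the socket while redirected data is still buffered */
	struct queues q = { .sk_receive_queue = 1, .psock_ingress_msg = 512 };

	printf("old:   %s\n", recv_old(&q));
	printf("fixed: %s\n", recv_fixed(&q));
	return 0;
}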
Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: Arika Chen Suggested-by: Arika Chen Signed-off-by: Lingpeng Chen Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200109014833.18951-1-forrest0579@gmail.com --- net/ipv4/tcp_bpf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e38705165ac9..e6b08b5a0895 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -121,14 +121,14 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct sk_psock *psock; int copied, ret; - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); - if (!skb_queue_empty(&sk->sk_receive_queue)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); msg_bytes_ready: copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); @@ -139,7 +139,7 @@ msg_bytes_ready: timeo = sock_rcvtimeo(sk, nonblock); data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); if (data) { - if (skb_queue_empty(&sk->sk_receive_queue)) + if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); -- cgit v1.2.3 From e21dba7a4df4d93da237da65a096084b4f2e87b4 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Thu, 9 Jan 2020 07:31:14 +0100 Subject: net/x25: fix nonblocking connect This patch fixes 2 issues in x25_connect(): 1. It makes absolutely no sense to reset the neighbour and the connection state after a (successful) nonblocking call of x25_connect. This prevents any connection from being established, since the response (call accept) cannot be processed. 2. Any further calls to x25_connect() while a call is pending should simply return, instead of creating new Call Request (on different logical channels). This patch should also fix the "KASAN: null-ptr-deref Write in x25_connect" and "BUG: unable to handle kernel NULL pointer dereference in x25_connect" bugs reported by syzbot. Signed-off-by: Martin Schiller Reported-by: syzbot+429c200ffc8772bfe070@syzkaller.appspotmail.com Reported-by: syzbot+eec0c87f31a7c3b66f7b@syzkaller.appspotmail.com Signed-off-by: David S. 
Miller --- net/x25/af_x25.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 2efe44a34644..d5b09bbff375 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -766,6 +766,10 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state == TCP_ESTABLISHED) goto out; + rc = -EALREADY; /* Do nothing if call is already in progress */ + if (sk->sk_state == TCP_SYN_SENT) + goto out; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; @@ -812,7 +816,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out_put_neigh; + goto out; rc = x25_wait_for_connection_establishment(sk); if (rc) -- cgit v1.2.3 From 2e012c74823629d9db27963c79caa3f5b2010746 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 10 Jan 2020 13:23:36 +0000 Subject: net: bpf: Don't leak time wait and request sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's possible to leak time wait and request sockets via the following BPF pseudo code:   sk = bpf_skc_lookup_tcp(...) if (sk) bpf_sk_release(sk) If sk->sk_state is TCP_NEW_SYN_RECV or TCP_TIME_WAIT the refcount taken by bpf_skc_lookup_tcp is not undone by bpf_sk_release. This is because sk_flags is re-used for other data in both kinds of sockets. The check !sock_flag(sk, SOCK_RCU_FREE) therefore returns a bogus result. Check that sk_flags is valid by calling sk_fullsock. Skip checking SOCK_RCU_FREE if we already know that sk is not a full socket. Fixes: edbf8c01de5a ("bpf: add skc_lookup_tcp helper") Fixes: f7355a6c0497 ("bpf: Check sk_fullsock() before returning from bpf_sk_lookup()") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200110132336.26099-1-lmb@cloudflare.com --- net/core/filter.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 28b3c258188c..d22d108fc6e3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5318,8 +5318,7 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (sk) { sk = sk_to_full_sk(sk); if (!sk_fullsock(sk)) { - if (!sock_flag(sk, SOCK_RCU_FREE)) - sock_gen_put(sk); + sock_gen_put(sk); return NULL; } } @@ -5356,8 +5355,7 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (sk) { sk = sk_to_full_sk(sk); if (!sk_fullsock(sk)) { - if (!sock_flag(sk, SOCK_RCU_FREE)) - sock_gen_put(sk); + sock_gen_put(sk); return NULL; } } @@ -5424,7 +5422,8 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { BPF_CALL_1(bpf_sk_release, struct sock *, sk) { - if (!sock_flag(sk, SOCK_RCU_FREE)) + /* Only full sockets have sk->sk_flags. */ + if (!sk_fullsock(sk) || !sock_flag(sk, SOCK_RCU_FREE)) sock_gen_put(sk); return 0; } -- cgit v1.2.3 From 5c5d22a750d4bcf35e9539dccec397b0edcce176 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Jan 2020 04:36:55 -0800 Subject: net/tls: avoid spurious decryption error with HW resync When device loses sync mid way through a record - kernel has to re-encrypt the part of the record which the device already decrypted to be able to decrypt and authenticate the record in its entirety. 
The re-encryption piggy backs on the decryption routine, but obviously because the partially decrypted record can't be authenticated crypto API returns an error which is then ignored by tls_device_reencrypt(). Commit 5c5ec6685806 ("net/tls: add TlsDecryptError stat") added a statistic to count decryption errors, this statistic can't be incremented when we see the expected re-encryption error. Move the inc to the caller. Reported-and-tested-by: David Beckett Fixes: 5c5ec6685806 ("net/tls: add TlsDecryptError stat") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c6803a82b769..1bf886269ede 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -256,8 +256,6 @@ static int tls_do_decryption(struct sock *sk, return ret; ret = crypto_wait_req(ret, &ctx->async_wait); - } else if (ret == -EBADMSG) { - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR); } if (async) @@ -1515,7 +1513,9 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, if (err == -EINPROGRESS) tls_advance_record_sn(sk, prot, &tls_ctx->rx); - + else if (err == -EBADMSG) + TLS_INC_STATS(sock_net(sk), + LINUX_MIB_TLSDECRYPTERROR); return err; } } else { -- cgit v1.2.3 From db885e66d268884dc72967279b7e84f522556abc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 10 Jan 2020 04:38:32 -0800 Subject: net/tls: fix async operation Mallesham reports the TLS with async accelerator was broken by commit d10523d0b3d7 ("net/tls: free the record on encryption error") because encryption can return -EINPROGRESS in such setups, which should not be treated as an error. The error is also present in the BPF path (likely copied from there). Reported-by: Mallesham Jatharakonda Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling") Fixes: d10523d0b3d7 ("net/tls: free the record on encryption error") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 1bf886269ede..5c7c00429f8e 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -770,7 +770,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err) { + if (err && err != -EINPROGRESS) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); } @@ -799,7 +799,7 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err < 0) { + if (err && err != -EINPROGRESS) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); goto out_err; -- cgit v1.2.3 From 9827c0634e461703abf81e8cc8b7adf5da5886d0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Jan 2020 09:03:58 -0800 Subject: ipv4: Detect rollover in specific fib table dump Sven-Haegar reported looping on fib dumps when 255.255.255.255 route has been added to a table. The looping is caused by the key rolling over from FFFFFFFF to 0. When dumping a specific table only, we need a means to detect when the table dump is done. The key and count saved to cb args are both 0 only at the start of the table dump. If key is 0 and count > 0, then we are in the rollover case. Detect and return to avoid looping. 
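To make the wraparound concrete, here is a small standalone C demonstration (not the kernel code; dump_done() is an invented name) of why "count > 0 && key == 0" can only mean the dump has wrapped:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the added check: the only way to see key == 0 with a non-zero
 * dump count is that the previous key was 0xFFFFFFFF and wrapped. */
static int dump_done(uint32_t key, uint32_t count)
{
	return count && !key;
}

int main(void)
{
	uint32_t key = 0, count = 0;

	printf("fresh dump: done=%d\n", dump_done(key, count));  /* 0 */

	/* pretend the 255.255.255.255 route was just dumped */
	key = UINT32_MAX;
	count++;
	key += 1;  /* u32 arithmetic wraps back to 0 */
	printf("after wrap: key=%u done=%d\n", key, dump_done(key, count));  /* key=0 done=1 */
	return 0;
}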
This only affects dumps of a specific table; for dumps of all tables (the case prior to the change in the Fixes tag) inet_dump_fib moved the entry counter to the next table and reset the cb args used by fib_table_dump and fn_trie_dump_leaf, so the rollover ffffffff back to 0 did not cause looping with the dumps. Fixes: effe67926624 ("net: Enable kernel side filtering of route dumps") Reported-by: Sven-Haegar Koch Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index b9df9c09b84e..195469a13371 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2193,6 +2193,12 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, int count = cb->args[2]; t_key key = cb->args[3]; + /* First time here, count and key are both always 0. Count > 0 + * and key == 0 means the dump has wrapped around and we are done. + */ + if (count && !key) + return skb->len; + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { int err; -- cgit v1.2.3 From 4c582234ab3948d08a24c82eb1e00436aabacbc6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 9 Jan 2020 19:57:41 +0200 Subject: devlink: Wait longer before warning about unset port type The commit cited below causes devlink to emit a warning if a type was not set on a devlink port for longer than 30 seconds to "prevent misbehavior of drivers". This proved to be problematic when unregistering the backing netdev. The flow is always: devlink_port_type_clear() // schedules the warning unregister_netdev() // blocking devlink_port_unregister() // cancels the warning The call to unregister_netdev() can block for long periods of time for various reasons: RTNL lock is contended, large amounts of configuration to unroll following dismantle of the netdev, etc. This results in devlink emitting a warning despite the driver behaving correctly. In emulated environments (of future hardware) which are usually very slow, the warning can also be emitted during port creation as more than 30 seconds can pass between the time the devlink port is registered and when its type is set. In addition, syzbot has hit this warning [1] 1974 times since 07/11/19 without being able to produce a reproducer. Probably because reproduction depends on the load or other bugs (e.g., RTNL not being released). To prevent bogus warnings, increase the timeout to 1 hour. [1] https://syzkaller.appspot.com/bug?id=e99b59e9c024a666c9f7450dc162a4b74d09d9cb Fixes: 136bf27fc0e9 ("devlink: add warning in case driver does not set port type") Signed-off-by: Ido Schimmel Reported-by: syzbot+b0a18ed7b08b735d2f41@syzkaller.appspotmail.com Reported-by: Alex Veber Tested-by: Alex Veber Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- net/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 4c63c9a4c09e..b8d698a2bf57 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6406,7 +6406,7 @@ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA; } -#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30) +#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 3600) static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port) { -- cgit v1.2.3 From b0efcae5e12b341e569f971ccd193e31f1d0ffa8 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 9 Jan 2020 11:08:20 -0800 Subject: devlink: correct misspelling of snapshot The function to obtain a unique snapshot id was mistakenly typo'd as devlink_region_shapshot_id_get. Fix this typo by renaming the function and all of its users. Signed-off-by: Jacob Keller Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/crdump.c | 2 +- drivers/net/netdevsim/dev.c | 2 +- include/net/devlink.h | 2 +- net/core/devlink.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c index eaf08f7ad128..64ed725aec28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/crdump.c +++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c @@ -182,7 +182,7 @@ int mlx4_crdump_collect(struct mlx4_dev *dev) crdump_enable_crspace_access(dev, cr_space); /* Get the available snapshot ID for the dumps */ - id = devlink_region_shapshot_id_get(devlink); + id = devlink_region_snapshot_id_get(devlink); /* Try to capture dumps */ mlx4_crdump_collect_crspace(dev, cr_space, id); diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 059711edfc61..4b39aba2e9c4 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -53,7 +53,7 @@ static ssize_t nsim_dev_take_snapshot_write(struct file *file, get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); - id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev)); + id = devlink_region_snapshot_id_get(priv_to_devlink(nsim_dev)); err = devlink_region_snapshot_create(nsim_dev->dummy_region, dummy_data, id, kfree); if (err) { diff --git a/include/net/devlink.h b/include/net/devlink.h index 47f87b2fcf63..38b4acb93f74 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -938,7 +938,7 @@ struct devlink_region *devlink_region_create(struct devlink *devlink, u32 region_max_snapshots, u64 region_size); void devlink_region_destroy(struct devlink_region *region); -u32 devlink_region_shapshot_id_get(struct devlink *devlink); +u32 devlink_region_snapshot_id_get(struct devlink *devlink); int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor); diff --git a/net/core/devlink.c b/net/core/devlink.c index b8d698a2bf57..f76219bf0c21 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -7563,7 +7563,7 @@ void devlink_region_destroy(struct devlink_region *region) EXPORT_SYMBOL_GPL(devlink_region_destroy); /** - * devlink_region_shapshot_id_get - get snapshot ID + * devlink_region_snapshot_id_get - get snapshot ID * * This callback should be called when adding a new snapshot, * Driver should use the same id for multiple snapshots taken @@ -7571,7 +7571,7 @@ EXPORT_SYMBOL_GPL(devlink_region_destroy); * * @devlink: 
devlink */ -u32 devlink_region_shapshot_id_get(struct devlink *devlink) +u32 devlink_region_snapshot_id_get(struct devlink *devlink) { u32 id; @@ -7581,7 +7581,7 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink) return id; } -EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get); +EXPORT_SYMBOL_GPL(devlink_region_snapshot_id_get); /** * devlink_region_snapshot_create - create a new snapshot -- cgit v1.2.3 From c120959387efa51479056fd01dc90adfba7a590c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 10 Jan 2020 11:53:08 -0800 Subject: netfilter: fix a use-after-free in mtype_destroy() map->members is freed by ip_set_free() right before using it in mtype_ext_cleanup() again. So we just have to move it down. Reported-by: syzbot+4c3cc6dbe7259dbf9054@syzkaller.appspotmail.com Fixes: 40cd63bf33b2 ("netfilter: ipset: Support extensions which need a per data destroy function") Acked-by: Jozsef Kadlecsik Signed-off-by: Cong Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 1abd6f0dc227..077a2cb65fcb 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -60,9 +60,9 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - ip_set_free(map->members); if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); + ip_set_free(map->members); ip_set_free(map); set->data = NULL; -- cgit v1.2.3 From 212e7f56605ef9688d0846db60c6c6ec06544095 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 11 Jan 2020 23:19:53 +0100 Subject: netfilter: arp_tables: init netns pointer in xt_tgdtor_param struct An earlier commit (1b789577f655060d98d20e, "netfilter: arp_tables: init netns pointer in xt_tgchk_param struct") fixed missing net initialization for arptables, but turns out it was incomplete. We can get a very similar struct net NULL deref during error unwinding: general protection fault: 0000 [#1] PREEMPT SMP KASAN RIP: 0010:xt_rateest_put+0xa1/0x440 net/netfilter/xt_RATEEST.c:77 xt_rateest_tg_destroy+0x72/0xa0 net/netfilter/xt_RATEEST.c:175 cleanup_entry net/ipv4/netfilter/arp_tables.c:509 [inline] translate_table+0x11f4/0x1d80 net/ipv4/netfilter/arp_tables.c:587 do_replace net/ipv4/netfilter/arp_tables.c:981 [inline] do_arpt_set_ctl+0x317/0x650 net/ipv4/netfilter/arp_tables.c:1461 Also init the netns pointer in xt_tgdtor_param struct. 
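As a rough userspace sketch of the bug pattern (all names below are invented stand-ins, not the real netfilter structures): a destructor reads a member of a caller-built parameter block, so every caller has to fill that member in before invoking it.

#include <stdio.h>

struct net { const char *name; };

struct tgdtor_param {
	struct net *net;      /* the field the arp_tables unwind path forgot to set */
	const char *target;
};

static void target_destroy(const struct tgdtor_param *par)
{
	/* like xt_rateest_put() needing par->net: dereferencing an unset
	 * pointer here is the reported general protection fault */
	if (!par->net) {
		printf("destroying %s: par->net unset, would crash\n", par->target);
		return;
	}
	printf("destroying %s in netns %s\n", par->target, par->net->name);
}

int main(void)
{
	struct net init_net = { .name = "init_net" };
	struct tgdtor_param par = { .target = "RATEEST" };  /* .net stays NULL */

	target_destroy(&par);   /* the buggy error-unwind path */

	par.net = &init_net;    /* what passing net into cleanup_entry() provides */
	target_destroy(&par);
	return 0;
}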
Fixes: add67461240c1d ("netfilter: add struct net * to target parameters") Reported-by: syzbot+91bdd8eece0f6629ec8b@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 069f72edb264..f1f78a742b36 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -496,12 +496,13 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return 0; } -static inline void cleanup_entry(struct arpt_entry *e) +static void cleanup_entry(struct arpt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; t = arpt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_ARP; @@ -584,7 +585,7 @@ static int translate_table(struct net *net, xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; - cleanup_entry(iter); + cleanup_entry(iter, net); } return ret; } @@ -927,7 +928,7 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -990,7 +991,7 @@ static int do_replace(struct net *net, const void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1287,7 +1288,7 @@ static int compat_do_replace(struct net *net, void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1514,7 +1515,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -static void __arpt_unregister_table(struct xt_table *table) +static void __arpt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -1526,7 +1527,7 @@ static void __arpt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -1566,7 +1567,7 @@ int arpt_register_table(struct net *net, ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret != 0) { - __arpt_unregister_table(new_table); + __arpt_unregister_table(net, new_table); *res = NULL; } @@ -1581,7 +1582,7 @@ void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); - __arpt_unregister_table(table); + __arpt_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ -- cgit v1.2.3 From c742c59e1fbd022b64d91aa9a0092b3a699d653c Mon Sep 17 00:00:00 2001 From: Sunil Muthuswamy Date: Tue, 14 Jan 2020 00:52:14 +0000 Subject: hv_sock: Remove the accept port restriction Currently, hv_sock restricts the port the guest socket can accept connections on. 
hv_sock divides the socket port namespace into two parts for server side (listening socket), 0-0x7FFFFFFF & 0x80000000-0xFFFFFFFF (there are no restrictions on client port namespace). The first part (0-0x7FFFFFFF) is reserved for sockets where connections can be accepted. The second part (0x80000000-0xFFFFFFFF) is reserved for allocating ports for the peer (host) socket, once a connection is accepted. This reservation of the port namespace is specific to hv_sock and not known by the generic vsock library (ex: af_vsock). This is problematic because auto-binds/ephemeral ports are handled by the generic vsock library and it has no knowledge of this port reservation and could allocate a port that is not compatible with hv_sock (and legitimately so). The issue hasn't surfaced so far because the auto-bind code of vsock (__vsock_bind_stream) prior to the change 'VSOCK: bind to random port for VMADDR_PORT_ANY' would start walking up from LAST_RESERVED_PORT (1023) and start assigning ports. That will take a large number of iterations to hit 0x7FFFFFFF. But, after the above change to randomize port selection, the issue has started coming up more frequently. There has really been no good reason to have this port reservation logic in hv_sock from the get go. Reserving a local port for peer ports is not how things are handled generally. Peer ports should reflect the peer port. This fixes the issue by lifting the port reservation, and also returns the right peer port. Since the code converts the GUID to the peer port (by using the first 4 bytes), there is a possibility of conflicts, but that seems like a reasonable risk to take, given this is limited to vsock and that only applies to all local sockets. Signed-off-by: Sunil Muthuswamy Signed-off-by: David S. Miller --- net/vmw_vsock/hyperv_transport.c | 65 ++++------------------------------------ 1 file changed, 6 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index b3bdae74c243..3492c021925f 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -138,28 +138,15 @@ struct hvsock { **************************************************************************** * The only valid Service GUIDs, from the perspectives of both the host and * * Linux VM, that can be connected by the other end, must conform to this * - * format: -facb-11e6-bd58-64006a7986d3, and the "port" must be in * - * this range [0, 0x7FFFFFFF]. * + * format: -facb-11e6-bd58-64006a7986d3. * **************************************************************************** * * When we write apps on the host to connect(), the GUID ServiceID is used. * When we write apps in Linux VM to connect(), we only need to specify the * port and the driver will form the GUID and use that to request the host. * - * From the perspective of Linux VM: - * 1. the local ephemeral port (i.e. the local auto-bound port when we call - * connect() without explicit bind()) is generated by __vsock_bind_stream(), - * and the range is [1024, 0xFFFFFFFF). - * 2. the remote ephemeral port (i.e. the auto-generated remote port for - * a connect request initiated by the host's connect()) is generated by - * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). 
*/ -#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) -#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT -#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT -#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) - /* 00000000-facb-11e6-bd58-64006a7986d3 */ static const guid_t srv_id_template = GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, @@ -184,34 +171,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id) vsock_addr_init(addr, VMADDR_CID_ANY, port); } -static void hvs_remote_addr_init(struct sockaddr_vm *remote, - struct sockaddr_vm *local) -{ - static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - struct sock *sk; - - /* Remote peer is always the host */ - vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY); - - while (1) { - /* Wrap around ? */ - if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || - host_ephemeral_port == VMADDR_PORT_ANY) - host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - - remote->svm_port = host_ephemeral_port++; - - sk = vsock_find_connected_socket(remote, local); - if (!sk) { - /* Found an available ephemeral port */ - return; - } - - /* Release refcnt got in vsock_find_connected_socket */ - sock_put(sk); - } -} - static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) { set_channel_pending_send_size(chan, @@ -341,12 +300,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if_type = &chan->offermsg.offer.if_type; if_instance = &chan->offermsg.offer.if_instance; conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; - - /* The host or the VM should only listen on a port in - * [0, MAX_LISTEN_PORT] - */ - if (!is_valid_srv_id(if_type) || - get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) + if (!is_valid_srv_id(if_type)) return; hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); @@ -371,8 +325,11 @@ static void hvs_open_connection(struct vmbus_channel *chan) vnew = vsock_sk(new); hvs_addr_init(&vnew->local_addr, if_type); - hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); + /* Remote peer is always the host */ + vsock_addr_init(&vnew->remote_addr, + VMADDR_CID_HOST, VMADDR_PORT_ANY); + vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance); ret = vsock_assign_transport(vnew, vsock_sk(sk)); /* Transport assigned (looking at remote_addr) must be the * same where we received the request. @@ -766,16 +723,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk) static bool hvs_stream_allow(u32 cid, u32 port) { - /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is - * reserved as ephemeral ports, which are used as the host's ports - * when the host initiates connections. - * - * Perform this check in the guest so an immediate error is produced - * instead of a timeout. - */ - if (port > MAX_HOST_LISTEN_PORT) - return false; - if (cid == VMADDR_CID_HOST) return true; -- cgit v1.2.3 From c4b9d655e445a8be0bff624aedea190606b5ebbc Mon Sep 17 00:00:00 2001 From: Ganapathi Bhat Date: Fri, 20 Dec 2019 10:14:32 +0000 Subject: wireless: fix enabling channel 12 for custom regulatory domain Commit e33e2241e272 ("Revert "cfg80211: Use 5MHz bandwidth by default when checking usable channels"") fixed a broken regulatory (leaving channel 12 open for AP where not permitted). Apply a similar fix to custom regulatory domain processing. 
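A rough numeric illustration of why the minimum bandwidth matters (standalone C with an invented helper, much simplified from the real rule matching): an FCC-style rule covering 2402-2472 MHz can fit channel 12 (2467 MHz) when checked at 5 MHz, but not at 20 MHz, so keeping the 20 MHz minimum leaves the channel disabled.

#include <stdio.h>

/* does a channel of the given width fit entirely inside the rule's range? */
static int rule_fits(double start_mhz, double end_mhz,
		     double center_mhz, double bw_mhz)
{
	double low = center_mhz - bw_mhz / 2;
	double high = center_mhz + bw_mhz / 2;

	return low >= start_mhz && high <= end_mhz;
}

int main(void)
{
	const double rule_start = 2402, rule_end = 2472; /* US-style 2.4 GHz rule */
	const double ch12 = 2467;                        /* channel 12 center */

	printf("channel 12 at 20 MHz allowed: %d\n",
	       rule_fits(rule_start, rule_end, ch12, 20)); /* 0: stays disabled */
	printf("channel 12 at  5 MHz allowed: %d\n",
	       rule_fits(rule_start, rule_end, ch12, 5));  /* 1: the old bug */
	return 0;
}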
Signed-off-by: Cathy Luo Signed-off-by: Ganapathi Bhat Link: https://lore.kernel.org/r/1576836859-8945-1-git-send-email-ganapathi.bhat@nxp.com [reword commit message, fix coding style, add a comment] Signed-off-by: Johannes Berg --- net/wireless/reg.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 446c76d44e65..3c2070040277 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2261,14 +2261,15 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_channel *chan, - const struct ieee80211_regdomain *regd) + const struct ieee80211_regdomain *regd, + u32 min_bw) { u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; u32 bw; - for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { + for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) @@ -2324,8 +2325,14 @@ static void handle_band_custom(struct wiphy *wiphy, if (!sband) return; + /* + * We currently assume that you always want at least 20 MHz, + * otherwise channel 12 might get enabled if this rule is + * compatible to US, which permits 2402 - 2472 MHz. + */ for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, &sband->channels[i], regd); + handle_channel_custom(wiphy, &sband->channels[i], regd, + MHZ_TO_KHZ(20)); } /* Used by drivers prior to wiphy registration */ -- cgit v1.2.3 From 26ec17a1dc5ecdd8d91aba63ead6f8b5ad5dea0d Mon Sep 17 00:00:00 2001 From: Orr Mazor Date: Sun, 22 Dec 2019 14:55:31 +0000 Subject: cfg80211: Fix radar event during another phy CAC In case a radar event of CAC_FINISHED or RADAR_DETECTED happens while another phy is in the middle of CAC, we might need to cancel that CAC. If we got a radar on a channel that another phy is currently doing CAC on, then that CAC should be canceled. If, for example, two phys are doing CAC on the same channels, or on compatible channels, once one of them finishes its CAC the other might need to cancel its CAC, since it is no longer relevant. To fix that, this commit adds a callback and implements it in mac80211 to end CAC. This commit also adds a call to said callback if, after a radar event, we see that the CAC is no longer relevant. Signed-off-by: Orr Mazor Reviewed-by: Sergey Matyukevich Link: https://lore.kernel.org/r/20191222145449.15792-1-Orr.Mazor@tandemg.com [slightly reformat/reword commit message] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ net/mac80211/cfg.c | 23 +++++++++++++++++++++++ net/wireless/rdev-ops.h | 10 ++++++++++ net/wireless/reg.c | 23 ++++++++++++++++++++++- net/wireless/trace.h | 5 +++++ 5 files changed, 65 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 059524b87c4c..f22bd6c838a3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3548,6 +3548,9 @@ struct cfg80211_update_owe_info { * * @start_radar_detection: Start radar detection in the driver. * + * @end_cac: End running CAC, probably because a related CAC + * was finished on another phy. + * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. 
@@ -3874,6 +3877,8 @@ struct cfg80211_ops { struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms); + void (*end_cac)(struct wiphy *wiphy, + struct net_device *dev); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fb7f1f12109..000c742d0527 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2954,6 +2954,28 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, return err; } +static void ieee80211_end_cac(struct wiphy *wiphy, + struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + sdata->wdev.cac_started = false; + } + } + mutex_unlock(&local->mtx); +} + static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { @@ -4023,6 +4045,7 @@ const struct cfg80211_ops mac80211_config_ops = { #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, + .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e853a4fe6f97..663c0d3127a4 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1167,6 +1167,16 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, return ret; } +static inline void +rdev_end_cac(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + trace_rdev_end_cac(&rdev->wiphy, dev); + if (rdev->ops->end_cac) + rdev->ops->end_cac(&rdev->wiphy, dev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3c2070040277..fff9a74891fc 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3892,6 +3892,25 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy) } EXPORT_SYMBOL(regulatory_pre_cac_allowed); +static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + /* If we finished CAC or received radar, we should end any + * CAC running on the same channels. + * the check !cfg80211_chandef_dfs_usable contain 2 options: + * either all channels are available - those the CAC_FINISHED + * event has effected another wdev state, or there is a channel + * in unavailable state in wdev chandef - those the RADAR_DETECTED + * event has effected another wdev state. + * In both cases we should end the CAC on the wdev. 
+ */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->cac_started && + !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + rdev_end_cac(rdev, wdev->netdev); + } +} + void regulatory_propagate_dfs_state(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state, @@ -3918,8 +3937,10 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state); if (event == NL80211_RADAR_DETECTED || - event == NL80211_RADAR_CAC_FINISHED) + event == NL80211_RADAR_CAC_FINISHED) { cfg80211_sched_dfs_chan_update(rdev); + cfg80211_check_and_end_cac(rdev); + } nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL); } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d98ad2b3143b..8677d7ab7d69 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -646,6 +646,11 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_ARGS(wiphy, netdev) ); +DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), + TP_ARGS(wiphy, netdev) +); + DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), -- cgit v1.2.3 From 6f601265215a421f425ba3a4850a35861d024643 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 7 Jan 2020 17:35:45 +0200 Subject: mac80211: Fix TKIP replay protection immediately after key setup TKIP replay protection was skipped for the very first frame received after a new key is configured. While this is potentially needed to avoid dropping a frame in some cases, this does leave a window for replay attacks with group-addressed frames at the station side. Any earlier frame sent by the AP using the same key would be accepted as a valid frame and the internal RSC would then be updated to the TSC from that frame. This would allow multiple previously transmitted group-addressed frames to be replayed until the next valid new group-addressed frame from the AP is received by the station. Fix this by limiting the no-replay-protection exception to apply only for the case where TSC=0, i.e., when this is for the very first frame protected using the new key, and the local RSC had not been set to a higher value when configuring the key (which may happen with GTK). Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200107153545.10934-1-j@w1.fi Signed-off-by: Johannes Berg --- net/mac80211/tkip.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 727dc9f3f3b3..e7f57bb18f6e 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -263,9 +263,21 @@ int ieee80211_tkip_decrypt_data(struct arc4_ctx *ctx, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT && - (iv32 < rx_ctx->iv32 || - (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16))) + /* Reject replays if the received TSC is smaller than or equal to the + * last received value in a valid message, but with an exception for + * the case where a new key has been set and no valid frame using that + * key has yet received and the local RSC was initialized to 0. This + * exception allows the very first frame sent by the transmitter to be + * accepted even if that transmitter were to use TSC 0 (IEEE 802.11 + * described TSC to be initialized to 1 whenever a new key is taken into + * use). 
+ */ + if (iv32 < rx_ctx->iv32 || + (iv32 == rx_ctx->iv32 && + (iv16 < rx_ctx->iv16 || + (iv16 == rx_ctx->iv16 && + (rx_ctx->iv32 || rx_ctx->iv16 || + rx_ctx->ctx.state != TKIP_STATE_NOT_INIT))))) return TKIP_DECRYPT_REPLAY; if (only_iv) { -- cgit v1.2.3 From e16119655c9e6c4aa5767cd971baa9c491f41b13 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 21:07:35 +0100 Subject: wireless: wext: avoid gcc -O3 warning After the introduction of CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3, the wext code produces a bogus warning: In function 'iw_handler_get_iwstats', inlined from 'ioctl_standard_call' at net/wireless/wext-core.c:1015:9, inlined from 'wireless_process_ioctl' at net/wireless/wext-core.c:935:10, inlined from 'wext_ioctl_dispatch.part.8' at net/wireless/wext-core.c:986:8, inlined from 'wext_handle_ioctl': net/wireless/wext-core.c:671:3: error: argument 1 null where non-null expected [-Werror=nonnull] memcpy(extra, stats, sizeof(struct iw_statistics)); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from arch/x86/include/asm/string.h:5, net/wireless/wext-core.c: In function 'wext_handle_ioctl': arch/x86/include/asm/string_64.h:14:14: note: in a call to function 'memcpy' declared here The problem is that ioctl_standard_call() sometimes calls the handler with a NULL argument that would cause a problem for iw_handler_get_iwstats. However, iw_handler_get_iwstats never actually gets called that way. Marking that function as noinline avoids the warning and leads to slightly smaller object code as well. Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20200107200741.3588770-1-arnd@arndb.de Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 5e677dac2a0c..69102fda9ebd 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -657,7 +657,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) return NULL; } -static int iw_handler_get_iwstats(struct net_device * dev, +/* noinline to avoid a bogus warning with -O3 */ +static noinline int iw_handler_get_iwstats(struct net_device * dev, struct iw_request_info * info, union iwreq_data * wrqu, char * extra) -- cgit v1.2.3 From 5a128a088a2ab0b5190eeb232b5aa0b1017a0317 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Wed, 8 Jan 2020 12:55:36 +0100 Subject: cfg80211: fix deadlocks in autodisconnect work Use methods which do not try to acquire the wdev lock themselves. 
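A userspace analogy of the deadlock, with a pthread mutex standing in for the wdev lock and invented function names: the work item already holds the lock, so it must call the double-underscore variants that expect the lock to be held rather than helpers that take it again.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t wdev_mtx;

static void __leave_ibss(void)          /* caller must hold wdev_mtx */
{
	puts("leaving IBSS (lock already held)");
}

static int leave_ibss(void)             /* takes the lock itself */
{
	int err = pthread_mutex_lock(&wdev_mtx);

	if (err)
		return err;
	__leave_ibss();
	return pthread_mutex_unlock(&wdev_mtx);
}

int main(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	/* an error-checking mutex reports the double lock instead of hanging */
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
	pthread_mutex_init(&wdev_mtx, &attr);

	pthread_mutex_lock(&wdev_mtx);               /* like holding the wdev lock in the work item */
	printf("locking variant while already locked: %s\n",
	       strerror(leave_ibss()));              /* EDEADLK: the old deadlock */
	__leave_ibss();                              /* what the fix calls instead */
	pthread_mutex_unlock(&wdev_mtx);
	return 0;
}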
Cc: stable@vger.kernel.org Fixes: 37b1c004685a3 ("cfg80211: Support all iftypes in autodisconnect_wk") Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200108115536.2262-1-markus.theil@tu-ilmenau.de Signed-off-by: Johannes Berg --- net/wireless/sme.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 7a6c38ddc65a..d32a2ec4d96a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1307,14 +1307,14 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, wdev->netdev, false); + __cfg80211_leave_ibss(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, wdev->netdev, false); + __cfg80211_stop_ap(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, wdev->netdev); + __cfg80211_leave_mesh(rdev, wdev->netdev); break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: -- cgit v1.2.3 From 2a279b34169e9bbf7c240691466420aba75b4175 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 8 Jan 2020 18:06:29 +0100 Subject: cfg80211: fix memory leak in nl80211_probe_mesh_link The per-tid statistics need to be released after the call to rdev_get_station Cc: stable@vger.kernel.org Fixes: 5ab92e7fe49a ("cfg80211: add support to probe unexercised mesh link") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200108170630.33680-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index da5262b2298b..73e1430c7ac5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13796,6 +13796,8 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) if (err) return err; + cfg80211_sinfo_release_content(&sinfo); + return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } -- cgit v1.2.3 From df16737d438f534d0cc9948c7c5158f1986c5c87 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 8 Jan 2020 18:06:30 +0100 Subject: cfg80211: fix memory leak in cfg80211_cqm_rssi_update The per-tid statistics need to be released after the call to rdev_get_station Cc: stable@vger.kernel.org Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200108170630.33680-2-nbd@nbd.name Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 73e1430c7ac5..1e97ac5435b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10843,6 +10843,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, if (err) return err; + cfg80211_sinfo_release_content(&sinfo); if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG)) wdev->cqm_config->last_rssi_event_value = (s8) sinfo.rx_beacon_signal_avg; -- cgit v1.2.3 From 24953de0a5e31dcca7e82c8a3c79abc2dfe8fb6e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Jan 2020 12:53:59 +0100 Subject: cfg80211: check for set_wiphy_params Check if set_wiphy_params is assigned and return an error if not, some drivers (e.g. virt_wifi where syzbot reported it) don't have it. 
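The added guard as a tiny standalone sketch (invented struct and function names, not the cfg80211 types): an ops callback that a driver may leave NULL is checked before the call and turned into -EOPNOTSUPP.

#include <errno.h>
#include <stdio.h>

struct ops {
	int (*set_wiphy_params)(unsigned int changed); /* may be NULL, e.g. virt_wifi */
};

static int do_set_wiphy_params(const struct ops *ops, unsigned int changed)
{
	if (!ops->set_wiphy_params)
		return -EOPNOTSUPP;    /* the guard the patch adds */
	return ops->set_wiphy_params(changed);
}

static int real_set_wiphy_params(unsigned int changed)
{
	printf("applying changes 0x%x\n", changed);
	return 0;
}

int main(void)
{
	struct ops full = { .set_wiphy_params = real_set_wiphy_params };
	struct ops minimal = { 0 };    /* driver that never filled in the callback */

	printf("full driver:    %d\n", do_set_wiphy_params(&full, 0x4));
	printf("minimal driver: %d\n", do_set_wiphy_params(&minimal, 0x4));
	return 0;
}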
Reported-by: syzbot+e8a797964a4180eb57d5@syzkaller.appspotmail.com Reported-by: syzbot+34b582cf32c1db008f8e@syzkaller.appspotmail.com Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200113125358.ac07f276efff.Ibd85ee1b12e47b9efb00a2adc5cd3fac50da791a@changeid Signed-off-by: Johannes Berg --- net/wireless/rdev-ops.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 663c0d3127a4..e0d34f796d0b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -538,6 +538,10 @@ static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { int ret; + + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; + trace_rdev_set_wiphy_params(&rdev->wiphy, changed); ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); -- cgit v1.2.3 From 81c044fc3bdc5b7be967cd3682528ea94b58c06a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 13 Jan 2020 19:21:07 +0100 Subject: cfg80211: fix page refcount issue in A-MSDU decap The fragments attached to a skb can be part of a compound page. In that case, page_ref_inc will increment the refcount for the wrong page. Fix this by using get_page instead, which calls page_ref_inc on the compound head and also checks for overflow. Fixes: 2b67f944f88c ("cfg80211: reuse existing page fragments in A-MSDU rx") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20200113182107.20461-1-nbd@nbd.name Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index 5b4ed5bbc542..8481e9ac33da 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -564,7 +564,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page, struct skb_shared_info *sh = skb_shinfo(skb); int page_offset; - page_ref_inc(page); + get_page(page); page_offset = ptr - page_address(page); skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size); } -- cgit v1.2.3 From e176b1ba476cf36f723cfcc7a9e57f3cb47dec70 Mon Sep 17 00:00:00 2001 From: Pengcheng Yang Date: Tue, 14 Jan 2020 17:23:40 +0800 Subject: tcp: fix marked lost packets not being retransmitted When the packet pointed to by retransmit_skb_hint is unlinked by ACK, retransmit_skb_hint will be set to NULL in tcp_clean_rtx_queue(). If packet loss is detected at this time, retransmit_skb_hint will be set to point to the current packet loss in tcp_verify_retransmit_hint(), then the packets that were previously marked lost but not retransmitted due to the restriction of cwnd will be skipped and cannot be retransmitted. To fix this, when retransmit_skb_hint is NULL, retransmit_skb_hint can be reset only after all marked lost packets are retransmitted (retrans_out >= lost_out), otherwise we need to traverse from tcp_rtx_queue_head in tcp_xmit_retransmit_queue(). Packetdrill to demonstrate: // Disable RACK and set max_reordering to keep things simple 0 `sysctl -q net.ipv4.tcp_recovery=0` +0 `sysctl -q net.ipv4.tcp_max_reordering=3` // Establish a connection +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +.1 < S 0:0(0) win 32792 +0 > S. 0:0(0) ack 1 <...> +.01 < . 1:1(0) ack 1 win 257 +0 accept(3, ..., ...) = 4 // Send 8 data segments +0 write(4, ..., 8000) = 8000 +0 > P. 
1:8001(8000) ack 1 // Enter recovery and 1:3001 is marked lost +.01 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 +0 < . 1:1(0) ack 1 win 257 // Retransmit 1:1001, now retransmit_skb_hint points to 1001:2001 +0 > . 1:1001(1000) ack 1 // 1001:2001 was ACKed causing retransmit_skb_hint to be set to NULL +.01 < . 1:1(0) ack 2001 win 257 // Now retransmit_skb_hint points to 4001:5001 which is now marked lost // BUG: 2001:3001 was not retransmitted +0 > . 2001:3001(1000) ack 1 Signed-off-by: Pengcheng Yang Acked-by: Neal Cardwell Tested-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0238b554a1f0..5347ab2c9c58 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -915,9 +915,10 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if (!tp->retransmit_skb_hint || - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || + (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } -- cgit v1.2.3 From 7e81a35302066c5a00b4c72d83e3ea4cad6eeb5b Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:00 +0000 Subject: bpf: Sockmap, ensure sock lock held during tear down The sock_map_free() and sock_hash_free() paths used to delete sockmap and sockhash maps walk the maps and destroy psock and bpf state associated with the socks in the map. When done the socks no longer have BPF programs attached and will function normally. This can happen while the socks in the map are still "live" meaning data may be sent/received during the walk. Currently, though we don't take the sock_lock when the psock and bpf state is removed through this path. Specifically, this means we can be writing into the ops structure pointers such as sendmsg, sendpage, recvmsg, etc. while they are also being called from the networking side. This is not safe, we never used proper READ_ONCE/WRITE_ONCE semantics here if we believed it was safe. Further its not clear to me its even a good idea to try and do this on "live" sockets while networking side might also be using the socket. Instead of trying to reason about using the socks from both sides lets realize that every use case I'm aware of rarely deletes maps, in fact kubernetes/Cilium case builds map at init and never tears it down except on errors. So lets do the simple fix and grab sock lock. This patch wraps sock deletes from maps in sock lock and adds some annotations so we catch any other cases easier. 
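As a loose userspace analogy (a pthread mutex in place of lock_sock(), all names invented): the point of the patch is that restoring the original callbacks happens under the same per-socket lock the data path runs under, so the data path never sees half-torn-down state.

#include <pthread.h>
#include <stdio.h>

struct sock_state {
	pthread_mutex_t lock;              /* stands in for the socket lock */
	void (*recvmsg)(void);             /* stands in for the proto ops */
};

static void tcp_recvmsg_stub(void) { puts("plain tcp_recvmsg"); }
static void bpf_recvmsg_stub(void) { puts("sockmap tcp_bpf_recvmsg"); }

static void data_path(struct sock_state *sk)
{
	pthread_mutex_lock(&sk->lock);
	sk->recvmsg();                     /* always sees a consistent op */
	pthread_mutex_unlock(&sk->lock);
}

static void map_free(struct sock_state *sk)
{
	pthread_mutex_lock(&sk->lock);     /* what the patch adds around teardown */
	sk->recvmsg = tcp_recvmsg_stub;    /* restore the original callback */
	pthread_mutex_unlock(&sk->lock);
}

int main(void)
{
	struct sock_state sk = { .recvmsg = bpf_recvmsg_stub };

	pthread_mutex_init(&sk.lock, NULL);

	data_path(&sk);                    /* socket still in the map */
	map_free(&sk);                     /* map teardown, now under the lock */
	data_path(&sk);                    /* back to plain TCP behaviour */

	pthread_mutex_destroy(&sk.lock);
	return 0;
}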
Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-3-john.fastabend@gmail.com --- net/core/skmsg.c | 2 ++ net/core/sock_map.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ded2d5227678..3866d7e20c07 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -594,6 +594,8 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { + sock_owned_by_me(sk); + sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index eb114ee419b6..8998e356f423 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -241,8 +241,11 @@ static void sock_map_free(struct bpf_map *map) struct sock *sk; sk = xchg(psk, NULL); - if (sk) + if (sk) { + lock_sock(sk); sock_map_unref(sk, psk); + release_sock(sk); + } } raw_spin_unlock_bh(&stab->lock); rcu_read_unlock(); @@ -862,7 +865,9 @@ static void sock_hash_free(struct bpf_map *map) raw_spin_lock_bh(&bucket->lock); hlist_for_each_entry_safe(elem, node, &bucket->head, node) { hlist_del_rcu(&elem->node); + lock_sock(elem->sk); sock_map_unref(elem->sk, elem); + release_sock(elem->sk); } raw_spin_unlock_bh(&bucket->lock); } -- cgit v1.2.3 From 33bfe20dd7117dd81fd896a53f743a233e1ad64f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:01 +0000 Subject: bpf: Sockmap/tls, push write_space updates through ulp updates When sockmap sock with TLS enabled is removed we cleanup bpf/psock state and call tcp_update_ulp() to push updates to TLS ULP on top. However, we don't push the write_space callback up and instead simply overwrite the op with the psock stored previous op. This may or may not be correct so to ensure we don't overwrite the TLS write space hook pass this field to the ULP and have it fixup the ctx. This completes a previous fix that pushed the ops through to the ULP but at the time missed doing this for write_space, presumably because write_space TLS hook was added around the same time. 
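A simplified sketch of the idea in plain C (invented types; only the shape of the update call reflects the patch): the saved proto and the saved write_space callback travel together, so the TLS layer can store the callback in its own context instead of having it overwritten underneath it.

#include <stdio.h>

struct proto { const char *name; };

struct tls_ctx {
	struct proto *sk_proto;
	void (*sk_write_space)(void);     /* TLS chains to this saved callback */
};

static void tcp_write_space(void) { puts("tcp write space"); }

/* mirrors the reworked tls_update(): take over both pointers into the ctx */
static void tls_update(struct tls_ctx *ctx, struct proto *p,
		       void (*write_space)(void))
{
	ctx->sk_proto = p;
	ctx->sk_write_space = write_space;
}

int main(void)
{
	static struct proto tcp_proto = { "tcp" };
	struct tls_ctx ctx = { 0 };

	/* sockmap restore path: both saved values are passed in one call */
	tls_update(&ctx, &tcp_proto, tcp_write_space);

	printf("ctx proto: %s\n", ctx.sk_proto->name);
	ctx.sk_write_space();
	return 0;
}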
Fixes: 95fa145479fbc ("bpf: sockmap/tls, close can race with map free") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Jonathan Lemon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-4-john.fastabend@gmail.com --- include/linux/skmsg.h | 12 ++++++++---- include/net/tcp.h | 6 ++++-- net/ipv4/tcp_ulp.c | 6 ++++-- net/tls/tls_main.c | 10 +++++++--- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index b6afe01f8592..14d61bba0b79 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -359,17 +359,21 @@ static inline void sk_psock_restore_proto(struct sock *sk, struct sk_psock *psock) { sk->sk_prot->unhash = psock->saved_unhash; - sk->sk_write_space = psock->saved_write_space; if (psock->sk_proto) { struct inet_connection_sock *icsk = inet_csk(sk); bool has_ulp = !!icsk->icsk_ulp_data; - if (has_ulp) - tcp_update_ulp(sk, psock->sk_proto); - else + if (has_ulp) { + tcp_update_ulp(sk, psock->sk_proto, + psock->saved_write_space); + } else { sk->sk_prot = psock->sk_proto; + sk->sk_write_space = psock->saved_write_space; + } psock->sk_proto = NULL; + } else { + sk->sk_write_space = psock->saved_write_space; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index e460ea7f767b..e6f48384dc71 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2147,7 +2147,8 @@ struct tcp_ulp_ops { /* initialize ulp */ int (*init)(struct sock *sk); /* update ulp */ - void (*update)(struct sock *sk, struct proto *p); + void (*update)(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ @@ -2162,7 +2163,8 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); -void tcp_update_ulp(struct sock *sk, struct proto *p); +void tcp_update_ulp(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)); #define MODULE_ALIAS_TCP_ULP(name) \ __MODULE_INFO(alias, alias_userspace, name); \ diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 12ab5db2b71c..38d3ad141161 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -99,17 +99,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen) rcu_read_unlock(); } -void tcp_update_ulp(struct sock *sk, struct proto *proto) +void tcp_update_ulp(struct sock *sk, struct proto *proto, + void (*write_space)(struct sock *sk)) { struct inet_connection_sock *icsk = inet_csk(sk); if (!icsk->icsk_ulp_ops) { + sk->sk_write_space = write_space; sk->sk_prot = proto; return; } if (icsk->icsk_ulp_ops->update) - icsk->icsk_ulp_ops->update(sk, proto); + icsk->icsk_ulp_ops->update(sk, proto, write_space); } void tcp_cleanup_ulp(struct sock *sk) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index dac24c7aa7d4..94774c0e5ff3 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -732,15 +732,19 @@ out: return rc; } -static void tls_update(struct sock *sk, struct proto *p) +static void tls_update(struct sock *sk, struct proto *p, + void (*write_space)(struct sock *sk)) { struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) + if (likely(ctx)) { + ctx->sk_write_space = write_space; ctx->sk_proto = p; - else + } else { sk->sk_prot = p; + sk->sk_write_space = write_space; + } } static int tls_get_info(const struct sock *sk, struct sk_buff *skb) -- cgit 
v1.2.3 From 6562e29cf6f0ddd368657d97a8d484ffc30df5ef Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:02 +0000 Subject: bpf: Sockmap, skmsg helper overestimates push, pull, and pop bounds In the push, pull, and pop helpers operating on skmsg objects to make data writable or insert/remove data, we use this bounds check to ensure the specified data is valid, /* Bounds checks: start and pop must be inside message */ if (start >= offset + l || last >= msg->sg.size) return -EINVAL; The problem here is that offset has already included the length of the current element, the 'l' above. So start could be past the end of the scatterlist element in the case where start also points into an offset on the last skmsg element. To fix this, do the accounting slightly differently by adding the length of the previous entry to offset at the start of the iteration, and ensure it is initialized to zero so that the first iteration does nothing. Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface") Fixes: 6fff607e2f14b ("bpf: sk_msg program helper bpf_msg_push_data") Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-5-john.fastabend@gmail.com --- net/core/filter.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d22d108fc6e3..ffa2278020d7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2231,10 +2231,10 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += len; len = sk_msg_elem(msg, i)->length; if (start < offset + len) break; - offset += len; sk_msg_iter_var_next(i); } while (i != msg->sg.end); @@ -2346,7 +2346,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; - u32 new, i = 0, l, space, copy = 0, offset = 0; + u32 new, i = 0, l = 0, space, copy = 0, offset = 0; u8 *raw, *to, *from; struct page *page; @@ -2356,11 +2356,11 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; - offset += l; sk_msg_iter_var_next(i); } while (i != msg->sg.end); @@ -2506,7 +2506,7 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i) BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { - u32 i = 0, l, space, offset = 0; + u32 i = 0, l = 0, space, offset = 0; u64 last = start + len; int pop; @@ -2516,11 +2516,11 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, /* First find the starting scatterlist element */ i = msg->sg.start; do { + offset += l; l = sk_msg_elem(msg, i)->length; if (start < offset + l) break; - offset += l; sk_msg_iter_var_next(i); } while (i != msg->sg.end); -- cgit v1.2.3 From cf21e9ba1eb86c9333ca5b05b2f1cc94021bcaef Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:03 +0000 Subject: bpf: Sockmap/tls, msg_push_data may leave end mark in place Leaving an incorrect end mark in place when passing to the crypto layer will cause the crypto layer to stop processing data before all data is encrypted.
To fix this, clear the end mark on push data instead of expecting users of the helper to clear the mark value after the fact. This happens when we push data into the middle of an skmsg and have room for it, so we don't do a set of copies that already clear the end flag. Fixes: 6fff607e2f14b ("bpf: sk_msg program helper bpf_msg_push_data") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-6-john.fastabend@gmail.com --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index ffa2278020d7..538f6a735a19 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2415,6 +2415,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_next(i); sg_unmark_end(psge); + sg_unmark_end(&rsge); sk_msg_iter_next(msg, end); } -- cgit v1.2.3 From d468e4775c1c351616947ba0cccc43273963b9b5 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:04 +0000 Subject: bpf: Sockmap/tls, tls_sw can create a plaintext buf > encrypt buf It is possible to build a plaintext buffer using the push helper that is larger than the allocated encrypt buffer. When this record is pushed to crypto layers, this can result in a NULL pointer dereference because the crypto API expects the encrypt buffer to be large enough to fit the plaintext buffer. Kernel splat below. To resolve this, catch the cases where this can happen and split the buffer into two records to send individually. Unfortunately, there is still one case to handle where the split creates a zero sized buffer. In this case we merge the buffers and unmark the split. This happens when apply is zero and the user pushed data beyond the encrypt buffer. This fixes the original case as well because the split allocated an encrypt buffer larger than the plaintext buffer and the merge simply moves the pointers around so we now have a reference to the new (larger) encrypt buffer. Perhaps it's not ideal, but it seems the best solution for a fixes branch and avoids handling these two cases separately: (a) the apply case that needs a split and (b) the non-apply case. These are edge cases anyway, so optimizing them seems unnecessary unless someone wants to later in the next branches. [ 306.719107] BUG: kernel NULL pointer dereference, address: 0000000000000008 [...] [ 306.747260] RIP: 0010:scatterwalk_copychunks+0x12f/0x1b0 [...] [ 306.770350] Call Trace: [ 306.770956] scatterwalk_map_and_copy+0x6c/0x80 [ 306.772026] gcm_enc_copy_hash+0x4b/0x50 [ 306.772925] gcm_hash_crypt_remain_continue+0xef/0x110 [ 306.774138] gcm_hash_crypt_continue+0xa1/0xb0 [ 306.775103] ? gcm_hash_crypt_continue+0xa1/0xb0 [ 306.776103] gcm_hash_assoc_remain_continue+0x94/0xa0 [ 306.777170] gcm_hash_assoc_continue+0x9d/0xb0 [ 306.778239] gcm_hash_init_continue+0x8f/0xa0 [ 306.779121] gcm_hash+0x73/0x80 [ 306.779762] gcm_encrypt_continue+0x6d/0x80 [ 306.780582] crypto_gcm_encrypt+0xcb/0xe0 [ 306.781474] crypto_aead_encrypt+0x1f/0x30 [ 306.782353] tls_push_record+0x3b9/0xb20 [tls] [ 306.783314] ?
sk_psock_msg_verdict+0x199/0x300 [ 306.784287] bpf_exec_tx_verdict+0x3f2/0x680 [tls] [ 306.785357] tls_sw_sendmsg+0x4a3/0x6a0 [tls] test_sockmap test signature to trigger bug, [TEST]: (1, 1, 1, sendmsg, pass,redir,start 1,end 2,pop (1,2),ktls,): Fixes: d3b18ad31f93d ("tls: add bpf support to sk_msg handling") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Jonathan Lemon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-7-john.fastabend@gmail.com --- net/tls/tls_sw.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c6803a82b769..31f6bbbc8992 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -682,12 +682,32 @@ static int tls_push_record(struct sock *sk, int flags, split_point = msg_pl->apply_bytes; split = split_point && split_point < msg_pl->sg.size; + if (unlikely((!split && + msg_pl->sg.size + + prot->overhead_size > msg_en->sg.size) || + (split && + split_point + + prot->overhead_size > msg_en->sg.size))) { + split = true; + split_point = msg_en->sg.size; + } if (split) { rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en, split_point, prot->overhead_size, &orig_end); if (rc < 0) return rc; + /* This can happen if above tls_split_open_record allocates + * a single large encryption buffer instead of two smaller + * ones. In this case adjust pointers and continue without + * split. + */ + if (!msg_pl->sg.size) { + tls_merge_open_record(sk, rec, tmp, orig_end); + msg_pl = &rec->msg_plaintext; + msg_en = &rec->msg_encrypted; + split = false; + } sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size); } -- cgit v1.2.3 From 9aaaa56845a06aeabdd597cbe19492dc01f281ec Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:05 +0000 Subject: bpf: Sockmap/tls, skmsg can have wrapped skmsg that needs extra chaining It's possible, through a set of push, pop, and apply helper calls, to construct an skmsg, which is just a ring of scatterlist elements, with the start value larger than the end value. For example, end start |_0_|_1_| ... |_n_|_n+1_| Here end points at 1 and start points at n, so the valid elements are the set {n, n+1, 0, 1}. Currently, because we don't build the correct chain, only {n, n+1} will be sent. This adds a check and sg_chain call to correctly submit the above to the crypto and tls send path.
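A sketch of the wrap handling being added (a simplified restatement of the hunk that follows, with the ring layout spelled out in comments rather than new logic):

	/* msg_pl->sg.data[] is a ring of MAX_SKB_FRAGS + 1 scatterlist slots.
	 * When end < start the live elements are data[start..MAX_SKB_FRAGS]
	 * followed by data[0..end-1], so the tail segment has to be chained
	 * back to the head of the array before the list reaches crypto.
	 */
	if (msg_pl->sg.end < msg_pl->sg.start)
		sg_chain(&msg_pl->sg.data[msg_pl->sg.start],	/* tail segment */
			 MAX_SKB_FRAGS - msg_pl->sg.start + 1,	/* slots from start to the end of the array */
			 msg_pl->sg.data);			/* continue at data[0] */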
Fixes: d3b18ad31f93d ("tls: add bpf support to sk_msg handling") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Jonathan Lemon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-8-john.fastabend@gmail.com --- net/tls/tls_sw.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 31f6bbbc8992..21c7725d17ca 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -729,6 +729,12 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(sk_msg_elem(msg_pl, i)); } + if (msg_pl->sg.end < msg_pl->sg.start) { + sg_chain(&msg_pl->sg.data[msg_pl->sg.start], + MAX_SKB_FRAGS - msg_pl->sg.start + 1, + msg_pl->sg.data); + } + i = msg_pl->sg.start; sg_chain(rec->sg_aead_in, 2, &msg_pl->sg.data[i]); -- cgit v1.2.3 From 7361d44896ff20d48bdd502d1a0cd66308055d45 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 11 Jan 2020 06:12:06 +0000 Subject: bpf: Sockmap/tls, fix pop data with SK_DROP return code When the user returns SK_DROP we need to reset the number of copied bytes to indicate to the user that the bytes were dropped and not sent. If we don't reset the copied arg, sendmsg will return as if those bytes were copied, giving the user a positive return value. This works as expected today except in the case where the user also pops bytes. In the pop case the sg.size is reduced but we don't correctly account for this when copied bytes is reset. The popped bytes are not accounted for and we return a small positive value, potentially confusing the user. The reason this happens is due to a typo where we do the wrong comparison when accounting for pop bytes. In this fix, notice the if/else is not needed and that we have a similar problem if we push data, except it's not visible to the user because if delta is larger than the sg.size we return a negative value, so it appears as an error regardless. Fixes: 7246d8ed4dcce ("bpf: helper to pop data from messages") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Jonathan Lemon Cc: stable@vger.kernel.org Link: https://lore.kernel.org/bpf/20200111061206.8028-9-john.fastabend@gmail.com --- net/ipv4/tcp_bpf.c | 5 +---- net/tls/tls_sw.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e6b08b5a0895..8a01428f80c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -315,10 +315,7 @@ more_data: */ delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); - if (msg->sg.size < delta) - delta -= msg->sg.size; - else - delta = 0; + delta -= msg->sg.size; } if (msg->cork_bytes && diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 21c7725d17ca..159d49dab403 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -809,10 +809,7 @@ more_data: if (psock->eval == __SK_NONE) { delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); - if (delta < msg->sg.size) - delta -= msg->sg.size; - else - delta = 0; + delta -= msg->sg.size; } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { -- cgit v1.2.3 From ad32205470919c8e04cdd33e0613bdba50c2376d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 15 Jan 2020 11:54:38 +0300 Subject: net: dsa: tag_gswip: fix typo in tagger name The correct name is GSWIP (Gigabit Switch IP). The typo was introduced in 875138f81d71a ("dsa: Move tagger name into its ops structure") while moving tagger names to their structures.
Fixes: 875138f81d71a ("dsa: Move tagger name into its ops structure") Reviewed-by: Andrew Lunn Signed-off-by: Alexander Lobakin Reviewed-by: Florian Fainelli Acked-by: Hauke Mehrtens Signed-off-by: David S. Miller --- net/dsa/tag_gswip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index b678160bbd66..408d4af390a0 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -104,7 +104,7 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, } static const struct dsa_device_ops gswip_netdev_ops = { - .name = "gwsip", + .name = "gswip", .proto = DSA_TAG_PROTO_GSWIP, .xmit = gswip_tag_xmit, .rcv = gswip_tag_rcv, -- cgit v1.2.3 From bd5874da57edd001b35cf28ae737779498c16a56 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 15 Jan 2020 11:56:52 +0300 Subject: net: dsa: tag_qca: fix doubled Tx statistics The DSA subsystem has taken care of netdev statistics since commit 4ed70ce9f01c ("net: dsa: Refactor transmit path to eliminate duplication"), so any accounting inside tagger callbacks is redundant and can lead to messing up the stats. This bug has been present in the Qualcomm tagger since day 0. Fixes: cafdc45c949b ("net-next: dsa: add Qualcomm tag RX/TX handler") Reviewed-by: Andrew Lunn Signed-off-by: Alexander Lobakin Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/tag_qca.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index c95885215525..c8a128c9e5e0 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -33,9 +33,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - if (skb_cow_head(skb, 0) < 0) return NULL; -- cgit v1.2.3 From ec7470b834fe7b5d7eff11b6677f5d7fdf5e9a91 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 13 Jan 2020 18:09:58 +0100 Subject: netfilter: nf_tables: store transaction list locally while requesting module This patch fixes a WARN_ON in nft_set_destroy() due to a missing set reference count drop from the preparation phase. This is triggered by the module autoload path. Do not exercise the abort path from nft_request_module() while preparation phase cleanup is still pending. WARNING: CPU: 3 PID: 3456 at net/netfilter/nf_tables_api.c:3740 nft_set_destroy+0x45/0x50 [nf_tables] [...]
CPU: 3 PID: 3456 Comm: nft Not tainted 5.4.6-arch3-1 #1 RIP: 0010:nft_set_destroy+0x45/0x50 [nf_tables] Code: e8 30 eb 83 c6 48 8b 85 80 00 00 00 48 8b b8 90 00 00 00 e8 dd 6b d7 c5 48 8b 7d 30 e8 24 dd eb c5 48 89 ef 5d e9 6b c6 e5 c5 <0f> 0b c3 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 7f 10 e9 52 RSP: 0018:ffffac4f43e53700 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff99d63a154d80 RCX: 0000000001f88e03 RDX: 0000000001f88c03 RSI: ffff99d6560ef0c0 RDI: ffff99d63a101200 RBP: ffff99d617721de0 R08: 0000000000000000 R09: 0000000000000318 R10: 00000000f0000000 R11: 0000000000000001 R12: ffffffff880fabf0 R13: dead000000000122 R14: dead000000000100 R15: ffff99d63a154d80 FS: 00007ff3dbd5b740(0000) GS:ffff99d6560c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00001cb5de6a9000 CR3: 000000016eb6a004 CR4: 00000000001606e0 Call Trace: __nf_tables_abort+0x3e3/0x6d0 [nf_tables] nft_request_module+0x6f/0x110 [nf_tables] nft_expr_type_request_module+0x28/0x50 [nf_tables] nf_tables_expr_parse+0x198/0x1f0 [nf_tables] nft_expr_init+0x3b/0xf0 [nf_tables] nft_dynset_init+0x1e2/0x410 [nf_tables] nf_tables_newrule+0x30a/0x930 [nf_tables] nfnetlink_rcv_batch+0x2a0/0x640 [nfnetlink] nfnetlink_rcv+0x125/0x171 [nfnetlink] netlink_unicast+0x179/0x210 netlink_sendmsg+0x208/0x3d0 sock_sendmsg+0x5e/0x60 ____sys_sendmsg+0x21b/0x290 Update comment on the code to describe the new behaviour. Reported-by: Marco Oliverio Fixes: 452238e8d5ff ("netfilter: nf_tables: add and use helper for module autoload") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 43f05b3acd60..168765d1d1c2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -564,23 +564,21 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) } /* - * Loading a module requires dropping mutex that guards the - * transaction. - * We first need to abort any pending transactions as once - * mutex is unlocked a different client could start a new - * transaction. It must not see any 'future generation' - * changes * as these changes will never happen. + * Loading a module requires dropping mutex that guards the transaction. + * A different client might race to start a new transaction meanwhile. Zap the + * list of pending transaction and then restore it once the mutex is grabbed + * again. Users of this function return EAGAIN which implicitly triggers the + * transaction abort path to clean up the list of pending transactions. */ #ifdef CONFIG_MODULES -static int __nf_tables_abort(struct net *net); - static void nft_request_module(struct net *net, const char *fmt, ...) { char module_name[MODULE_NAME_LEN]; + LIST_HEAD(commit_list); va_list args; int ret; - __nf_tables_abort(net); + list_splice_init(&net->nft.commit_list, &commit_list); va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); @@ -591,6 +589,9 @@ static void nft_request_module(struct net *net, const char *fmt, ...) 
mutex_unlock(&net->nft.commit_mutex); request_module("%s", module_name); mutex_lock(&net->nft.commit_mutex); + + WARN_ON_ONCE(!list_empty(&net->nft.commit_list)); + list_splice(&commit_list, &net->nft.commit_list); } #endif -- cgit v1.2.3 From 1c702bf902bd37349f6d91cd7f4b372b1e46d0ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Jan 2020 08:44:11 +0100 Subject: netfilter: nft_tunnel: fix null-attribute check else we get null deref when one of the attributes is missing, both must be non-null. Reported-by: syzbot+76d0b80493ac881ff77b@syzkaller.appspotmail.com Fixes: aaecfdb5c5dd8ba ("netfilter: nf_tables: match on tunnel metadata") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3d4c2ae605a8..d89c7c553030 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -76,7 +76,7 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, struct nft_tunnel *priv = nft_expr_priv(expr); u32 len; - if (!tb[NFTA_TUNNEL_KEY] && + if (!tb[NFTA_TUNNEL_KEY] || !tb[NFTA_TUNNEL_DREG]) return -EINVAL; -- cgit v1.2.3 From 9ec22d7c6c69146180577f3ad5fdf504beeaee62 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Jan 2020 08:58:05 +0100 Subject: netfilter: nft_tunnel: ERSPAN_VERSION must not be null Fixes: af308b94a2a4a5 ("netfilter: nf_tables: add tunnel support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tunnel.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index d89c7c553030..5284fcf16be7 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -266,6 +266,9 @@ static int nft_tunnel_obj_erspan_init(const struct nlattr *attr, if (err < 0) return err; + if (!tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION]) + return -EINVAL; + version = ntohl(nla_get_be32(tb[NFTA_TUNNEL_KEY_ERSPAN_VERSION])); switch (version) { case ERSPAN_VERSION: -- cgit v1.2.3 From 9332d27d7918182add34e8043f6a754530fdd022 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Jan 2020 09:06:50 +0100 Subject: netfilter: nf_tables: remove WARN and add NLA_STRING upper limits This WARN can trigger because some of the names fed to the module autoload function can be of arbitrary length. Remove the WARN and add limits for all NLA_STRING attributes. Reported-by: syzbot+0e63ae76d117ae1c3a01@syzkaller.appspotmail.com Fixes: 452238e8d5ffd8 ("netfilter: nf_tables: add and use helper for module autoload") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 168765d1d1c2..b3692458d428 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -22,6 +22,8 @@ #include #include +#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) + static LIST_HEAD(nf_tables_expressions); static LIST_HEAD(nf_tables_objects); static LIST_HEAD(nf_tables_flowtables); @@ -583,7 +585,7 @@ static void nft_request_module(struct net *net, const char *fmt, ...) 
va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); - if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret)) + if (ret >= MODULE_NAME_LEN) return; mutex_unlock(&net->nft.commit_mutex); @@ -1242,7 +1244,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, - [NFTA_CHAIN_TYPE] = { .type = NLA_STRING }, + [NFTA_CHAIN_TYPE] = { .type = NLA_STRING, + .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, }; @@ -2356,7 +2359,8 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net, } static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = { - [NFTA_EXPR_NAME] = { .type = NLA_STRING }, + [NFTA_EXPR_NAME] = { .type = NLA_STRING, + .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_EXPR_DATA] = { .type = NLA_NESTED }, }; @@ -4199,7 +4203,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_SET_ELEM_EXPR] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING }, + [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, + .len = NFT_OBJ_MAXNAMELEN - 1 }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { -- cgit v1.2.3 From cd77e75b5e4632d05d61fec5383052713c8ede2b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Jan 2020 13:09:31 +0300 Subject: netfilter: nf_tables: fix memory leak in nf_tables_parse_netdev_hooks() Syzbot detected a leak in nf_tables_parse_netdev_hooks(). If the hook already exists, then the error handling doesn't free the newest "hook". Reported-by: syzbot+f9d4095107fc8749c69c@syzkaller.appspotmail.com Fixes: b75a3e8371bc ("netfilter: nf_tables: allow netdevice to be used only once per flowtable") Signed-off-by: Dan Carpenter Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b3692458d428..896a6e8aff91 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1680,6 +1680,7 @@ static int nf_tables_parse_netdev_hooks(struct net *net, goto err_hook; } if (nft_hook_list_find(hook_list, hook)) { + kfree(hook); err = -EEXIST; goto err_hook; } -- cgit v1.2.3 From 335178d5429c4cee61b58f4ac80688f556630818 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 16 Jan 2020 12:03:01 +0100 Subject: netfilter: nf_tables: fix flowtable list del corruption syzbot reported following crash: list_del corruption, ffff88808c9bb000->prev is LIST_POISON2 (dead000000000122) [..] Call Trace: __list_del_entry include/linux/list.h:131 [inline] list_del_rcu include/linux/rculist.h:148 [inline] nf_tables_commit+0x1068/0x3b30 net/netfilter/nf_tables_api.c:7183 [..] The commit transaction list has: NFT_MSG_NEWTABLE NFT_MSG_NEWFLOWTABLE NFT_MSG_DELFLOWTABLE NFT_MSG_DELTABLE A missing generation check during DELTABLE processing causes it to queue the DELFLOWTABLE operation a second time, so we corrupt the list here: case NFT_MSG_DELFLOWTABLE: list_del_rcu(&nft_trans_flowtable(trans)->list); nf_tables_flowtable_notify(&trans->ctx, because we have two different DELFLOWTABLE transactions for the same flowtable. 
We then call list_del_rcu() twice for the same flowtable->list. The object handling seems to suffer from the same bug, so add a generation check too and only queue delete transactions for flowtables/objects that are still active in the next generation. Reported-by: syzbot+37a6804945a3a13b1572@syzkaller.appspotmail.com Fixes: 3b49e2e94e6eb ("netfilter: nf_tables: add flow table netlink frontend") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 896a6e8aff91..65f51a2e9c2a 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1048,12 +1048,18 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(flowtable, nft, &ctx->table->flowtables, list) { + if (!nft_is_active_next(ctx->net, flowtable)) + continue; + err = nft_delflowtable(ctx, flowtable); if (err < 0) goto out; } list_for_each_entry_safe(obj, ne, &ctx->table->objects, list) { + if (!nft_is_active_next(ctx->net, obj)) + continue; + err = nft_delobj(ctx, obj); if (err < 0) goto out; -- cgit v1.2.3 From 61177e911dad660df86a4553eb01c95ece2f6a82 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Tue, 14 Jan 2020 10:03:50 +0200 Subject: netfilter: nat: fix ICMP header corruption on ICMP errors Commit 8303b7e8f018 ("netfilter: nat: fix spurious connection timeouts") made nf_nat_icmp_reply_translation() use icmp_manip_pkt() as the l4 manipulation function for the outer packet on ICMP errors. However, icmp_manip_pkt() assumes the packet has an 'id' field, which is not the case for all types of ICMP messages. This is not correct for ICMP error packets, and leads to bogus bytes being written to the ICMP header, which can be wrongfully regarded as 'length' bytes by RFC 4884 compliant receivers. Fix by assigning the 'id' field only for ICMP messages that have this semantic.
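For reference, the reason only the query-type messages may be rewritten is visible in the ICMP header layout: the 16-bit identifier only exists for query messages, while error messages reuse those bytes differently (RFC 4884, for example, places a length field in that area). A trimmed sketch of the uapi structure, not a verbatim copy, with some members omitted:

	struct icmphdr {
		__u8	type;
		__u8	code;
		__sum16	checksum;
		union {
			struct {
				__be16	id;		/* meaningful only for echo/timestamp/info/address queries */
				__be16	sequence;
			} echo;
			__be32	gateway;		/* redirect */
			struct {
				__be16	__unused;
				__be16	mtu;		/* fragmentation needed */
			} frag;
			/* further members omitted */
		} un;
	};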
Reported-by: Shmulik Ladkani Fixes: 8303b7e8f018 ("netfilter: nat: fix spurious connection timeouts") Signed-off-by: Eyal Birger Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 0a59c14b5177..64eedc17037a 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -233,6 +233,19 @@ icmp_manip_pkt(struct sk_buff *skb, return false; hdr = (struct icmphdr *)(skb->data + hdroff); + switch (hdr->type) { + case ICMP_ECHO: + case ICMP_ECHOREPLY: + case ICMP_TIMESTAMP: + case ICMP_TIMESTAMPREPLY: + case ICMP_INFO_REQUEST: + case ICMP_INFO_REPLY: + case ICMP_ADDRESS: + case ICMP_ADDRESSREPLY: + break; + default: + return true; + } inet_proto_csum_replace2(&hdr->checksum, skb, hdr->un.echo.id, tuple->src.u.icmp.id, false); hdr->un.echo.id = tuple->src.u.icmp.id; -- cgit v1.2.3 From 44c23d71599f81a1c7fe8389e0319822dd50c37c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jan 2020 08:20:39 -0800 Subject: net/sched: act_ife: initalize ife->metalist earlier It seems better to init ife->metalist earlier in tcf_ife_init() to avoid the following crash : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 10483 Comm: syz-executor216 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:_tcf_ife_cleanup net/sched/act_ife.c:412 [inline] RIP: 0010:tcf_ife_cleanup+0x6e/0x400 net/sched/act_ife.c:431 Code: 48 c1 ea 03 80 3c 02 00 0f 85 94 03 00 00 49 8b bd f8 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8d 67 e8 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 5c 03 00 00 48 bb 00 00 00 00 00 fc ff df 48 8b RSP: 0018:ffffc90001dc6d00 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff864619c0 RCX: ffffffff815bfa09 RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000000 RBP: ffffc90001dc6d50 R08: 0000000000000004 R09: fffff520003b8d8e R10: fffff520003b8d8d R11: 0000000000000003 R12: ffffffffffffffe8 R13: ffff8880a79fc000 R14: ffff88809aba0e00 R15: 0000000000000000 FS: 0000000001b51880(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563f52cce140 CR3: 0000000093541000 CR4: 00000000001406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcf_action_cleanup+0x62/0x1b0 net/sched/act_api.c:119 __tcf_action_put+0xfa/0x130 net/sched/act_api.c:135 __tcf_idr_release net/sched/act_api.c:165 [inline] __tcf_idr_release+0x59/0xf0 net/sched/act_api.c:145 tcf_idr_release include/net/act_api.h:171 [inline] tcf_ife_init+0x97c/0x1870 net/sched/act_ife.c:616 tcf_action_init_1+0x6b6/0xa40 net/sched/act_api.c:944 tcf_action_init+0x21a/0x330 net/sched/act_api.c:1000 tcf_action_add+0xf5/0x3b0 net/sched/act_api.c:1410 tc_ctl_action+0x390/0x488 net/sched/act_api.c:1465 rtnetlink_rcv_msg+0x45e/0xaf0 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0x58c/0x7d0 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] 
sock_sendmsg+0xd7/0x130 net/socket.c:659 ____sys_sendmsg+0x753/0x880 net/socket.c:2330 ___sys_sendmsg+0x100/0x170 net/socket.c:2384 __sys_sendmsg+0x105/0x1d0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg net/socket.c:2424 [inline] __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2424 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: 11a94d7fd80f ("net/sched: act_ife: validate the control action inside init()") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Davide Caratti Reviewed-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5e6379028fc3..c1fcd85719d6 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -537,6 +537,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&ife->metalist); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; @@ -566,10 +569,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, p->eth_type = ife_type; } - - if (ret == ACT_P_CREATED) - INIT_LIST_HEAD(&ife->metalist); - if (tb[TCA_IFE_METALST]) { err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, -- cgit v1.2.3 From 53d374979ef147ab51f5d632dfe20b14aebeccd0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 15 Jan 2020 13:02:38 -0800 Subject: net: avoid updating qdisc_xmit_lock_key in netdev_update_lockdep_key() syzbot reported some bogus lockdep warnings, for example bad unlock balance in sch_direct_xmit(). They are due to a race condition between slow path and fast path, that is qdisc_xmit_lock_key gets re-registered in netdev_update_lockdep_key() on slow path, while we could still acquire the queue->_xmit_lock on fast path in this small window: CPU A CPU B __netif_tx_lock(); lockdep_unregister_key(qdisc_xmit_lock_key); __netif_tx_unlock(); lockdep_register_key(qdisc_xmit_lock_key); In fact, unlike the addr_list_lock which has to be reordered when the master/slave device relationship changes, queue->_xmit_lock is only acquired on fast path and only when NETIF_F_LLTX is not set, so there is likely no nested locking for it. Therefore, we can just get rid of re-registration of qdisc_xmit_lock_key. Reported-by: syzbot+4ec99438ed7450da6272@syzkaller.appspotmail.com Fixes: ab92d68fc22f ("net: core: add generic lockdep keys") Cc: Taehee Yoo Signed-off-by: Cong Wang Acked-by: Taehee Yoo Signed-off-by: David S. 
Miller --- net/core/dev.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 0ad39c87b7fd..7e885d069707 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9177,22 +9177,10 @@ static void netdev_unregister_lockdep_key(struct net_device *dev) void netdev_update_lockdep_key(struct net_device *dev) { - struct netdev_queue *queue; - int i; - - lockdep_unregister_key(&dev->qdisc_xmit_lock_key); lockdep_unregister_key(&dev->addr_list_lock_key); - - lockdep_register_key(&dev->qdisc_xmit_lock_key); lockdep_register_key(&dev->addr_list_lock_key); lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key); - for (i = 0; i < dev->num_tx_queues; i++) { - queue = netdev_get_tx_queue(dev, i); - - lockdep_set_class(&queue->_xmit_lock, - &dev->qdisc_xmit_lock_key); - } } EXPORT_SYMBOL(netdev_update_lockdep_key); -- cgit v1.2.3 From 09d4f10a5e78d76a53e3e584f1e6a701b6d24108 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 18 Jan 2020 20:45:06 -0800 Subject: net: sched: act_ctinfo: fix memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a cleanup method to properly free ci->params BUG: memory leak unreferenced object 0xffff88811746e2c0 (size 64): comm "syz-executor617", pid 7106, jiffies 4294943055 (age 14.250s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ c0 34 60 84 ff ff ff ff 00 00 00 00 00 00 00 00 .4`............. backtrace: [<0000000015aa236f>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<0000000015aa236f>] slab_post_alloc_hook mm/slab.h:586 [inline] [<0000000015aa236f>] slab_alloc mm/slab.c:3320 [inline] [<0000000015aa236f>] kmem_cache_alloc_trace+0x145/0x2c0 mm/slab.c:3549 [<000000002c946bd1>] kmalloc include/linux/slab.h:556 [inline] [<000000002c946bd1>] kzalloc include/linux/slab.h:670 [inline] [<000000002c946bd1>] tcf_ctinfo_init+0x21a/0x530 net/sched/act_ctinfo.c:236 [<0000000086952cca>] tcf_action_init_1+0x400/0x5b0 net/sched/act_api.c:944 [<000000005ab29bf8>] tcf_action_init+0x135/0x1c0 net/sched/act_api.c:1000 [<00000000392f56f9>] tcf_action_add+0x9a/0x200 net/sched/act_api.c:1410 [<0000000088f3c5dd>] tc_ctl_action+0x14d/0x1bb net/sched/act_api.c:1465 [<000000006b39d986>] rtnetlink_rcv_msg+0x178/0x4b0 net/core/rtnetlink.c:5424 [<00000000fd6ecace>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<0000000047493d02>] rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 [<00000000bdcf8286>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000bdcf8286>] netlink_unicast+0x223/0x310 net/netlink/af_netlink.c:1328 [<00000000fc5b92d9>] netlink_sendmsg+0x2c0/0x570 net/netlink/af_netlink.c:1917 [<00000000da84d076>] sock_sendmsg_nosec net/socket.c:639 [inline] [<00000000da84d076>] sock_sendmsg+0x54/0x70 net/socket.c:659 [<0000000042fb2eee>] ____sys_sendmsg+0x2d0/0x300 net/socket.c:2330 [<000000008f23f67e>] ___sys_sendmsg+0x8a/0xd0 net/socket.c:2384 [<00000000d838e4f6>] __sys_sendmsg+0x80/0xf0 net/socket.c:2417 [<00000000289a9cb1>] __do_sys_sendmsg net/socket.c:2426 [inline] [<00000000289a9cb1>] __se_sys_sendmsg net/socket.c:2424 [inline] [<00000000289a9cb1>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2424 Fixes: 24ec483cec98 ("net: sched: Introduce act_ctinfo action") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Kevin 'ldir' Darbyshire-Bryant Cc: Cong Wang Cc: Toke Høiland-Jørgensen Acked-by: Kevin 'ldir' Darbyshire-Bryant Signed-off-by: David S. 
Miller --- net/sched/act_ctinfo.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 40038c321b4a..19649623493b 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -360,6 +360,16 @@ static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_ctinfo_cleanup(struct tc_action *a) +{ + struct tcf_ctinfo *ci = to_ctinfo(a); + struct tcf_ctinfo_params *cp; + + cp = rcu_dereference_protected(ci->params, 1); + if (cp) + kfree_rcu(cp, rcu); +} + static struct tc_action_ops act_ctinfo_ops = { .kind = "ctinfo", .id = TCA_ID_CTINFO, @@ -367,6 +377,7 @@ static struct tc_action_ops act_ctinfo_ops = { .act = tcf_ctinfo_act, .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, + .cleanup= tcf_ctinfo_cleanup, .walk = tcf_ctinfo_walker, .lookup = tcf_ctinfo_search, .size = sizeof(struct tcf_ctinfo), -- cgit v1.2.3
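More generally, the pattern this fix completes applies to any tc action that parks its parameters behind an RCU pointer: without a .cleanup callback the final params object is leaked when the action is destroyed. A hedged sketch with hypothetical names (my_act, to_my_act, and my_act_params are illustrative, not in-tree symbols):

	static void my_act_cleanup(struct tc_action *a)
	{
		struct my_act *m = to_my_act(a);	/* hypothetical cast helper */
		struct my_act_params *p;

		/* the action is no longer reachable at this point, so a plain
		 * rcu_dereference_protected(..., 1) is sufficient
		 */
		p = rcu_dereference_protected(m->params, 1);
		if (p)
			kfree_rcu(p, rcu);	/* 'rcu' is an rcu_head embedded in the params */
	}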