From 4429b668e0375206408617d6440e3bb76c56c7d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Jan 2019 16:06:07 +0300 Subject: xprtrdma: Fix error code in rpcrdma_buffer_create() This should return -ENOMEM if __alloc_workqueue_key() fails, but it returns success. Fixes: 6d2d0ee27c7a ("xprtrdma: Replace rpcrdma_receive_wq with a per-xprt workqueue") Signed-off-by: Dan Carpenter Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7749a2bf6887..3dde05892c8e 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1113,8 +1113,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) WQ_MEM_RECLAIM | WQ_HIGHPRI, 0, r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR]); - if (!buf->rb_completion_wq) + if (!buf->rb_completion_wq) { + rc = -ENOMEM; goto out; + } return 0; out: -- cgit v1.2.3 From 6e17f58c486d9554341f70aa5b63b8fbed07b3fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Jan 2019 16:06:48 +0300 Subject: xprtrdma: Double free in rpcrdma_sendctxs_create() The clean up is handled by the caller, rpcrdma_buffer_create(), so this call to rpcrdma_sendctxs_destroy() leads to a double free. Fixes: ae72950abf99 ("xprtrdma: Add data structure to manage RDMA Send arguments") Signed-off-by: Dan Carpenter Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3dde05892c8e..4994e75945b8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -845,17 +845,13 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) for (i = 0; i <= buf->rb_sc_last; i++) { sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); if (!sc) - goto out_destroy; + return -ENOMEM; sc->sc_xprt = r_xprt; buf->rb_sc_ctxs[i] = sc; } return 0; - -out_destroy: - rpcrdma_sendctxs_destroy(buf); - return -ENOMEM; } /* The sendctx queue is not guaranteed to have a size that is a -- cgit v1.2.3 From 6a829eb8619fbdde6d7d627ad582fe119805f39d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Jan 2019 09:04:45 -0500 Subject: SUNRPC: Fix TCP receive code on archs with flush_dcache_page() After receiving data into the page cache, we need to call flush_dcache_page() for the architectures that define it. Fixes: 277e4ab7d530b ("SUNRPC: Simplify TCP receive code by switching...") Reported-by: Geert Uytterhoeven Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.20 Tested-by: Geert Uytterhoeven Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 13559e6a460b..7754aa3e434f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -376,6 +377,26 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, return sock_recvmsg(sock, msg, flags); } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void +xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = count, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void +xs_flush_bvec(const struct bio_vec *bvec, size_t count, size_t seek) +{ +} +#endif + static ssize_t xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, struct xdr_buf *buf, size_t count, size_t seek, size_t *read) @@ -409,6 +430,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, seek + buf->page_base); if (ret <= 0) goto sock_err; + xs_flush_bvec(buf->bvec, ret, seek + buf->page_base); offset += ret - buf->page_base; if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; -- cgit v1.2.3 From 310529e663ed975d564cf029f878583e70c3b8a3 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sun, 30 Dec 2018 17:27:43 +0100 Subject: netfilter: nf_tables: Fix for endless loop when dumping ruleset __nf_tables_dump_rules() stores the current idx value into cb->args[0] before returning to caller. With multiple chains present, cb->args[0] is therefore updated after each chain's rules have been traversed. This though causes the final nf_tables_dump_rules() run (which should return an skb->len of zero since no rules are left to dump) to continue dumping rules for each but the first chain. Fix this by moving the cb->args[0] update to nf_tables_dump_rules(). With no final action to be performed anymore in __nf_tables_dump_rules(), drop 'out_unfinished' jump label and 'rc' variable - instead return the appropriate value directly. Fixes: 241faeceb849c ("netfilter: nf_tables: Speed up selective rule dumps") Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2b0a93300dd7..e3ddd8e95e58 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2304,7 +2304,6 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, struct net *net = sock_net(skb->sk); unsigned int s_idx = cb->args[0]; const struct nft_rule *rule; - int rc = 1; list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) @@ -2321,16 +2320,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NLM_F_MULTI | NLM_F_APPEND, table->family, table, chain, rule) < 0) - goto out_unfinished; + return 1; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: (*idx)++; } - rc = 0; -out_unfinished: - cb->args[0] = *idx; - return rc; + return 0; } static int nf_tables_dump_rules(struct sk_buff *skb, @@ -2382,6 +2378,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb, } done: rcu_read_unlock(); + + cb->args[0] = idx; return skb->len; } -- cgit v1.2.3 From b91d9036883793122cf6575ca4dfbfbdd201a83d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 4 Jan 2019 17:56:16 +0900 Subject: netfilter: nf_tables: fix leaking object reference count There is no code that decreases the reference count of stateful objects in error path of the nft_add_set_elem(). this causes a leak of reference count of stateful objects. Test commands: $nft add table ip filter $nft add counter ip filter c1 $nft add map ip filter m1 { type ipv4_addr : counter \;} $nft add element ip filter m1 { 1 : c1 } $nft add element ip filter m1 { 1 : c1 } $nft delete element ip filter m1 { 1 } $nft delete counter ip filter c1 Result: Error: Could not process rule: Device or resource busy delete counter ip filter c1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ At the second 'nft add element ip filter m1 { 1 : c1 }', the reference count of the 'c1' is increased then it tries to insert into the 'm1'. but the 'm1' already has same element so it returns -EEXIST. But it doesn't decrease the reference count of the 'c1' in the error path. Due to a leak of the reference count of the 'c1', the 'c1' can't be removed by 'nft delete counter ip filter c1'. Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e3ddd8e95e58..dcea979423bc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4506,6 +4506,8 @@ err6: err5: kfree(trans); err4: + if (obj) + obj->use--; kfree(elem.priv); err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) -- cgit v1.2.3 From 715849ab31f8e57bbad84cc6c38912aeba6beb21 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 Jan 2019 23:18:58 +0100 Subject: netfilter: nf_tables: selective rule dump needs table to be specified Table needs to be specified for selective rule dumps per chain. Fixes: 241faeceb849c ("netfilter: nf_tables: Speed up selective rule dumps") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcea979423bc..fb07f6cfc719 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2350,7 +2350,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->chain) { + if (ctx && ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, -- cgit v1.2.3 From e7f45099442a380f8e087b6a8aadc36e887df1cc Mon Sep 17 00:00:00 2001 From: Santosh kumar pradhan Date: Wed, 9 Jan 2019 22:08:26 +0530 Subject: sunrpc: kernel BUG at kernel/cred.c:825! Init missing debug member magic with CRED_MAGIC. Signed-off-by: Santosh kumar pradhan Reported-by: Dave Jones Signed-off-by: Anna Schumaker --- net/sunrpc/auth.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 1ff9768f5456..f3023bbc0b7f 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -41,6 +41,9 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), +#ifdef CONFIG_DEBUG_CREDENTIALS + .magic = CRED_MAGIC, +#endif }; /* -- cgit v1.2.3 From a799aea0988ea0d1b1f263e996fdad2f6133c680 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 9 Jan 2019 10:40:11 +0800 Subject: netfilter: nft_flow_offload: Fix reverse route lookup Using the following example: client 1.1.1.7 ---> 2.2.2.7 which dnat to 10.0.0.7 server The first reply packet (ie. syn+ack) uses an incorrect destination address for the reverse route lookup since it uses: daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; which is 2.2.2.7 in the scenario that is described above, while this should be: daddr = ct->tuplehash[dir].tuple.src.u3.ip; that is 10.0.0.7. Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 974525eb92df..ccdb8f5ababb 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -29,10 +29,10 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, memset(&fl, 0, sizeof(fl)); switch (nft_pf(pkt)) { case NFPROTO_IPV4: - fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip; + fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; break; case NFPROTO_IPV6: - fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6; + fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; break; } -- cgit v1.2.3 From e2c8d550a973bb34fc28bc8d0ec996f84562fb8a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 2 Jan 2019 19:14:31 -0800 Subject: netfilter: ebtables: account ebt_table_info to kmemcg The [ip,ip6,arp]_tables use x_tables_info internally and the underlying memory is already accounted to kmemcg. Do the same for ebtables. The syzbot, by using setsockopt(EBT_SO_SET_ENTRIES), was able to OOM the whole system from a restricted memcg, a potential DoS. By accounting the ebt_table_info, the memory used for ebt_table_info can be contained within the memcg of the allocating process. However the lifetime of ebt_table_info is independent of the allocating process and is tied to the network namespace. So, the oom-killer will not be able to relieve the memory pressure due to ebt_table_info memory. The memory for ebt_table_info is allocated through vmalloc. Currently vmalloc does not handle the oom-killed allocating process correctly and one large allocation can bypass memcg limit enforcement. So, with this patch, at least the small allocations will be contained. For large allocations, we need to fix vmalloc. Reported-by: syzbot+7713f3aa67be76b1552c@syzkaller.appspotmail.com Signed-off-by: Shakeel Butt Reviewed-by: Kirill Tkhai Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 491828713e0b..5e55cef0cec3 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1137,14 +1137,16 @@ static int do_replace(struct net *net, const void __user *user, tmp.name[sizeof(tmp.name) - 1] = 0; countersize = COUNTER_OFFSET(tmp.nentries) * nr_cpu_ids; - newinfo = vmalloc(sizeof(*newinfo) + countersize); + newinfo = __vmalloc(sizeof(*newinfo) + countersize, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo) return -ENOMEM; if (countersize) memset(newinfo->counters, 0, countersize); - newinfo->entries = vmalloc(tmp.entries_size); + newinfo->entries = __vmalloc(tmp.entries_size, GFP_KERNEL_ACCOUNT, + PAGE_KERNEL); if (!newinfo->entries) { ret = -ENOMEM; goto free_newinfo; -- cgit v1.2.3 From 10f4e765879e514e1ce7f52ed26603047af196e2 Mon Sep 17 00:00:00 2001 From: wenxu Date: Thu, 10 Jan 2019 14:51:35 +0800 Subject: netfilter: nft_flow_offload: fix interaction with vrf slave device In the forward chain, the iif is changed from slave device to master vrf device. Thus, flow offload does not find a match on the lower slave device. This patch uses the cached route, ie. dst->dev, to update the iif and oif fields in the flow entry. After this patch, the following example works fine: # ip addr add dev eth0 1.1.1.1/24 # ip addr add dev eth1 10.0.0.1/24 # ip link add user1 type vrf table 1 # ip l set user1 up # ip l set dev eth0 master user1 # ip l set dev eth1 master user1 # nft add table firewall # nft add flowtable f fb1 { hook ingress priority 0 \; devices = { eth0, eth1 } \; } # nft add chain f ftb-all {type filter hook forward priority 0 \; policy accept \; } # nft add rule f ftb-all ct zone 1 ip protocol tcp flow offload @fb1 # nft add rule f ftb-all ct zone 1 ip protocol udp flow offload @fb1 Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 - net/netfilter/nf_flow_table_core.c | 5 +++-- net/netfilter/nft_flow_offload.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 7d5cda7ce32a..3e370cb36263 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -84,7 +84,6 @@ struct flow_offload { struct nf_flow_route { struct { struct dst_entry *dst; - int ifindex; } tuple[FLOW_OFFLOAD_DIR_MAX]; }; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index fa0844e2a68d..c0c72ae9df42 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -28,6 +28,7 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, { struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple; struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple; + struct dst_entry *other_dst = route->tuple[!dir].dst; struct dst_entry *dst = route->tuple[dir].dst; ft->dir = dir; @@ -50,8 +51,8 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct, ft->src_port = ctt->src.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port; - ft->iifidx = route->tuple[dir].ifindex; - ft->oifidx = route->tuple[!dir].ifindex; + ft->iifidx = other_dst->dev->ifindex; + ft->oifidx = dst->dev->ifindex; ft->dst_cache = dst; } diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ccdb8f5ababb..188c6bbf4e16 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -30,9 +30,11 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; + fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; break; case NFPROTO_IPV6: fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; + fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; break; } @@ -41,9 +43,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, return -ENOENT; route->tuple[dir].dst = this_dst; - route->tuple[dir].ifindex = nft_in(pkt)->ifindex; route->tuple[!dir].dst = other_dst; - route->tuple[!dir].ifindex = nft_out(pkt)->ifindex; return 0; } -- cgit v1.2.3 From 2314e879747e82896f51cce4488f6a00f3e1af7b Mon Sep 17 00:00:00 2001 From: Henry Yen Date: Mon, 14 Jan 2019 17:59:43 +0800 Subject: netfilter: nft_flow_offload: fix checking method of conntrack helper This patch uses nfct_help() to detect whether an established connection needs conntrack helper instead of using test_bit(IPS_HELPER_BIT, &ct->status). The reason is that IPS_HELPER_BIT is only set when using explicit CT target. However, in the case that a device enables conntrack helper via command "echo 1 > /proc/sys/net/netfilter/nf_conntrack_helper", the status of IPS_HELPER_BIT will not present any change, and consequently it loses the checking ability in the context. Signed-off-by: Henry Yen Reviewed-by: Ryder Lee Tested-by: John Crispin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 188c6bbf4e16..6e6b9adf7d38 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -12,6 +12,7 @@ #include #include #include +#include struct nft_flow_offload { struct nft_flowtable *flowtable; @@ -66,6 +67,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, { struct nft_flow_offload *priv = nft_expr_priv(expr); struct nf_flowtable *flowtable = &priv->flowtable->data; + const struct nf_conn_help *help; enum ip_conntrack_info ctinfo; struct nf_flow_route route; struct flow_offload *flow; @@ -88,7 +90,8 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, goto out; } - if (test_bit(IPS_HELPER_BIT, &ct->status)) + help = nfct_help(ct); + if (help) goto out; if (ctinfo == IP_CT_NEW || -- cgit v1.2.3 From cc5b5d3565048ae57d14e5674a5fb085b2ab0193 Mon Sep 17 00:00:00 2001 From: Krzysztof Kazimierczak Date: Thu, 10 Jan 2019 20:29:02 +0100 Subject: xsk: Check if a queue exists during umem setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the xdp_umem_assign_dev() path, the xsk code does not check if a queue for which umem is to be created exists. It leads to a situation where umem is not assigned to any Tx/Rx queue of a netdevice, without notifying the stack about an error. This affects both XDP_SKB and XDP_DRV modes - in case of XDP_DRV_ZC, queue index is checked by the driver. This patch fixes xsk code, so that in both XDP_SKB and XDP_DRV mode of AF_XDP, an error is returned when requested queue index exceedes an existing maximum. Fixes: c9b47cc1fabca ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") Reported-by: Jakub Spizewski Signed-off-by: Krzysztof Kazimierczak Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index a264cf2accd0..d4de871e7d4d 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -41,13 +41,20 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs) * not know if the device has more tx queues than rx, or the opposite. * This might also change during run time. */ -static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, - u16 queue_id) +static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem, + u16 queue_id) { + if (queue_id >= max_t(unsigned int, + dev->real_num_rx_queues, + dev->real_num_tx_queues)) + return -EINVAL; + if (queue_id < dev->real_num_rx_queues) dev->_rx[queue_id].umem = umem; if (queue_id < dev->real_num_tx_queues) dev->_tx[queue_id].umem = umem; + + return 0; } struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, @@ -88,7 +95,10 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, goto out_rtnl_unlock; } - xdp_reg_umem_at_qid(dev, umem, queue_id); + err = xdp_reg_umem_at_qid(dev, umem, queue_id); + if (err) + goto out_rtnl_unlock; + umem->dev = dev; umem->queue_id = queue_id; if (force_copy) -- cgit v1.2.3 From e66721f0436396f779291a29616858b76bfd9415 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 2 Jan 2019 17:53:10 -0500 Subject: SUNRPC: Ensure rq_bytes_sent is reset before request transmission When we resend a request, ensure that the 'rq_bytes_sent' is reset to zero. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 1 - net/sunrpc/xprt.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 71d9599b5816..0878c793ce7f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1739,7 +1739,6 @@ rpc_xdr_encode(struct rpc_task *task) xdr_buf_init(&req->rq_rcv_buf, req->rq_rbuffer, req->rq_rcvsize); - req->rq_bytes_sent = 0; p = rpc_encode_header(task); if (p == NULL) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 73547d17d3c6..9075ae150ae5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1151,6 +1151,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; if (xprt_request_need_enqueue_transmit(task, req)) { + req->rq_bytes_sent = 0; spin_lock(&xprt->queue_lock); /* * Requests that carry congestion control credits are added -- cgit v1.2.3 From 97b78ae96ba76f4ca2d8f5afee6a2e567ccb8f45 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 2 Jan 2019 17:53:13 -0500 Subject: SUNRPC: Ensure we respect the RPCSEC_GSS sequence number limit According to RFC2203, the RPCSEC_GSS sequence numbers are bounded to an upper limit of MAXSEQ = 0x80000000. Ensure that we handle that correctly. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/auth_gss/auth_gss.c | 12 +++++++++--- net/sunrpc/clnt.c | 19 ++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dc86713b32b6..1531b0219344 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1549,8 +1549,10 @@ gss_marshal(struct rpc_task *task, __be32 *p) cred_len = p++; spin_lock(&ctx->gc_seq_lock); - req->rq_seqno = ctx->gc_seq++; + req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); + if (req->rq_seqno == MAXSEQ) + goto out_expired; *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); @@ -1572,14 +1574,18 @@ gss_marshal(struct rpc_task *task, __be32 *p) mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) { - clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + goto out_expired; } else if (maj_stat != 0) { - printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); + pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); + task->tk_status = -EIO; goto out_put_ctx; } p = xdr_encode_opaque(p, NULL, mic.len); gss_put_ctx(ctx); return p; +out_expired: + clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + task->tk_status = -EKEYEXPIRED; out_put_ctx: gss_put_ctx(ctx); return NULL; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0878c793ce7f..d7ec6132c046 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1741,11 +1741,8 @@ rpc_xdr_encode(struct rpc_task *task) req->rq_rcvsize); p = rpc_encode_header(task); - if (p == NULL) { - printk(KERN_INFO "RPC: couldn't encode RPC header, exit EIO\n"); - rpc_exit(task, -EIO); + if (p == NULL) return; - } encode = task->tk_msg.rpc_proc->p_encode; if (encode == NULL) @@ -1770,10 +1767,17 @@ call_encode(struct rpc_task *task) /* Did the encode result in an error condition? */ if (task->tk_status != 0) { /* Was the error nonfatal? */ - if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM) + switch (task->tk_status) { + case -EAGAIN: + case -ENOMEM: rpc_delay(task, HZ >> 4); - else + break; + case -EKEYEXPIRED: + task->tk_action = call_refresh; + break; + default: rpc_exit(task, task->tk_status); + } return; } @@ -2335,7 +2339,8 @@ rpc_encode_header(struct rpc_task *task) *p++ = htonl(clnt->cl_vers); /* program version */ *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */ p = rpcauth_marshcred(task, p); - req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); + if (p) + req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p); return p; } -- cgit v1.2.3 From deaa5c96c2f7e8b934088a1e70a0fe8797bd1149 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Jan 2019 10:04:57 -0500 Subject: SUNRPC: Address Kerberos performance/behavior regression When using Kerberos with v4.20, I've observed frequent connection loss on heavy workloads. I traced it down to the client underrunning the GSS sequence number window -- NFS servers are required to drop the RPC with the low sequence number, and also drop the connection to signal that an RPC was dropped. Bisected to commit 918f3c1fe83c ("SUNRPC: Improve latency for interactive tasks"). I've got a one-line workaround for this issue, which is easy to backport to v4.20 while a more permanent solution is being derived. Essentially, tk_owner-based sorting is disabled for RPCs that carry a GSS sequence number. Fixes: 918f3c1fe83c ("SUNRPC: Improve latency for interactive ... ") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9075ae150ae5..f1ec2110efeb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1178,7 +1178,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else { + } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) continue; -- cgit v1.2.3 From ab5098fa25b91cb6fe0a0676f17abb64f2bbf024 Mon Sep 17 00:00:00 2001 From: Olivier Matz Date: Wed, 9 Jan 2019 10:57:21 +0100 Subject: ip6_gre: fix tunnel list corruption for x-netns In changelink ops, the ip6gre_net pointer is retrieved from dev_net(dev), which is wrong in case of x-netns. Thus, the tunnel is not unlinked from its current list and is relinked into another net namespace. This corrupts the tunnel lists and can later trigger a kernel oops. Fix this by retrieving the netns from device private area. Fixes: c8632fc30bb0 ("net: ip6_gre: Split up ip6gre_changelink()") Cc: Petr Machata Signed-off-by: Olivier Matz Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 09d0826742f8..f2543df50035 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -2025,9 +2025,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); struct __ip6_tnl_parm p; - struct ip6_tnl *t; t = ip6gre_changelink_common(dev, tb, data, &p, extack); if (IS_ERR(t)) -- cgit v1.2.3 From f97f4dd8b3bb9d0993d2491e0f22024c68109184 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 9 Jan 2019 09:57:39 +0000 Subject: net: ipv4: Fix memory leak in network namespace dismantle IPv4 routing tables are flushed in two cases: 1. In response to events in the netdev and inetaddr notification chains 2. When a network namespace is being dismantled In both cases only routes associated with a dead nexthop group are flushed. However, a nexthop group will only be marked as dead in case it is populated with actual nexthops using a nexthop device. This is not the case when the route in question is an error route (e.g., 'blackhole', 'unreachable'). Therefore, when a network namespace is being dismantled such routes are not flushed and leaked [1]. To reproduce: # ip netns add blue # ip -n blue route add unreachable 192.0.2.0/24 # ip netns del blue Fix this by not skipping error routes that are not marked with RTNH_F_DEAD when flushing the routing tables. To prevent the flushing of such routes in case #1, add a parameter to fib_table_flush() that indicates if the table is flushed as part of namespace dismantle or not. Note that this problem does not exist in IPv6 since error routes are associated with the loopback device. [1] unreferenced object 0xffff888066650338 (size 56): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b0 1c 62 61 80 88 ff ff ..........ba.... e8 8b a1 64 80 88 ff ff 00 07 00 08 fe 00 00 00 ...d............ backtrace: [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff unreferenced object 0xffff888061621c88 (size 48): comm "ip", pid 1206, jiffies 4294786063 (age 26.235s) hex dump (first 32 bytes): 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk 6b 6b 6b 6b 6b 6b 6b 6b d8 8e 26 5f 80 88 ff ff kkkkkkkk..&_.... backtrace: [<00000000733609e3>] fib_table_insert+0x978/0x1500 [<00000000856ed27d>] inet_rtm_newroute+0x129/0x220 [<00000000fcdfc00a>] rtnetlink_rcv_msg+0x397/0xa20 [<00000000cb85801a>] netlink_rcv_skb+0x132/0x380 [<00000000ebc991d2>] netlink_unicast+0x4c0/0x690 [<0000000014f62875>] netlink_sendmsg+0x929/0xe10 [<00000000bac9d967>] sock_sendmsg+0xc8/0x110 [<00000000223e6485>] ___sys_sendmsg+0x77a/0x8f0 [<000000002e94f880>] __sys_sendmsg+0xf7/0x250 [<00000000ccb1fa72>] do_syscall_64+0x14d/0x610 [<00000000ffbe3dae>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<000000003a8b605b>] 0xffffffffffffffff Fixes: 8cced9eff1d4 ("[NETNS]: Enable routing configuration in non-initial namespace.") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/fib_trie.c | 15 ++++++++++++--- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c5969762a8f4..9c8214d2116d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -241,7 +241,7 @@ int fib_table_delete(struct net *, struct fib_table *, struct fib_config *, struct netlink_ext_ack *extack); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb, struct fib_dump_filter *filter); -int fib_table_flush(struct net *net, struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table, bool flush_all); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6df95be96311..fe4f6a624238 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -203,7 +203,7 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(net, tb); + flushed += fib_table_flush(net, tb, false); } if (flushed) @@ -1463,7 +1463,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(net, tb); + fib_table_flush(net, tb, true); fib_free_table(tb); } } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 237c9f72b265..a573e37e0615 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1856,7 +1856,7 @@ void fib_table_flush_external(struct fib_table *tb) } /* Caller must hold RTNL. */ -int fib_table_flush(struct net *net, struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1904,8 +1904,17 @@ int fib_table_flush(struct net *net, struct fib_table *tb) hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { struct fib_info *fi = fa->fa_info; - if (!fi || !(fi->fib_flags & RTNH_F_DEAD) || - tb->tb_id != fa->tb_id) { + if (!fi || tb->tb_id != fa->tb_id || + (!(fi->fib_flags & RTNH_F_DEAD) && + !fib_props[fa->fa_type].error)) { + slen = fa->fa_slen; + continue; + } + + /* Do not flush error routes if network namespace is + * not being dismantled + */ + if (!flush_all && fib_props[fa->fa_type].error) { slen = fa->fa_slen; continue; } -- cgit v1.2.3 From 80b3671e9377916bf2b02e56113fa7377ce5705a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 10 Jan 2019 11:17:42 +0800 Subject: ip6_gre: update version related info when changing link We forgot to update ip6erspan version related info when changing link, which will cause setting new hwid failed. Reported-by: Jianlin Shi Fixes: 94d7d8f292870 ("ip6_gre: add erspan v2 support") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f2543df50035..026f08735549 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1169,6 +1169,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; + t->parms.erspan_ver = p->erspan_ver; + t->parms.index = p->index; + t->parms.dir = p->dir; + t->parms.hwid = p->hwid; dst_cache_reset(&t->dst_cache); } -- cgit v1.2.3 From f6bab199315b70fd83fe3ee0947bc84c7a35f3d4 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Jan 2019 17:09:42 +0100 Subject: sched: Avoid dereferencing skb pointer after child enqueue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parent qdiscs may dereference the pointer to the enqueued skb after enqueue. However, both CAKE and TBF call consume_skb() on the original skb when splitting GSO packets, leading to a potential use-after-free in the parent. Fix this by avoiding dereferencing the skb pointer after enqueueing to the child. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cbs.c | 3 ++- net/sched/sch_drr.c | 3 ++- net/sched/sch_dsmark.c | 3 ++- net/sched/sch_hfsc.c | 5 ++--- net/sched/sch_htb.c | 3 ++- net/sched/sch_prio.c | 3 ++- net/sched/sch_qfq.c | 16 +++++++++------- net/sched/sch_tbf.c | 3 ++- 8 files changed, 23 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index e689e11b6d0f..c6a502933fe7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -88,13 +88,14 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); int err; err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index cdebaed0f8cf..feaf47178653 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -350,6 +350,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; @@ -376,7 +377,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, cl->deficit = cl->quantum; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return err; } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index f6f480784bc6..42471464ded3 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -199,6 +199,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct dsmark_qdisc_data *p = qdisc_priv(sch); int err; @@ -271,7 +272,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b18ec1f6de60..6bb8f73a8473 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1539,6 +1539,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; int uninitialized_var(err); @@ -1560,8 +1561,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } if (cl->qdisc->q.qlen == 1) { - unsigned int len = qdisc_pkt_len(skb); - if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) @@ -1576,7 +1575,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 58b449490757..30f9da7e1076 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -581,6 +581,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { int uninitialized_var(ret); + unsigned int len = qdisc_pkt_len(skb); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -610,7 +611,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, htb_activate(q, cl); } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index cdf68706e40f..847141cd900f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -72,6 +72,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) static int prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *qdisc; int ret; @@ -88,7 +89,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) ret = qdisc_enqueue(skb, qdisc, to_free); if (ret == NET_XMIT_SUCCESS) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index dc37c4ead439..8d5e55d5bed2 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1210,6 +1210,7 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + unsigned int len = qdisc_pkt_len(skb), gso_segs; struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct qfq_aggregate *agg; @@ -1224,17 +1225,17 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); - if (unlikely(cl->agg->lmax < qdisc_pkt_len(skb))) { + if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", - cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); - err = qfq_change_agg(sch, cl, cl->agg->class_weight, - qdisc_pkt_len(skb)); + cl->agg->lmax, len, cl->common.classid); + err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } } + gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1245,8 +1246,9 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - bstats_update(&cl->bstats, skb); - qdisc_qstats_backlog_inc(sch, skb); + cl->bstats.bytes += len; + cl->bstats.packets += gso_segs; + sch->qstats.backlog += len; ++sch->q.qlen; agg = cl->agg; @@ -1254,7 +1256,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl->qdisc->q.qlen != 1) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) - == cl && cl->deficit < qdisc_pkt_len(skb)) + == cl && cl->deficit < len) list_move_tail(&cl->alist, &agg->active); return err; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 942dcca09cf2..7f272a9070c5 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -185,6 +185,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct tbf_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); int ret; if (qdisc_pkt_len(skb) > q->max_size) { @@ -200,7 +201,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch, return ret; } - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } -- cgit v1.2.3 From 37d9cf1a3ce35de3df6f7d209bfb1f50cf188cea Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Jan 2019 17:09:43 +0100 Subject: sched: Fix detection of empty queues in child qdiscs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several qdiscs check on enqueue whether the packet was enqueued to a class with an empty queue, in which case the class is activated. This is done by checking if the qlen is exactly 1 after enqueue. However, if GSO splitting is enabled in the child qdisc, a single packet can result in a qlen longer than 1. This means the activation check fails, leading to a stalled queue. Fix this by checking if the queue is empty *before* enqueue, and running the activation logic if this was the case. Reported-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 4 +++- net/sched/sch_hfsc.c | 4 +++- net/sched/sch_qfq.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index feaf47178653..09b800991065 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -354,6 +354,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; int err = 0; + bool first; cl = drr_classify(skb, sch, &err); if (cl == NULL) { @@ -363,6 +364,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -372,7 +374,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, return err; } - if (cl->qdisc->q.qlen == 1) { + if (first) { list_add_tail(&cl->alist, &q->active); cl->deficit = cl->quantum; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 6bb8f73a8473..24cc220a3218 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1542,6 +1542,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; int uninitialized_var(err); + bool first; cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { @@ -1551,6 +1552,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { @@ -1560,7 +1562,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (cl->qdisc->q.qlen == 1) { + if (first) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d5e55d5bed2..29f5c4a24688 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1215,6 +1215,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct qfq_class *cl; struct qfq_aggregate *agg; int err = 0; + bool first; cl = qfq_classify(skb, sch, &err); if (cl == NULL) { @@ -1236,6 +1237,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, } gso_segs = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1; + first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { pr_debug("qfq_enqueue: enqueue failed %d\n", err); @@ -1253,7 +1255,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, agg = cl->agg; /* if the queue was not empty, then done here */ - if (cl->qdisc->q.qlen != 1) { + if (!first) { if (unlikely(skb == cl->qdisc->ops->peek(cl->qdisc)) && list_first_entry(&agg->active, struct qfq_class, alist) == cl && cl->deficit < len) -- cgit v1.2.3 From 8c6c37fdc20ec9ffaa342f827a8e20afe736fb0c Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 9 Jan 2019 17:09:44 +0100 Subject: sch_cake: Correctly update parent qlen when splitting GSO packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To ensure parent qdiscs have the same notion of the number of enqueued packets even after splitting a GSO packet, update the qdisc tree with the number of packets that was added due to the split. Reported-by: Pete Heist Tested-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index b910cd5c56f7..73940293700d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -1667,7 +1667,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { struct sk_buff *segs, *nskb; netdev_features_t features = netif_skb_features(skb); - unsigned int slen = 0; + unsigned int slen = 0, numsegs = 0; segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) @@ -1683,6 +1683,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, flow_queue_add(flow, segs); sch->q.qlen++; + numsegs++; slen += segs->len; q->buffer_used += segs->truesize; b->packets++; @@ -1696,7 +1697,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, sch->qstats.backlog += slen; q->avg_window_bytes += slen; - qdisc_tree_reduce_backlog(sch, 1, len); + qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); consume_skb(skb); } else { /* not splitting */ -- cgit v1.2.3 From a88289f4ddee4165d5f796bd99e09eec3133c16b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:24 +0800 Subject: tipc: fix uninit-value in in tipc_conn_rcv_sub syzbot reported: BUG: KMSAN: uninit-value in tipc_conn_rcv_sub+0x184/0x950 net/tipc/topsrv.c:373 CPU: 0 PID: 66 Comm: kworker/u4:4 Not tainted 4.17.0-rc3+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: tipc_rcv tipc_conn_recv_work Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 tipc_conn_rcv_sub+0x184/0x950 net/tipc/topsrv.c:373 tipc_conn_rcv_from_sock net/tipc/topsrv.c:409 [inline] tipc_conn_recv_work+0x3cd/0x560 net/tipc/topsrv.c:424 process_one_work+0x12c6/0x1f60 kernel/workqueue.c:2145 worker_thread+0x113c/0x24f0 kernel/workqueue.c:2279 kthread+0x539/0x720 kernel/kthread.c:239 ret_from_fork+0x35/0x40 arch/x86/entry/entry_64.S:412 Local variable description: ----s.i@tipc_conn_recv_work Variable was created at: tipc_conn_recv_work+0x65/0x560 net/tipc/topsrv.c:419 process_one_work+0x12c6/0x1f60 kernel/workqueue.c:2145 In tipc_conn_rcv_from_sock(), it always supposes the length of message received from sock_recvmsg() is not smaller than the size of struct tipc_subscr. However, this assumption is false. Especially when the length of received message is shorter than struct tipc_subscr size, we will end up touching uninitialized fields in tipc_conn_rcv_sub(). Reported-by: syzbot+8951a3065ee7fd6d6e23@syzkaller.appspotmail.com Reported-by: syzbot+75e6e042c5bbf691fc82@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index efb16f69bd2c..a457c0fbbef1 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -398,7 +398,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret == -EWOULDBLOCK) return -EWOULDBLOCK; - if (ret > 0) { + if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); -- cgit v1.2.3 From 8b66fee7f8ee18f9c51260e7a43ab37db5177a05 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:25 +0800 Subject: tipc: fix uninit-value in tipc_nl_compat_link_reset_stats syzbot reports following splat: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:486 CPU: 1 PID: 11057 Comm: syz-executor0 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:295 strlen+0x3b/0xa0 lib/string.c:486 nla_put_string include/net/netlink.h:1154 [inline] tipc_nl_compat_link_reset_stats+0x1f0/0x360 net/tipc/netlink_compat.c:760 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:344 tipc_nl_compat_handle net/tipc/netlink_compat.c:1107 [inline] tipc_nl_compat_recv+0x14d7/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x444/0x640 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf40/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x457ec9 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f2557338c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000457ec9 RDX: 0000000000000000 RSI: 00000000200001c0 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f25573396d4 R13: 00000000004cb478 R14: 00000000004d86c8 R15: 00000000ffffffff Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2759 [inline] __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 __kmalloc_reserve net/core/skbuff.c:137 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 alloc_skb include/linux/skbuff.h:998 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0xb82/0x1300 net/netlink/af_netlink.c:1892 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The uninitialised access happened in tipc_nl_compat_link_reset_stats: nla_put_string(skb, TIPC_NLA_LINK_NAME, name) This is because name string is not validated before it's used. Reported-by: syzbot+e01d94b5a4c266be6e4c@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 77e4b2418f30..b2b115b22871 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -87,6 +87,11 @@ static int tipc_skb_tailroom(struct sk_buff *skb) return limit; } +static inline int TLV_GET_DATA_LEN(struct tlv_desc *tlv) +{ + return TLV_GET_LEN(tlv) - TLV_SPACE(0); +} + static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) { struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); @@ -166,6 +171,11 @@ static struct sk_buff *tipc_get_err_tlv(char *str) return buf; } +static inline bool string_is_valid(char *s, int len) +{ + return memchr(s, '\0', len) ? true : false; +} + static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, struct tipc_nl_compat_msg *msg, struct sk_buff *arg) @@ -750,6 +760,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *link; + int len; name = (char *)TLV_DATA(msg->req); @@ -757,6 +768,10 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (!link) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name)) return -EMSGSIZE; -- cgit v1.2.3 From 0762216c0ad2a2fccd63890648eca491f2c83d9a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:26 +0800 Subject: tipc: fix uninit-value in tipc_nl_compat_bearer_enable syzbot reported: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:484 CPU: 1 PID: 6371 Comm: syz-executor652 Not tainted 4.19.0-rc8+ #70 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x306/0x460 lib/dump_stack.c:113 kmsan_report+0x1a2/0x2e0 mm/kmsan/kmsan.c:917 __msan_warning+0x7c/0xe0 mm/kmsan/kmsan_instr.c:500 strlen+0x3b/0xa0 lib/string.c:484 nla_put_string include/net/netlink.h:1011 [inline] tipc_nl_compat_bearer_enable+0x238/0x7b0 net/tipc/netlink_compat.c:389 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x39f/0xae0 net/tipc/netlink_compat.c:344 tipc_nl_compat_recv+0x147c/0x2760 net/tipc/netlink_compat.c:1107 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185c/0x1a20 net/netlink/genetlink.c:626 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2454 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x166d/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1391/0x1420 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440179 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fffef7beee8 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440179 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000401a00 R13: 0000000000401a90 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:255 [inline] kmsan_internal_poison_shadow+0xc8/0x1d0 mm/kmsan/kmsan.c:180 kmsan_kmalloc+0xa4/0x120 mm/kmsan/kmsan_hooks.c:104 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:113 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2727 [inline] __kmalloc_node_track_caller+0xb43/0x1400 mm/slub.c:4360 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x422/0xe90 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xcaf/0x1420 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The root cause is that we don't validate whether bear name is a valid string in tipc_nl_compat_bearer_enable(). Meanwhile, we also fix the same issue in the following functions: tipc_nl_compat_bearer_disable() tipc_nl_compat_link_stat_dump() tipc_nl_compat_media_set() tipc_nl_compat_bearer_set() Reported-by: syzbot+b33d5cae0efd35dbfe77@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index b2b115b22871..68a0b7308936 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -389,6 +389,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, struct nlattr *prop; struct nlattr *bearer; struct tipc_bearer_config *b; + int len; b = (struct tipc_bearer_config *)TLV_DATA(msg->req); @@ -396,6 +397,10 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(b->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) return -EMSGSIZE; @@ -421,6 +426,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, { char *name; struct nlattr *bearer; + int len; name = (char *)TLV_DATA(msg->req); @@ -428,6 +434,10 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_BEARER_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) return -EMSGSIZE; @@ -488,6 +498,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; int err; + int len; if (!attrs[TIPC_NLA_LINK]) return -EINVAL; @@ -514,6 +525,11 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, return err; name = (char *)TLV_DATA(msg->req); + + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(name, len)) + return -EINVAL; + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) return 0; @@ -654,6 +670,7 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *media; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -661,6 +678,10 @@ static int tipc_nl_compat_media_set(struct sk_buff *skb, if (!media) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_MEDIA_NAME, lc->name)) return -EMSGSIZE; @@ -681,6 +702,7 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, struct nlattr *prop; struct nlattr *bearer; struct tipc_link_config *lc; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); @@ -688,6 +710,10 @@ static int tipc_nl_compat_bearer_set(struct sk_buff *skb, if (!bearer) return -EMSGSIZE; + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_MEDIA_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, lc->name)) return -EMSGSIZE; -- cgit v1.2.3 From edf5ff04a45750ac8ce2435974f001dc9cfbf055 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:27 +0800 Subject: tipc: fix uninit-value in tipc_nl_compat_link_set syzbot reports following splat: BUG: KMSAN: uninit-value in strlen+0x3b/0xa0 lib/string.c:486 CPU: 1 PID: 9306 Comm: syz-executor172 Not tainted 4.20.0-rc7+ #2 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 strlen+0x3b/0xa0 lib/string.c:486 nla_put_string include/net/netlink.h:1154 [inline] __tipc_nl_compat_link_set net/tipc/netlink_compat.c:708 [inline] tipc_nl_compat_link_set+0x929/0x1220 net/tipc/netlink_compat.c:744 __tipc_nl_compat_doit net/tipc/netlink_compat.c:311 [inline] tipc_nl_compat_doit+0x3aa/0xaf0 net/tipc/netlink_compat.c:344 tipc_nl_compat_handle net/tipc/netlink_compat.c:1107 [inline] tipc_nl_compat_recv+0x14d7/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185f/0x1a60 net/netlink/genetlink.c:626 netlink_rcv_skb+0x444/0x640 net/netlink/af_netlink.c:2477 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0xf40/0x1020 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x127f/0x1300 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 The uninitialised access happened in nla_put_string(skb, TIPC_NLA_LINK_NAME, lc->name) This is because lc->name string is not validated before it's used. Reported-by: syzbot+d78b8a29241a195aefb8@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 68a0b7308936..89e6ae3b3c33 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -762,9 +762,14 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, struct tipc_link_config *lc; struct tipc_bearer *bearer; struct tipc_media *media; + int len; lc = (struct tipc_link_config *)TLV_DATA(msg->req); + len = min_t(int, TLV_GET_DATA_LEN(msg->req), TIPC_MAX_LINK_NAME); + if (!string_is_valid(lc->name, len)) + return -EINVAL; + media = tipc_media_find(lc->name); if (media) { cmd->doit = &__tipc_nl_media_set; -- cgit v1.2.3 From 974cb0e3e7c963ced06c4e32c5b2884173fa5e01 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:28 +0800 Subject: tipc: fix uninit-value in tipc_nl_compat_name_table_dump syzbot reported: BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in tipc_nl_compat_name_table_dump+0x4a8/0xba0 net/tipc/netlink_compat.c:826 CPU: 0 PID: 6290 Comm: syz-executor848 Not tainted 4.19.0-rc8+ #70 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x306/0x460 lib/dump_stack.c:113 kmsan_report+0x1a2/0x2e0 mm/kmsan/kmsan.c:917 __msan_warning+0x7c/0xe0 mm/kmsan/kmsan_instr.c:500 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] tipc_nl_compat_name_table_dump+0x4a8/0xba0 net/tipc/netlink_compat.c:826 __tipc_nl_compat_dumpit+0x59e/0xdb0 net/tipc/netlink_compat.c:205 tipc_nl_compat_dumpit+0x63a/0x820 net/tipc/netlink_compat.c:270 tipc_nl_compat_handle net/tipc/netlink_compat.c:1151 [inline] tipc_nl_compat_recv+0x1402/0x2760 net/tipc/netlink_compat.c:1210 genl_family_rcv_msg net/netlink/genetlink.c:601 [inline] genl_rcv_msg+0x185c/0x1a20 net/netlink/genetlink.c:626 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2454 genl_rcv+0x63/0x80 net/netlink/genetlink.c:637 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x166d/0x1720 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x1391/0x1420 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440179 Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 fb 13 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffecec49318 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000440179 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 0000000000000000 R09: 00000000004002c8 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000401a00 R13: 0000000000401a90 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:255 [inline] kmsan_internal_poison_shadow+0xc8/0x1d0 mm/kmsan/kmsan.c:180 kmsan_kmalloc+0xa4/0x120 mm/kmsan/kmsan_hooks.c:104 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:113 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2727 [inline] __kmalloc_node_track_caller+0xb43/0x1400 mm/slub.c:4360 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x422/0xe90 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:996 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] netlink_sendmsg+0xcaf/0x1420 net/netlink/af_netlink.c:1883 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xe47/0x1200 net/socket.c:2116 __sys_sendmsg net/socket.c:2154 [inline] __do_sys_sendmsg net/socket.c:2163 [inline] __se_sys_sendmsg+0x307/0x460 net/socket.c:2161 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161 do_syscall_64+0xbe/0x100 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 We cannot take for granted the thing that the length of data contained in TLV is longer than the size of struct tipc_name_table_query in tipc_nl_compat_name_table_dump(). Reported-by: syzbot+06e771a754829716a327@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 89e6ae3b3c33..b90786ca1d21 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -824,6 +824,8 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); + if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + return -EINVAL; depth = ntohl(ntq->depth); -- cgit v1.2.3 From 2753ca5d9009c180dbfd4c802c80983b4b6108d1 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 14 Jan 2019 17:22:29 +0800 Subject: tipc: fix uninit-value in tipc_nl_compat_doit BUG: KMSAN: uninit-value in tipc_nl_compat_doit+0x404/0xa10 net/tipc/netlink_compat.c:335 CPU: 0 PID: 4514 Comm: syz-executor485 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 tipc_nl_compat_doit+0x404/0xa10 net/tipc/netlink_compat.c:335 tipc_nl_compat_recv+0x164b/0x2700 net/tipc/netlink_compat.c:1153 genl_family_rcv_msg net/netlink/genetlink.c:599 [inline] genl_rcv_msg+0x1686/0x1810 net/netlink/genetlink.c:624 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2447 genl_rcv+0x63/0x80 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0x166b/0x1740 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x1048/0x1310 net/netlink/af_netlink.c:1900 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x43fda9 RSP: 002b:00007ffd0c184ba8 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fda9 RDX: 0000000000000000 RSI: 0000000020023000 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 00000000004016d0 R13: 0000000000401760 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline] netlink_sendmsg+0x9a6/0x1310 net/netlink/af_netlink.c:1875 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 In tipc_nl_compat_recv(), when the len variable returned by nlmsg_attrlen() is 0, the message is still treated as a valid one, which is obviously unresonable. When len is zero, it means the message not only doesn't contain any valid TLV payload, but also TLV header is not included. Under this stituation, tlv_type field in TLV header is still accessed in tipc_nl_compat_dumpit() or tipc_nl_compat_doit(), but the field space is obviously illegal. Of course, it is not initialized. Reported-by: syzbot+bca0dc46634781f08b38@syzkaller.appspotmail.com Reported-by: syzbot+6bdb590321a7ae40c1a6@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index b90786ca1d21..4ad3586da8f0 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -1256,7 +1256,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) } len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); - if (len && !TLV_OK(msg.req, len)) { + if (!len || !TLV_OK(msg.req, len)) { msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); err = -EOPNOTSUPP; goto send; -- cgit v1.2.3 From e122d845a01ece2ddd28b2f125ef2db66b8b627a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 Jan 2019 16:59:13 +0000 Subject: Revert "rxrpc: Allow failed client calls to be retried" The changes introduced to allow rxrpc calls to be retried creates an issue when it comes to refcounting afs_call structs. The problem is that when rxrpc_send_data() queues the last packet for an asynchronous call, the following sequence can occur: (1) The notify_end_tx callback is invoked which causes the state in the afs_call to be changed from AFS_CALL_CL_REQUESTING or AFS_CALL_SV_REPLYING. (2) afs_deliver_to_call() can then process event notifications from rxrpc on the async_work queue. (3) Delivery of events, such as an abort from the server, can cause the afs_call state to be changed to AFS_CALL_COMPLETE on async_work. (4) For an asynchronous call, afs_process_async_call() notes that the call is complete and tried to clean up all the refs on async_work. (5) rxrpc_send_data() might return the amount of data transferred (success) or an error - which could in turn reflect a local error or a received error. Synchronising the clean up after rxrpc_kernel_send_data() returns an error with the asynchronous cleanup is then tricky to get right. Mostly revert commit c038a58ccfd6704d4d7d60ed3d6a0fca13cf13a4. The two API functions the original commit added aren't currently used. This makes rxrpc_kernel_send_data() always return successfully if it queued the data it was given. Note that this doesn't affect synchronous calls since their Rx notification function merely pokes a wait queue and does not refcounting. The asynchronous call notification function *has* to do refcounting and pass a ref over the work item to avoid the need to sync the workqueue in call cleanup. Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 45 ------------------ include/net/af_rxrpc.h | 16 ------- net/rxrpc/af_rxrpc.c | 70 --------------------------- net/rxrpc/ar-internal.h | 19 +++++--- net/rxrpc/call_object.c | 97 -------------------------------------- net/rxrpc/conn_client.c | 5 +- net/rxrpc/sendmsg.c | 24 +++++----- 7 files changed, 24 insertions(+), 252 deletions(-) (limited to 'net') diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index c9d052e0cf51..2df5894353d6 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1000,51 +1000,6 @@ The kernel interface functions are as follows: size should be set when the call is begun. tx_total_len may not be less than zero. - (*) Check to see the completion state of a call so that the caller can assess - whether it needs to be retried. - - enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, - RXRPC_CALL_REMOTELY_ABORTED, - RXRPC_CALL_LOCALLY_ABORTED, - RXRPC_CALL_LOCAL_ERROR, - RXRPC_CALL_NETWORK_ERROR, - }; - - int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, - u32 *_abort_code); - - On return, -EINPROGRESS will be returned if the call is still ongoing; if - it is finished, *_compl will be set to indicate the manner of completion, - *_abort_code will be set to any abort code that occurred. 0 will be - returned on a successful completion, -ECONNABORTED will be returned if the - client failed due to a remote abort and anything else will return an - appropriate error code. - - The caller should look at this information to decide if it's worth - retrying the call. - - (*) Retry a client call. - - int rxrpc_kernel_retry_call(struct socket *sock, - struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, - struct key *key); - - This attempts to partially reinitialise a call and submit it again while - reusing the original call's Tx queue to avoid the need to repackage and - re-encrypt the data to be sent. call indicates the call to retry, srx the - new address to send it to and key the encryption key to use for signing or - encrypting the packets. - - For this to work, the first Tx data packet must still be in the transmit - queue, and currently this is only permitted for local and network errors - and the call must not have been aborted. Any partially constructed Tx - packet is left as is and can continue being filled afterwards. - - It returns 0 if the call was requeued and an error otherwise. - (*) Get call RTT. u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1adefe42c0a6..2bfb87eb98ce 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -20,18 +20,6 @@ struct sock; struct socket; struct rxrpc_call; -/* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - /* * Debug ID counter for tracing. */ @@ -73,10 +61,6 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, - struct sockaddr_rxrpc *, struct key *); -int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, - enum rxrpc_call_completion *, u32 *); u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index a2522f9d71e2..96f2952bbdfd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -418,76 +418,6 @@ u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call) } EXPORT_SYMBOL(rxrpc_kernel_get_epoch); -/** - * rxrpc_kernel_check_call - Check a call's state - * @sock: The socket the call is on - * @call: The call to check - * @_compl: Where to store the completion state - * @_abort_code: Where to store any abort code - * - * Allow a kernel service to query the state of a call and find out the manner - * of its termination if it has completed. Returns -EINPROGRESS if the call is - * still going, 0 if the call finished successfully, -ECONNABORTED if the call - * was aborted and an appropriate error if the call failed in some other way. - */ -int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, - enum rxrpc_call_completion *_compl, u32 *_abort_code) -{ - if (call->state != RXRPC_CALL_COMPLETE) - return -EINPROGRESS; - smp_rmb(); - *_compl = call->completion; - *_abort_code = call->abort_code; - return call->error; -} -EXPORT_SYMBOL(rxrpc_kernel_check_call); - -/** - * rxrpc_kernel_retry_call - Allow a kernel service to retry a call - * @sock: The socket the call is on - * @call: The call to retry - * @srx: The address of the peer to contact - * @key: The security context to use (defaults to socket setting) - * - * Allow a kernel service to try resending a client call that failed due to a - * network error to a new address. The Tx queue is maintained intact, thereby - * relieving the need to re-encrypt any request data that has already been - * buffered. - */ -int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *srx, struct key *key) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; - - _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - - if (!key) - key = rx->key; - if (key && !key->payload.data[0]) - key = NULL; /* a no-security key */ - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = key; - cp.security_level = 0; - cp.exclusive = false; - cp.service_id = srx->srx_service; - - mutex_lock(&call->user_mutex); - - ret = rxrpc_prepare_call_for_retry(rx, call); - if (ret == 0) - ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL); - - mutex_unlock(&call->user_mutex); - rxrpc_put_peer(cp.peer); - _leave(" = %d", ret); - return ret; -} -EXPORT_SYMBOL(rxrpc_kernel_retry_call); - /** * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bc628acf4f4f..4b1a534d290a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -476,7 +476,6 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ - RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ @@ -517,6 +516,18 @@ enum rxrpc_call_state { NR__RXRPC_CALL_STATES }; +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ + RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ + RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ + RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ + NR__RXRPC_CALL_COMPLETIONS +}; + /* * Call Tx congestion management modes. */ @@ -761,15 +772,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); -int rxrpc_retry_client_call(struct rxrpc_sock *, - struct rxrpc_call *, - struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, - gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 8f1a8f85b1f9..8aa2937b069f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -324,48 +324,6 @@ error: return ERR_PTR(ret); } -/* - * Retry a call to a new address. It is expected that the Tx queue of the call - * will contain data previously packaged for an old call. - */ -int rxrpc_retry_client_call(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) -{ - const void *here = __builtin_return_address(0); - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(rx, call, cp, srx, gfp); - if (ret < 0) - goto error; - - trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, NULL); - - rxrpc_start_call_timer(call); - - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); - - _leave(" = 0"); - return 0; - -error: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); - trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), - here, ERR_PTR(ret)); - _leave(" = %d", ret); - return ret; -} - /* * Set up an incoming call. call->conn points to the connection. * This is called in BH context and isn't allowed to fail. @@ -533,61 +491,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _leave(""); } -/* - * Prepare a kernel service call for retry. - */ -int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int i; - u8 last = 0; - - _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - - trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), - here, (const void *)call->flags); - - ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED); - ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED); - ASSERT(list_empty(&call->recvmsg_link)); - - del_timer_sync(&call->timer); - - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn); - - if (call->conn) - rxrpc_disconnect_call(call); - - if (rxrpc_is_service_call(call) || - !call->tx_phase || - call->tx_hard_ack != 0 || - call->rx_hard_ack != 0 || - call->rx_top != 0) - return -EINVAL; - - call->state = RXRPC_CALL_UNINITIALISED; - call->completion = RXRPC_CALL_SUCCEEDED; - call->call_id = 0; - call->cid = 0; - call->cong_cwnd = 0; - call->cong_extra = 0; - call->cong_ssthresh = 0; - call->cong_mode = 0; - call->cong_dup_acks = 0; - call->cong_cumul_acks = 0; - call->acks_lowest_nak = 0; - - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - last |= call->rxtx_annotations[i]; - call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST; - call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS; - } - - _leave(" = 0"); - return 0; -} - /* * release all the calls associated with a socket */ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 521189f4b666..b2adfa825363 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -562,10 +562,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); write_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - else - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; write_unlock_bh(&call->state_lock); rxrpc_see_call(call); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index be01f9c5d963..46c9312085b1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -169,10 +169,8 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, ASSERTCMP(seq, ==, call->tx_top + 1); - if (last) { + if (last) annotation |= RXRPC_TX_ANNO_LAST; - set_bit(RXRPC_CALL_TX_LASTQ, &call->flags); - } /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. @@ -386,6 +384,11 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_total_len -= copy; } + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (msg_data_left(msg) == 0 && !more)) { @@ -425,16 +428,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, notify_end_tx); skb = NULL; } - - /* Check for the far side aborting the call or a network error - * occurring. If this happens, save any packet that was under - * construction so that in the case of a network error, the - * call can be retried or redirected. - */ - if (call->state == RXRPC_CALL_COMPLETE) { - ret = call->error; - goto out; - } } while (msg_data_left(msg) > 0); success: @@ -444,6 +437,11 @@ out: _leave(" = %d", ret); return ret; +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(" = %d", call->error); + return call->error; + maybe_error: if (copied) goto success; -- cgit v1.2.3 From 9174c3df1cd181c14913138d50ccbe539bb08335 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 10 Jan 2019 20:21:02 +0100 Subject: net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index c3b90fadaff6..8b43fe0130f7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -197,6 +197,15 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { [TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 }, }; +static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) +{ + if (!p) + return; + if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(&p->tcft_enc_metadata->dst); + kfree_rcu(p, rcu); +} + static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, @@ -360,8 +369,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); - if (params_new) - kfree_rcu(params_new, rcu); + tunnel_key_release_params(params_new); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -385,12 +393,7 @@ static void tunnel_key_release(struct tc_action *a) struct tcf_tunnel_key_params *params; params = rcu_dereference_protected(t->params, 1); - if (params) { - if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) - dst_release(¶ms->tcft_enc_metadata->dst); - - kfree_rcu(params, rcu); - } + tunnel_key_release_params(params); } static int tunnel_key_geneve_opts_dump(struct sk_buff *skb, -- cgit v1.2.3 From 13d7f46386e060df31b727c9975e38306fa51e7a Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 10 Jan 2019 14:40:33 -0500 Subject: tcp: allow MSG_ZEROCOPY transmission also in CLOSE_WAIT state TCP transmission with MSG_ZEROCOPY fails if the peer closes its end of the connection and so transitions this socket to CLOSE_WAIT state. Transmission in close wait state is acceptable. Other similar tests in the stack (e.g., in FastOpen) accept both states. Relax this test, too. Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg276886.html Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg227390.html Fixes: f214f915e7db ("tcp: enable MSG_ZEROCOPY") Reported-by: Marek Majkowski Signed-off-by: Willem de Bruijn CC: Yuchung Cheng CC: Neal Cardwell CC: Soheil Hassas Yeganeh CC: Alexey Kodanev Acked-by: Soheil Hassas Yeganeh Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 27e2f6837062..2079145a3b7c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1186,7 +1186,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) flags = msg->msg_flags; if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) { - if (sk->sk_state != TCP_ESTABLISHED) { + if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { err = -EINVAL; goto out_err; } -- cgit v1.2.3 From 26fc181e6cacacd4837da7ffe0c871134a421600 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 11 Jan 2019 06:27:35 -0800 Subject: fou, fou6: do not assume linear skbs Both gue_err() and gue6_err() incorrectly assume linear skbs. Fix them to use pskb_may_pull(). BUG: KMSAN: uninit-value in gue6_err+0x475/0xc40 net/ipv6/fou6.c:101 CPU: 0 PID: 18083 Comm: syz-executor1 Not tainted 5.0.0-rc1+ #7 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600 __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 gue6_err+0x475/0xc40 net/ipv6/fou6.c:101 __udp6_lib_err_encap_no_sk net/ipv6/udp.c:434 [inline] __udp6_lib_err_encap net/ipv6/udp.c:491 [inline] __udp6_lib_err+0x18d0/0x2590 net/ipv6/udp.c:522 udplitev6_err+0x118/0x130 net/ipv6/udplite.c:27 icmpv6_notify+0x462/0x9f0 net/ipv6/icmp.c:784 icmpv6_rcv+0x18ac/0x3fa0 net/ipv6/icmp.c:872 ip6_protocol_deliver_rcu+0xb5a/0x23a0 net/ipv6/ip6_input.c:394 ip6_input_finish net/ipv6/ip6_input.c:434 [inline] NF_HOOK include/linux/netfilter.h:289 [inline] ip6_input+0x2b6/0x350 net/ipv6/ip6_input.c:443 dst_input include/net/dst.h:450 [inline] ip6_rcv_finish+0x4e7/0x6d0 net/ipv6/ip6_input.c:76 NF_HOOK include/linux/netfilter.h:289 [inline] ipv6_rcv+0x34b/0x3f0 net/ipv6/ip6_input.c:272 __netif_receive_skb_one_core net/core/dev.c:4973 [inline] __netif_receive_skb net/core/dev.c:5083 [inline] process_backlog+0x756/0x10e0 net/core/dev.c:5923 napi_poll net/core/dev.c:6346 [inline] net_rx_action+0x78b/0x1a60 net/core/dev.c:6412 __do_softirq+0x53f/0x93a kernel/softirq.c:293 do_softirq_own_stack+0x49/0x80 arch/x86/entry/entry_64.S:1039 do_softirq kernel/softirq.c:338 [inline] __local_bh_enable_ip+0x16f/0x1a0 kernel/softirq.c:190 local_bh_enable+0x36/0x40 include/linux/bottom_half.h:32 rcu_read_unlock_bh include/linux/rcupdate.h:696 [inline] ip6_finish_output2+0x1d64/0x25f0 net/ipv6/ip6_output.c:121 ip6_finish_output+0xae4/0xbc0 net/ipv6/ip6_output.c:154 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x5ca/0x710 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0x164/0x1d0 net/ipv6/output_core.c:176 ip6_send_skb+0xfa/0x390 net/ipv6/ip6_output.c:1727 udp_v6_send_skb+0x1733/0x1d20 net/ipv6/udp.c:1169 udpv6_sendmsg+0x424e/0x45d0 net/ipv6/udp.c:1466 inet_sendmsg+0x54a/0x720 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmmsg+0x580/0xad0 net/socket.c:2211 __do_sys_sendmmsg net/socket.c:2240 [inline] __se_sys_sendmmsg+0xbd/0xe0 net/socket.c:2237 __x64_sys_sendmmsg+0x56/0x70 net/socket.c:2237 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x457ec9 Code: 6d b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b b7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f4a5204fc78 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 0000000000457ec9 RDX: 00000000040001ab RSI: 0000000020000240 RDI: 0000000000000003 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f4a520506d4 R13: 00000000004c4ce5 R14: 00000000004d85d8 R15: 00000000ffffffff Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:205 [inline] kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:159 kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2754 [inline] __kmalloc_node_track_caller+0xe9e/0xff0 mm/slub.c:4377 __kmalloc_reserve net/core/skbuff.c:140 [inline] __alloc_skb+0x309/0xa20 net/core/skbuff.c:208 alloc_skb include/linux/skbuff.h:1012 [inline] alloc_skb_with_frags+0x1c7/0xac0 net/core/skbuff.c:5288 sock_alloc_send_pskb+0xafd/0x10a0 net/core/sock.c:2091 sock_alloc_send_skb+0xca/0xe0 net/core/sock.c:2108 __ip6_append_data+0x42ed/0x5dc0 net/ipv6/ip6_output.c:1443 ip6_append_data+0x3c2/0x650 net/ipv6/ip6_output.c:1619 icmp6_send+0x2f5c/0x3c40 net/ipv6/icmp.c:574 icmpv6_send+0xe5/0x110 net/ipv6/ip6_icmp.c:43 ip6_link_failure+0x5c/0x2c0 net/ipv6/route.c:2231 dst_link_failure include/net/dst.h:427 [inline] vti_xmit net/ipv4/ip_vti.c:229 [inline] vti_tunnel_xmit+0xf3b/0x1ea0 net/ipv4/ip_vti.c:265 __netdev_start_xmit include/linux/netdevice.h:4382 [inline] netdev_start_xmit include/linux/netdevice.h:4391 [inline] xmit_one net/core/dev.c:3278 [inline] dev_hard_start_xmit+0x604/0xc40 net/core/dev.c:3294 __dev_queue_xmit+0x2e48/0x3b80 net/core/dev.c:3864 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3897 neigh_direct_output+0x42/0x50 net/core/neighbour.c:1511 neigh_output include/net/neighbour.h:508 [inline] ip6_finish_output2+0x1d4e/0x25f0 net/ipv6/ip6_output.c:120 ip6_finish_output+0xae4/0xbc0 net/ipv6/ip6_output.c:154 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x5ca/0x710 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0x164/0x1d0 net/ipv6/output_core.c:176 ip6_send_skb+0xfa/0x390 net/ipv6/ip6_output.c:1727 udp_v6_send_skb+0x1733/0x1d20 net/ipv6/udp.c:1169 udpv6_sendmsg+0x424e/0x45d0 net/ipv6/udp.c:1466 inet_sendmsg+0x54a/0x720 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg net/socket.c:631 [inline] ___sys_sendmsg+0xdb9/0x11b0 net/socket.c:2116 __sys_sendmmsg+0x580/0xad0 net/socket.c:2211 __do_sys_sendmmsg net/socket.c:2240 [inline] __se_sys_sendmmsg+0xbd/0xe0 net/socket.c:2237 __x64_sys_sendmmsg+0x56/0x70 net/socket.c:2237 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 Fixes: b8a51b38e4d4 ("fou, fou6: ICMP error handlers for FoU and GUE") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Stefano Brivio Cc: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv4/fou.c | 9 +++++++-- net/ipv6/fou6.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 632863541082..437070d1ffb1 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -1020,10 +1020,11 @@ static int gue_err(struct sk_buff *skb, u32 info) { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -1058,6 +1059,10 @@ static int gue_err(struct sk_buff *skb, u32 info) optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 7da7bf3b7fe3..b858bd5280bf 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -90,10 +90,11 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { int transport_offset = skb_transport_offset(skb); struct guehdr *guehdr; - size_t optlen; + size_t len, optlen; int ret; - if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr)) + len = sizeof(struct udphdr) + sizeof(struct guehdr); + if (!pskb_may_pull(skb, len)) return -EINVAL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; @@ -128,6 +129,10 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, optlen = guehdr->hlen << 2; + if (!pskb_may_pull(skb, len + optlen)) + return -EINVAL; + + guehdr = (struct guehdr *)&udp_hdr(skb)[1]; if (validate_gue_flags(guehdr, optlen)) return -EINVAL; -- cgit v1.2.3 From cd0c4e70fc0ccfa705cdf55efb27519ce9337a26 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 11 Jan 2019 18:55:42 -0800 Subject: net_sched: refetch skb protocol for each filter Martin reported a set of filters don't work after changing from reclassify to continue. Looking into the code, it looks like skb protocol is not always fetched for each iteration of the filters. But, as demonstrated by Martin, TC actions could modify skb->protocol, for example act_vlan, this means we have to refetch skb protocol in each iteration, rather than using the one we fetch in the beginning of the loop. This bug is _not_ introduced by commit 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}"), technically, if act_vlan is the only action that modifies skb protocol, then it is commit c7e2b9689ef8 ("sched: introduce vlan action") which introduced this bug. Reported-by: Martin Olsson Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8ce2a0507970..e2b5cb2eb34e 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1277,7 +1277,6 @@ EXPORT_SYMBOL(tcf_block_cb_unregister); int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { - __be16 protocol = tc_skb_protocol(skb); #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 4; const struct tcf_proto *orig_tp = tp; @@ -1287,6 +1286,7 @@ int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, reclassify: #endif for (; tp; tp = rcu_dereference_bh(tp->next)) { + __be16 protocol = tc_skb_protocol(skb); int err; if (tp->protocol != protocol && @@ -1319,7 +1319,6 @@ reset: } tp = first_tp; - protocol = tc_skb_protocol(skb); goto reclassify; #endif } -- cgit v1.2.3 From 04a4af334b971814eedf4e4a413343ad3287d9a9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 14 Jan 2019 09:16:56 +0000 Subject: openvswitch: Avoid OOB read when parsing flow nlattrs For nested and variable attributes, the expected length of an attribute is not known and marked by a negative number. This results in an OOB read when the expected length is later used to check if the attribute is all zeros. Fix this by using the actual length of the attribute rather than the expected length. Signed-off-by: Ross Lagerwall Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 435a4bdf8f89..691da853bef5 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -500,7 +500,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr, return -EINVAL; } - if (!nz || !is_all_zero(nla_data(nla), expected_len)) { + if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) { attrs |= 1 << type; a[type] = nla; } -- cgit v1.2.3 From 20704bd1633dd5afb29a321d3a615c9c8e9c9d05 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 14 Jan 2019 18:10:06 +0800 Subject: erspan: build the header with the right proto according to erspan_ver As said in draft-foschiano-erspan-03#section4: Different frame variants known as "ERSPAN Types" can be distinguished based on the GRE "Protocol Type" field value: Type I and II's value is 0x88BE while Type III's is 0x22EB [ETYPES]. So set it properly in erspan_xmit() according to erspan_ver. While at it, also remove the unused parameter 'proto' in erspan_fb_xmit(). Fixes: 94d7d8f29287 ("ip6_gre: add erspan v2 support") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 23 ++++++++++++++--------- net/ipv6/ip6_gre.c | 6 ++++-- 2 files changed, 18 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d1d09f3e5f9e..b1a74d80d868 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -569,8 +569,7 @@ err_free_skb: dev->stats.tx_dropped++; } -static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, - __be16 proto) +static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; @@ -578,10 +577,10 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, struct erspan_metadata *md; struct rtable *rt = NULL; bool truncate = false; + __be16 df, proto; struct flowi4 fl; int tunnel_hlen; int version; - __be16 df; int nhoff; int thoff; @@ -626,18 +625,20 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (version == 1) { erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); + proto = htons(ETH_P_ERSPAN); } else if (version == 2) { erspan_build_header_v2(skb, ntohl(tunnel_id_to_key32(key->tun_id)), md->u.md2.dir, get_hwid(&md->u.md2), truncate, true); + proto = htons(ETH_P_ERSPAN2); } else { goto err_free_rt; } gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); + proto, 0, htonl(tunnel->o_seqno++)); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -721,12 +722,13 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); bool truncate = false; + __be16 proto; if (!pskb_inet_may_pull(skb)) goto free_skb; if (tunnel->collect_md) { - erspan_fb_xmit(skb, dev, skb->protocol); + erspan_fb_xmit(skb, dev); return NETDEV_TX_OK; } @@ -742,19 +744,22 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, } /* Push ERSPAN header */ - if (tunnel->erspan_ver == 1) + if (tunnel->erspan_ver == 1) { erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else if (tunnel->erspan_ver == 2) + proto = htons(ETH_P_ERSPAN); + } else if (tunnel->erspan_ver == 2) { erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); - else + proto = htons(ETH_P_ERSPAN2); + } else { goto free_skb; + } tunnel->parms.o_flags &= ~TUNNEL_KEY; - __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); + __gre_xmit(skb, dev, &tunnel->parms.iph, proto); return NETDEV_TX_OK; free_skb: diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 026f08735549..b1be67ca6768 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -922,6 +922,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, __u8 dsfield = false; struct flowi6 fl6; int err = -EINVAL; + __be16 proto; __u32 mtu; int nhoff; int thoff; @@ -1035,8 +1036,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } /* Push GRE header. */ - gre_build_header(skb, 8, TUNNEL_SEQ, - htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++)); + proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN) + : htons(ETH_P_ERSPAN2); + gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++)); /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) -- cgit v1.2.3 From 400b8b9a2a17918f8ce00786f596f530e7f30d50 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 14 Jan 2019 18:34:02 +0800 Subject: sctp: allocate sctp_sockaddr_entry with kzalloc The similar issue as fixed in Commit 4a2eb0c37b47 ("sctp: initialize sin6_flowinfo for ipv6 addrs in sctp_inet6addr_event") also exists in sctp_inetaddr_event, as Alexander noticed. To fix it, allocate sctp_sockaddr_entry with kzalloc for both sctp ipv4 and ipv6 addresses, as does in sctp_v4/6_copy_addrlist(). Reported-by: Alexander Potapenko Signed-off-by: Xin Long Reported-by: syzbot+ae0c70c0c2d40c51bb92@syzkaller.appspotmail.com Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 5 +---- net/sctp/protocol.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index b9ed271b7ef7..ed8e006dae85 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -97,11 +97,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; - addr->a.v6.sin6_flowinfo = 0; addr->a.v6.sin6_addr = ifa->addr; addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; @@ -434,7 +432,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; - addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; addr->valid = 1; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d5878ae55840..4e0eeb113ef5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -101,7 +101,6 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; INIT_LIST_HEAD(&addr->list); @@ -776,10 +775,9 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, switch (ev) { case NETDEV_UP: - addr = kmalloc(sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; - addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; addr->valid = 1; spin_lock_bh(&net->sctp.local_addr_lock); -- cgit v1.2.3 From 01b833ab44c9e484060aad72267fc7e71beb559b Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 14 Jan 2019 13:38:43 +0300 Subject: net/core/neighbour: fix kmemleak minimal reference count for hash tables This should be 1 for normal allocations, 0 disables leak reporting. Signed-off-by: Konstantin Khlebnikov Reported-by: Cong Wang Fixes: 85704cb8dcfd ("net/core/neighbour: tell kmemleak about hash tables") Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3e27a779f288..96fdc9134726 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -450,7 +450,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); - kmemleak_alloc(buckets, size, 0, GFP_ATOMIC); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); } if (!buckets) { kfree(ret); -- cgit v1.2.3 From a5a82d841186d13c4a6d500dfcf7d02b4195e3ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Jan 2019 10:52:45 -0800 Subject: ipv6: route: place a warning with duplicated string with correct extack "IPv6: " prefix is already added by pr_fmt, no need to include it again in the pr_warn() format. The message predates extack support, we can replace the whole thing with an extack message. Suggested-by: David Ahern Signed-off-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 40b225f87d5e..964491cf3672 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4251,17 +4251,6 @@ struct rt6_nh { struct list_head next; }; -static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) -{ - struct rt6_nh *nh; - - list_for_each_entry(nh, rt6_nh_list, next) { - pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", - &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, - nh->r_cfg.fc_ifindex); - } -} - static int ip6_route_info_append(struct net *net, struct list_head *rt6_nh_list, struct fib6_info *rt, @@ -4407,7 +4396,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nh->fib6_info = NULL; if (err) { if (replace && nhn) - ip6_print_replace_route_err(&rt6_nh_list); + NL_SET_ERR_MSG_MOD(extack, + "multipath route replace failed (check consistency of installed routes)"); err_nh = nh; goto add_errout; } -- cgit v1.2.3 From 1a9352687c19e4937d861ff2c5c6fc45c0a08aff Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 16 Jan 2019 01:35:22 +0900 Subject: net: bpfilter: change section name of bpfilter UMH blob. The section of bpfilter UMH blob is the ".bpfilter_umh". but this is not an explicit section. so linking warning occurred at compile time for the powerpc. So, this patch makes use of the ".rodata" instead of the ".bpfilter_umh". Config condition: CONFIG_BPFILTER=y CONFIG_BPFILTER_UMH=y Result: ld: warning: orphan section `.bpfilter_umh' from `net/bpfilter/bpfilter_umh_blob.o' being placed in section `.bpfilter_umh' Fixes: 61fbf5933d42 ("net: bpfilter: restart bpfilter_umh when error occurred") Reported-by: Stephen Rothwell Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/bpfilter/bpfilter_umh_blob.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S index 7f1c521dcc2f..9ea6100dca87 100644 --- a/net/bpfilter/bpfilter_umh_blob.S +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ - .section .bpfilter_umh, "a" + .section .rodata, "a" .global bpfilter_umh_start bpfilter_umh_start: .incbin "net/bpfilter/bpfilter_umh" -- cgit v1.2.3 From 0f149c9fec3cd720628ecde83bfc6f64c1e7dcb6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 15 Jan 2019 11:40:02 -0500 Subject: udp: with udp_segment release on error path Failure __ip_append_data triggers udp_flush_pending_frames, but these tests happen later. The skb must be freed directly. Fixes: bec1f6f697362 ("udp: generate gso with UDP_SEGMENT") Reported-by: Eric Dumazet Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++++++++---- net/ipv6/udp.c | 16 ++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3fb0ed5e4789..3d2a81bdc2ab 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -847,15 +847,23 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (sk->sk_no_check_tx) + } + if (sk->sk_no_check_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c3505006f8e..e1f2b9660666 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1132,15 +1132,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) + if (hlen + cork->gso_size > cork->fragsize) { + kfree_skb(skb); return -EINVAL; - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) + } + if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + kfree_skb(skb); return -EINVAL; - if (udp_sk(sk)->no_check6_tx) + } + if (udp_sk(sk)->no_check6_tx) { + kfree_skb(skb); return -EINVAL; + } if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) + dst_xfrm(skb_dst(skb))) { + kfree_skb(skb); return -EIO; + } skb_shinfo(skb)->gso_size = cork->gso_size; skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; -- cgit v1.2.3 From c61c27687a5abce11431e6de1adb6e36099b9859 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Wed, 16 Jan 2019 20:35:41 +0100 Subject: bpf: Correctly annotate implicit fall through in bpf_base_func_proto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a plan to build the kernel with -Wimplicit-fallthrough and this place in the code produced a warnings (W=1). To preserve as much of the existing comment only change a ‘:’ into a ‘,’. This is enough change, to match the regular expression expected by GCC. This commit removes the following warning: net/core/filter.c:5310:6: warning: this statement may fall through [-Wimplicit-fallthrough=] Signed-off-by: Mathieu Malaterre Signed-off-by: Daniel Borkmann --- net/core/filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 2b3b436ef545..d9076e609fca 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5309,7 +5309,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_trace_printk: if (capable(CAP_SYS_ADMIN)) return bpf_get_trace_printk_proto(); - /* else: fall through */ + /* else, fall through */ default: return NULL; } -- cgit v1.2.3 From 2cddd20147826aef283115abb00012d4dafe3cdb Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Wed, 16 Jan 2019 16:53:52 +0100 Subject: net/sched: cls_flower: allocate mask dynamically in fl_change() Recent changes (especially 05cd271fd61a ("cls_flower: Support multiple masks per priority")) in the fl_flow_mask structure grow it and its current size e.g. on x86_64 with defconfig is 760 bytes and more than 1024 bytes with some debug options enabled. Prior the mentioned commit its size was 176 bytes (using defconfig on x86_64). With regard to this fact it's reasonable to allocate this structure dynamically in fl_change() to reduce its stack size. v2: - use kzalloc() instead of kcalloc() Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") Cc: Jiri Pirko Cc: Paul Blakey Acked-by: Jiri Pirko Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index dad04e710493..f6aa57fbbbaf 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1290,17 +1290,23 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; + struct fl_flow_mask *mask; struct nlattr **tb; - struct fl_flow_mask mask = {}; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; - tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); - if (!tb) + mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL); + if (!mask) return -ENOBUFS; + tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL); + if (!tb) { + err = -ENOBUFS; + goto errout_mask_alloc; + } + err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy, NULL); if (err < 0) @@ -1343,12 +1349,12 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr, + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, tp->chain->tmplt_priv, extack); if (err) goto errout_idr; - err = fl_check_assign_mask(head, fnew, fold, &mask); + err = fl_check_assign_mask(head, fnew, fold, mask); if (err) goto errout_idr; @@ -1392,6 +1398,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } kfree(tb); + kfree(mask); return 0; errout_mask: @@ -1405,6 +1412,8 @@ errout: kfree(fnew); errout_tb: kfree(tb); +errout_mask_alloc: + kfree(mask); return err; } -- cgit v1.2.3 From 8f6b5392856a4b74224e257f3e0874a163b04603 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 16 Jan 2019 19:17:44 +0300 Subject: udp: add missing rehash callback to udplite After commit 4cdeeee9252a ("net: udp: prefer listeners bound to an address"), UDP-Lite only works when specifying a local address for the sockets. This is related to the problem addressed in the commit 719f835853a9 ("udp: add rehash on connect()"). Moreover, __udp4_lib_lookup() now looks for a socket immediately in the secondary hash table. The issue was found with LTP/network tests (UDP-Lite test-cases). Fixes: 4cdeeee9252a ("net: udp: prefer listeners bound to an address") Signed-off-by: Alexey Kodanev Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- net/ipv4/udp_impl.h | 1 + net/ipv4/udplite.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3d2a81bdc2ab..5c3cd5d84a6f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1926,7 +1926,7 @@ void udp_lib_rehash(struct sock *sk, u16 newhash) } EXPORT_SYMBOL(udp_lib_rehash); -static void udp_v4_rehash(struct sock *sk) +void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 322672655419..6b2fa77eeb1c 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -10,6 +10,7 @@ int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); int udp_v4_get_port(struct sock *sk, unsigned short snum); +void udp_v4_rehash(struct sock *sk); int udp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 39c7f17d916f..3c94b8f0ff27 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -53,6 +53,7 @@ struct proto udplite_prot = { .sendpage = udp_sendpage, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, -- cgit v1.2.3 From f7c46156f4a9d6ba5c6bcc5c48945e87b0f08c65 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Wed, 16 Jan 2019 19:17:45 +0300 Subject: udp6: add missing rehash callback to udplite After commit 23b0269e58ae ("net: udp6: prefer listeners bound to an address"), UDP-Lite only works when specifying a local address for the sockets. This is related to the problem addressed in the commit 719f835853a9 ("udp: add rehash on connect()"). Moreover, __udp6_lib_lookup() now looks for a socket immediately in the secondary hash table. And this issue was found with LTP/network tests as well. Fixes: 23b0269e58ae ("net: udp6: prefer listeners bound to an address") Signed-off-by: Alexey Kodanev Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- net/ipv6/udp_impl.h | 1 + net/ipv6/udplite.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e1f2b9660666..2596ffdeebea 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -102,7 +102,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -static void udp_v6_rehash(struct sock *sk) +void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 5730e6503cb4..20e324b6f358 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -13,6 +13,7 @@ int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); int udp_v6_get_port(struct sock *sk, unsigned short snum); +void udp_v6_rehash(struct sock *sk); int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index a125aebc29e5..f35907836444 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .recvmsg = udpv6_recvmsg, .hash = udp_lib_hash, .unhash = udp_lib_unhash, + .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, -- cgit v1.2.3 From f4924f24da8c7ef64195096817f3cde324091d97 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Wed, 16 Jan 2019 08:47:54 -0800 Subject: bpf: bpf_setsockopt: reset sock dst on SO_MARK changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In sock_setsockopt() (net/core/sock.h), when SO_MARK option is used to change sk_mark, sk_dst_reset(sk) is called. The same should be done in bpf_setsockopt(). Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Reported-by: Maciej Żenczykowski Signed-off-by: Peter Oskolkov Acked-by: Martin KaFai Lau Reviewed-by: Maciej Żenczykowski Signed-off-by: Daniel Borkmann --- net/core/filter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d9076e609fca..d9ea51b47f35 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4132,7 +4132,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_rcvlowat = val ? : 1; break; case SO_MARK: - sk->sk_mark = val; + if (sk->sk_mark != val) { + sk->sk_mark = val; + sk_dst_reset(sk); + } break; default: ret = -EINVAL; -- cgit v1.2.3 From e224c390a6259c529f7b2a6bd215a087b3344f5c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 17 Jan 2019 08:51:01 -0800 Subject: bpf: fix SO_MAX_PACING_RATE to support TCP internal pacing If sch_fq packet scheduler is not used, TCP can fallback to internal pacing, but this requires sk_pacing_status to be properly set. Fixes: 8c4b4c7e9ff0 ("bpf: Add setsockopt helper function to bpf") Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Lawrence Brakmo Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- net/core/filter.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index d9ea51b47f35..dab10d21cae8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4119,6 +4119,10 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; case SO_MAX_PACING_RATE: /* 32bit version */ + if (val != ~0U) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val; sk->sk_pacing_rate = min(sk->sk_pacing_rate, sk->sk_max_pacing_rate); -- cgit v1.2.3 From 88a8121dc1d3d0dbddd411b79ed236b6b6ea415c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 17 Jan 2019 11:27:22 +0100 Subject: af_packet: fix raw sockets over 6in4 tunnel Since commit cb9f1b783850, scapy (which uses an AF_PACKET socket in SOCK_RAW mode) is unable to send a basic icmp packet over a sit tunnel: Here is a example of the setup: $ ip link set ntfp2 up $ ip addr add 10.125.0.1/24 dev ntfp2 $ ip tunnel add tun1 mode sit ttl 64 local 10.125.0.1 remote 10.125.0.2 dev ntfp2 $ ip addr add fd00:cafe:cafe::1/128 dev tun1 $ ip link set dev tun1 up $ ip route add fd00:200::/64 dev tun1 $ scapy >>> p = [] >>> p += IPv6(src='fd00:100::1', dst='fd00:200::1')/ICMPv6EchoRequest() >>> send(p, count=1, inter=0.1) >>> quit() $ ip -s link ls dev tun1 | grep -A1 "TX.*errors" TX: bytes packets errors dropped carrier collsns 0 0 1 0 0 0 The problem is that the network offset is set to the hard_header_len of the output device (tun1, ie 14 + 20) and in our case, because the packet is small (48 bytes) the pskb_inet_may_pull() fails (it tries to pull 40 bytes (ipv6 header) starting from the network offset). This problem is more generally related to device with variable hard header length. To avoid a too intrusive patch in the current release, a (ugly) workaround is proposed in this patch. It has to be cleaned up in net-next. Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=993675a3100b1 Link: http://patchwork.ozlabs.org/patch/1024489/ Fixes: cb9f1b783850 ("ip: validate header length on virtual device xmit") CC: Willem de Bruijn CC: Maxim Mikityanskiy Signed-off-by: Nicolas Dichtel Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d0945253f43b..3b1a78906bc0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2887,7 +2887,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) goto out_free; } else if (reserve) { skb_reserve(skb, -reserve); - if (len < reserve) + if (len < reserve + sizeof(struct ipv6hdr) && + dev->min_header_len != dev->hard_header_len) skb_reset_network_header(skb); } -- cgit v1.2.3 From 28c1382fa28f2e2d9d0d6f25ae879b5af2ecbd03 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Thu, 17 Jan 2019 09:46:41 +0800 Subject: net: bridge: Fix ethernet header pointer before check skb forwardable The skb header should be set to ethernet header before using is_skb_forwardable. Because the ethernet header length has been considered in is_skb_forwardable(including dev->hard_header_len length). To reproduce the issue: 1, add 2 ports on linux bridge br using following commands: $ brctl addbr br $ brctl addif br eth0 $ brctl addif br eth1 2, the MTU of eth0 and eth1 is 1500 3, send a packet(Data 1480, UDP 8, IP 20, Ethernet 14, VLAN 4) from eth0 to eth1 So the expect result is packet larger than 1500 cannot pass through eth0 and eth1. But currently, the packet passes through success, it means eth1's MTU limit doesn't take effect. Fixes: f6367b4660dd ("bridge: use is_skb_forwardable in forward path") Cc: bridge@lists.linux-foundation.org Cc: Nkolay Aleksandrov Cc: Roopa Prabhu Cc: Stephen Hemminger Signed-off-by: Yunjian Wang Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 2cb8da465b98..48ddc60b4fbd 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -36,10 +36,10 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { + skb_push(skb, ETH_HLEN); if (!is_skb_forwardable(skb->dev, skb)) goto drop; - skb_push(skb, ETH_HLEN); br_drop_fake_rtable(skb); if (skb->ip_summed == CHECKSUM_PARTIAL && @@ -98,12 +98,11 @@ static void __br_forward(const struct net_bridge_port *to, net = dev_net(indev); } else { if (unlikely(netpoll_tx_running(to->br->dev))) { - if (!is_skb_forwardable(skb->dev, skb)) { + skb_push(skb, ETH_HLEN); + if (!is_skb_forwardable(skb->dev, skb)) kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); + else br_netpoll_send_skb(to, skb); - } return; } br_hook = NF_BR_LOCAL_OUT; -- cgit v1.2.3 From 87fff3cacd0112bcaf42f932c1e44ae32b42f1fb Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 17 Jan 2019 23:11:30 +0800 Subject: neighbour: Do not perturb drop profiles when neigh_probe Replace the kfree_skb() by consume_skb() to be drop monitor(dropwatch, perf) friendly. Signed-off-by: Yang Wei Signed-off-by: David S. Miller --- net/core/neighbour.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 96fdc9134726..4230400b9a30 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1007,7 +1007,7 @@ static void neigh_probe(struct neighbour *neigh) if (neigh->ops->solicit) neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - kfree_skb(skb); + consume_skb(skb); } /* Called when a timer expires for a neighbour entry. */ -- cgit v1.2.3 From 6c57f0458022298e4da1729c67bd33ce41c14e7a Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Jan 2019 15:34:38 +0000 Subject: net: Fix usage of pskb_trim_rcsum In certain cases, pskb_trim_rcsum() may change skb pointers. Reinitialize header pointers afterwards to avoid potential use-after-frees. Add a note in the documentation of pskb_trim_rcsum(). Found by KASAN. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/ppp/pppoe.c | 1 + include/linux/skbuff.h | 1 + net/bridge/br_netfilter_ipv6.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 1 + net/ipv4/ip_input.c | 1 + 5 files changed, 5 insertions(+) (limited to 'net') diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 62dc564b251d..f22639f0116a 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -445,6 +445,7 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, if (pskb_trim_rcsum(skb, len)) goto drop; + ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93f56fddd92a..95d25b010a25 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3218,6 +3218,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); * * This is exactly the same as pskb_trim except that it ensures the * checksum of received packets are still valid after the operation. + * It can change skb pointers. */ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 94039f588f1d..564710f88f93 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -131,6 +131,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) IPSTATS_MIB_INDISCARDS); goto drop; } + hdr = ipv6_hdr(skb); } if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) goto drop; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 08cbed7d940e..419e8edf23ba 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -229,6 +229,7 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook) pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h))) return false; + ip6h = ipv6_hdr(skb); thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 26921f6b3b92..51d8efba6de2 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -488,6 +488,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) goto drop; } + iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ -- cgit v1.2.3 From 710ae72877378e7cde611efd30fe90502a6e5b30 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Fri, 18 Jan 2019 15:58:00 +0000 Subject: net: bridge: Mark FDB entries that were added by user as such Externally learned entries can be added by a user or by a switch driver that is notifying the bridge driver about entries that were learned in hardware. In the first case, the entries are not marked with the 'added_by_user' flag, which causes switch drivers to ignore them and not offload them. The 'added_by_user' flag can be set on externally learned FDB entries based on the 'swdev_notify' parameter in br_fdb_external_learn_add(), which effectively means if the created / updated FDB entry was added by a user or not. Fixes: 816a3bed9549 ("switchdev: Add fdb.added_by_user to switchdev notifications") Signed-off-by: Ido Schimmel Reported-by: Alexander Petrovskiy Reviewed-by: Petr Machata Cc: Roopa Prabhu Cc: Nikolay Aleksandrov Cc: bridge@lists.linux-foundation.org Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index fe3c758791ca..9e14767500ea 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1128,6 +1128,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, err = -ENOMEM; goto err_unlock; } + if (swdev_notify) + fdb->added_by_user = 1; fdb->added_by_external_learn = 1; fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } else { @@ -1147,6 +1149,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, modified = true; } + if (swdev_notify) + fdb->added_by_user = 1; + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } -- cgit v1.2.3 From e7c87bd6cc4ec7b0ac1ed0a88a58f8206c577488 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 15 Jan 2019 20:19:22 -0500 Subject: bpf: in __bpf_redirect_no_mac pull mac only if present Syzkaller was able to construct a packet of negative length by redirecting from bpf_prog_test_run_skb with BPF_PROG_TYPE_LWT_XMIT: BUG: KASAN: slab-out-of-bounds in memcpy include/linux/string.h:345 [inline] BUG: KASAN: slab-out-of-bounds in skb_copy_from_linear_data include/linux/skbuff.h:3421 [inline] BUG: KASAN: slab-out-of-bounds in __pskb_copy_fclone+0x2dd/0xeb0 net/core/skbuff.c:1395 Read of size 4294967282 at addr ffff8801d798009c by task syz-executor2/12942 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 check_memory_region_inline mm/kasan/kasan.c:260 [inline] check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267 memcpy+0x23/0x50 mm/kasan/kasan.c:302 memcpy include/linux/string.h:345 [inline] skb_copy_from_linear_data include/linux/skbuff.h:3421 [inline] __pskb_copy_fclone+0x2dd/0xeb0 net/core/skbuff.c:1395 __pskb_copy include/linux/skbuff.h:1053 [inline] pskb_copy include/linux/skbuff.h:2904 [inline] skb_realloc_headroom+0xe7/0x120 net/core/skbuff.c:1539 ipip6_tunnel_xmit net/ipv6/sit.c:965 [inline] sit_tunnel_xmit+0xe1b/0x30d0 net/ipv6/sit.c:1029 __netdev_start_xmit include/linux/netdevice.h:4325 [inline] netdev_start_xmit include/linux/netdevice.h:4334 [inline] xmit_one net/core/dev.c:3219 [inline] dev_hard_start_xmit+0x295/0xc90 net/core/dev.c:3235 __dev_queue_xmit+0x2f0d/0x3950 net/core/dev.c:3805 dev_queue_xmit+0x17/0x20 net/core/dev.c:3838 __bpf_tx_skb net/core/filter.c:2016 [inline] __bpf_redirect_common net/core/filter.c:2054 [inline] __bpf_redirect+0x5cf/0xb20 net/core/filter.c:2061 ____bpf_clone_redirect net/core/filter.c:2094 [inline] bpf_clone_redirect+0x2f6/0x490 net/core/filter.c:2066 bpf_prog_41f2bcae09cd4ac3+0xb25/0x1000 The generated test constructs a packet with mac header, network header, skb->data pointing to network header and skb->len 0. Redirecting to a sit0 through __bpf_redirect_no_mac pulls the mac length, even though skb->data already is at skb->network_header. bpf_prog_test_run_skb has already pulled it as LWT_XMIT !is_l2. Update the offset calculation to pull only if skb->data differs from skb->network_header, which is not true in this case. The test itself can be run only from commit 1cf1cae963c2 ("bpf: introduce BPF_PROG_TEST_RUN command"), but the same type of packets with skb at network header could already be built from lwt xmit hooks, so this fix is more relevant to that commit. Also set the mac header on redirect from LWT_XMIT, as even after this change to __bpf_redirect_no_mac that field is expected to be set, but is not yet in ip_finish_output2. Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Reported-by: syzbot Signed-off-by: Willem de Bruijn Acked-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann --- net/core/filter.c | 21 +++++++++++---------- net/core/lwt_bpf.c | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index dab10d21cae8..7559d6835ecb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2020,18 +2020,19 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, u32 flags) { - /* skb->mac_len is not set on normal egress */ - unsigned int mlen = skb->network_header - skb->mac_header; + unsigned int mlen = skb_network_offset(skb); - __skb_pull(skb, mlen); + if (mlen) { + __skb_pull(skb, mlen); - /* At ingress, the mac header has already been pulled once. - * At egress, skb_pospull_rcsum has to be done in case that - * the skb is originated from ingress (i.e. a forwarded skb) - * to ensure that rcsum starts at net header. - */ - if (!skb_at_tc_ingress(skb)) - skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + /* At ingress, the mac header has already been pulled once. + * At egress, skb_pospull_rcsum has to be done in case that + * the skb is originated from ingress (i.e. a forwarded skb) + * to ensure that rcsum starts at net header. + */ + if (!skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); + } skb_pop_mac_header(skb); skb_reset_mac_len(skb); return flags & BPF_F_INGRESS ? diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 3e85437f7106..a648568c5e8f 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -63,6 +63,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, lwt->name ? : ""); ret = BPF_OK; } else { + skb_reset_mac_header(skb); ret = skb_do_redirect(skb); if (ret == 0) ret = BPF_REDIRECT; -- cgit v1.2.3