summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c100
-rw-r--r--net/8021q/vlan_core.c128
-rw-r--r--net/batman-adv/Kconfig10
-rw-r--r--net/batman-adv/bat_iv_ogm.c25
-rw-r--r--net/batman-adv/bat_v.c26
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c82
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/distributed-arp-table.c42
-rw-r--r--net/batman-adv/fragmentation.c2
-rw-r--r--net/batman-adv/gateway_client.c3
-rw-r--r--net/batman-adv/hard-interface.c3
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h6
-rw-r--r--net/batman-adv/log.c60
-rw-r--r--net/batman-adv/main.c3
-rw-r--r--net/batman-adv/main.h3
-rw-r--r--net/batman-adv/multicast.c51
-rw-r--r--net/batman-adv/netlink.c24
-rw-r--r--net/batman-adv/trace.c2
-rw-r--r--net/batman-adv/trace.h6
-rw-r--r--net/batman-adv/translation-table.c41
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bpf/test_run.c36
-rw-r--r--net/bridge/br.c89
-rw-r--r--net/bridge/br_device.c11
-rw-r--r--net/bridge/br_fdb.c46
-rw-r--r--net/bridge/br_if.c23
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_mdb.c126
-rw-r--r--net/bridge/br_multicast.c439
-rw-r--r--net/bridge/br_netfilter_hooks.c15
-rw-r--r--net/bridge/br_netlink.c67
-rw-r--r--net/bridge/br_private.h88
-rw-r--r--net/bridge/br_switchdev.c5
-rw-r--r--net/bridge/br_sysfs_br.c36
-rw-r--r--net/bridge/br_sysfs_if.c3
-rw-r--r--net/bridge/br_vlan.c74
-rw-r--r--net/can/raw.c15
-rw-r--r--net/ceph/messenger.c12
-rw-r--r--net/core/datagram.c45
-rw-r--r--net/core/dev.c228
-rw-r--r--net/core/dev_addr_lists.c100
-rw-r--r--net/core/dev_ioctl.c4
-rw-r--r--net/core/devlink.c5
-rw-r--r--net/core/filter.c455
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/neighbour.c237
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net_namespace.c159
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c518
-rw-r--r--net/core/skbuff.c118
-rw-r--r--net/core/sock.c6
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/stream.c2
-rw-r--r--net/dccp/proto.c3
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/dsa.c8
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/master.c63
-rw-r--r--net/dsa/port.c3
-rw-r--r--net/dsa/slave.c86
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_dsa.c1
-rw-r--r--net/dsa/tag_edsa.c1
-rw-r--r--net/dsa/tag_gswip.c1
-rw-r--r--net/dsa/tag_ksz.c117
-rw-r--r--net/dsa/tag_lan9303.c1
-rw-r--r--net/dsa/tag_mtk.c1
-rw-r--r--net/dsa/tag_qca.c1
-rw-r--r--net/dsa/tag_trailer.c1
-rw-r--r--net/ethernet/eth.c56
-rw-r--r--net/ieee802154/nl-phy.c2
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/fou.c7
-rw-r--r--net/ipv4/inet_fragment.c29
-rw-r--r--net/ipv4/inet_hashtables.c61
-rw-r--r--net/ipv4/ip_forward.c7
-rw-r--r--net/ipv4/ip_fragment.c7
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_input.c4
-rw-r--r--net/ipv4/ip_output.c41
-rw-r--r--net/ipv4/ip_tunnel_core.c4
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c38
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c4
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c17
-rw-r--r--net/ipv4/tcp_bbr.c15
-rw-r--r--net/ipv4/tcp_bpf.c17
-rw-r--r--net/ipv4/tcp_input.c109
-rw-r--r--net/ipv4/tcp_ipv4.c110
-rw-r--r--net/ipv4/tcp_offload.c6
-rw-r--r--net/ipv4/tcp_output.c85
-rw-r--r--net/ipv4/tcp_timer.c22
-rw-r--r--net/ipv4/udp.c80
-rw-r--r--net/ipv4/udp_offload.c15
-rw-r--r--net/ipv4/udp_tunnel.c17
-rw-r--r--net/ipv6/addrconf.c21
-rw-r--r--net/ipv6/inet6_hashtables.c54
-rw-r--r--net/ipv6/ip6_gre.c6
-rw-r--r--net/ipv6/ip6_input.c4
-rw-r--r--net/ipv6/ip6_offload.c35
-rw-r--r--net/ipv6/ip6_output.c90
-rw-r--r--net/ipv6/ip6_udp_tunnel.c16
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/netfilter.c3
-rw-r--r--net/ipv6/netfilter/ip6t_MASQUERADE.c8
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c8
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c49
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c4
-rw-r--r--net/ipv6/reassembly.c8
-rw-r--r--net/ipv6/route.c14
-rw-r--r--net/ipv6/seg6_iptunnel.c1
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/tcpv6_offload.c7
-rw-r--r--net/ipv6/udp.c115
-rw-r--r--net/ipv6/udp_offload.c7
-rw-r--r--net/iucv/af_iucv.c41
-rw-r--r--net/l2tp/l2tp_core.c9
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/l3mdev/l3mdev.c18
-rw-r--r--net/mac80211/cfg.c7
-rw-r--r--net/mac80211/iface.c2
-rw-r--r--net/mac80211/mlme.c12
-rw-r--r--net/mac80211/rx.c5
-rw-r--r--net/mac80211/status.c2
-rw-r--r--net/mac80211/tx.c4
-rw-r--r--net/ncsi/internal.h24
-rw-r--r--net/ncsi/ncsi-aen.c75
-rw-r--r--net/ncsi/ncsi-manage.c550
-rw-r--r--net/ncsi/ncsi-netlink.c233
-rw-r--r--net/ncsi/ncsi-pkt.h9
-rw-r--r--net/ncsi/ncsi-rsp.c43
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/nf_conncount.c44
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c14
-rw-r--r--net/netfilter/nf_tables_api.c46
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c15
-rw-r--r--net/netfilter/nfnetlink_queue.c5
-rw-r--r--net/netfilter/nft_compat.c3
-rw-r--r--net/netfilter/nft_flow_offload.c5
-rw-r--r--net/netfilter/xt_RATEEST.c10
-rw-r--r--net/netfilter/xt_hashlimit.c9
-rw-r--r--net/openvswitch/conntrack.c2
-rw-r--r--net/openvswitch/flow.c2
-rw-r--r--net/openvswitch/vport-geneve.c2
-rw-r--r--net/openvswitch/vport-gre.c2
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rxrpc/af_rxrpc.c27
-rw-r--r--net/sched/act_api.c221
-rw-r--r--net/sched/act_mirred.c3
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_police.c60
-rw-r--r--net/sched/act_tunnel_key.c25
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c337
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_flower.c205
-rw-r--r--net/sched/cls_matchall.c5
-rw-r--r--net/sched/cls_u32.c10
-rw-r--r--net/sched/sch_api.c21
-rw-r--r--net/sched/sch_etf.c79
-rw-r--r--net/sched/sch_fq.c59
-rw-r--r--net/sched/sch_gred.c375
-rw-r--r--net/sched/sch_mq.c9
-rw-r--r--net/sched/sch_netem.c101
-rw-r--r--net/sched/sch_red.c19
-rw-r--r--net/sctp/associola.c11
-rw-r--r--net/sctp/bind_addr.c28
-rw-r--r--net/sctp/chunk.c14
-rw-r--r--net/sctp/input.c129
-rw-r--r--net/sctp/output.c25
-rw-r--r--net/sctp/primitive.c2
-rw-r--r--net/sctp/sm_make_chunk.c3
-rw-r--r--net/sctp/sm_sideeffect.c12
-rw-r--r--net/sctp/sm_statetable.c2
-rw-r--r--net/sctp/socket.c203
-rw-r--r--net/sctp/stream.c1
-rw-r--r--net/sctp/stream_interleave.c46
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/smc/af_smc.c62
-rw-r--r--net/smc/smc_cdc.c26
-rw-r--r--net/smc/smc_cdc.h60
-rw-r--r--net/smc/smc_clc.c33
-rw-r--r--net/smc/smc_clc.h3
-rw-r--r--net/smc/smc_core.c36
-rw-r--r--net/smc/smc_core.h11
-rw-r--r--net/smc/smc_ism.c43
-rw-r--r--net/smc/smc_ism.h1
-rw-r--r--net/smc/smc_llc.c57
-rw-r--r--net/smc/smc_llc.h2
-rw-r--r--net/smc/smc_wr.c4
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/auth_generic.c8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c65
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/sunrpc/xdr.c7
-rw-r--r--net/sunrpc/xprt.c13
-rw-r--r--net/sunrpc/xprtsock.c81
-rw-r--r--net/switchdev/switchdev.c213
-rw-r--r--net/tipc/discover.c19
-rw-r--r--net/tipc/link.c70
-rw-r--r--net/tipc/msg.h1
-rw-r--r--net/tipc/net.c45
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/node.c13
-rw-r--r--net/tipc/socket.c15
-rw-r--r--net/tls/tls_sw.c11
-rw-r--r--net/wireless/mlme.c4
-rw-r--r--net/wireless/nl80211.c1
-rw-r--r--net/wireless/sme.c8
-rw-r--r--net/wireless/util.c2
-rw-r--r--net/x25/af_x25.c18
-rw-r--r--net/x25/x25_in.c9
225 files changed, 6250 insertions, 3148 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1b7a375c6616..dc4411165e43 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -358,6 +358,7 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vlan_group *grp;
struct vlan_info *vlan_info;
@@ -460,7 +461,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
- dev_change_flags(vlandev, flgs | IFF_UP);
+ dev_change_flags(vlandev, flgs | IFF_UP,
+ extack);
netif_stacked_transfer_operstate(dev, vlandev);
}
break;
@@ -648,93 +650,6 @@ out:
return err;
}
-static struct sk_buff *vlan_gro_receive(struct list_head *head,
- struct sk_buff *skb)
-{
- const struct packet_offload *ptype;
- unsigned int hlen, off_vlan;
- struct sk_buff *pp = NULL;
- struct vlan_hdr *vhdr;
- struct sk_buff *p;
- __be16 type;
- int flush = 1;
-
- off_vlan = skb_gro_offset(skb);
- hlen = off_vlan + sizeof(*vhdr);
- vhdr = skb_gro_header_fast(skb, off_vlan);
- if (skb_gro_header_hard(skb, hlen)) {
- vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
- if (unlikely(!vhdr))
- goto out;
- }
-
- type = vhdr->h_vlan_encapsulated_proto;
-
- rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (!ptype)
- goto out_unlock;
-
- flush = 0;
-
- list_for_each_entry(p, head, list) {
- struct vlan_hdr *vhdr2;
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
- if (compare_vlan_header(vhdr, vhdr2))
- NAPI_GRO_CB(p)->same_flow = 0;
- }
-
- skb_gro_pull(skb, sizeof(*vhdr));
- skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
- pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
-
-out_unlock:
- rcu_read_unlock();
-out:
- skb_gro_flush_final(skb, pp, flush);
-
- return pp;
-}
-
-static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
-{
- struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
- __be16 type = vhdr->h_vlan_encapsulated_proto;
- struct packet_offload *ptype;
- int err = -ENOENT;
-
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
-
- rcu_read_unlock();
- return err;
-}
-
-static struct packet_offload vlan_packet_offloads[] __read_mostly = {
- {
- .type = cpu_to_be16(ETH_P_8021Q),
- .priority = 10,
- .callbacks = {
- .gro_receive = vlan_gro_receive,
- .gro_complete = vlan_gro_complete,
- },
- },
- {
- .type = cpu_to_be16(ETH_P_8021AD),
- .priority = 10,
- .callbacks = {
- .gro_receive = vlan_gro_receive,
- .gro_complete = vlan_gro_complete,
- },
- },
-};
-
static int __net_init vlan_init_net(struct net *net)
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -762,7 +677,6 @@ static struct pernet_operations vlan_net_ops = {
static int __init vlan_proto_init(void)
{
int err;
- unsigned int i;
pr_info("%s v%s\n", vlan_fullname, vlan_version);
@@ -786,9 +700,6 @@ static int __init vlan_proto_init(void)
if (err < 0)
goto err5;
- for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
- dev_add_offload(&vlan_packet_offloads[i]);
-
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
@@ -806,13 +717,8 @@ err0:
static void __exit vlan_cleanup_module(void)
{
- unsigned int i;
-
vlan_ioctl_set(NULL);
- for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
- dev_remove_offload(&vlan_packet_offloads[i]);
-
vlan_netlink_fini();
unregister_netdevice_notifier(&vlan_notifier_block);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4f60e86f4b8d..a313165e7a67 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -57,7 +57,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
}
skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
@@ -223,6 +223,33 @@ static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vi
return -ENODEV;
}
+int vlan_for_each(struct net_device *dev,
+ int (*action)(struct net_device *dev, int vid, void *arg),
+ void *arg)
+{
+ struct vlan_vid_info *vid_info;
+ struct vlan_info *vlan_info;
+ struct net_device *vdev;
+ int ret;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
+ return 0;
+
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto,
+ vid_info->vid);
+ ret = action(vdev, vid_info->vid, arg);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(vlan_for_each);
+
int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
{
struct net_device *real_dev = vlan_info->real_dev;
@@ -426,3 +453,102 @@ bool vlan_uses_dev(const struct net_device *dev)
return vlan_info->grp.nr_vlan_devs ? true : false;
}
EXPORT_SYMBOL(vlan_uses_dev);
+
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
+{
+ const struct packet_offload *ptype;
+ unsigned int hlen, off_vlan;
+ struct sk_buff *pp = NULL;
+ struct vlan_hdr *vhdr;
+ struct sk_buff *p;
+ __be16 type;
+ int flush = 1;
+
+ off_vlan = skb_gro_offset(skb);
+ hlen = off_vlan + sizeof(*vhdr);
+ vhdr = skb_gro_header_fast(skb, off_vlan);
+ if (skb_gro_header_hard(skb, hlen)) {
+ vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+ if (unlikely(!vhdr))
+ goto out;
+ }
+
+ type = vhdr->h_vlan_encapsulated_proto;
+
+ rcu_read_lock();
+ ptype = gro_find_receive_by_type(type);
+ if (!ptype)
+ goto out_unlock;
+
+ flush = 0;
+
+ list_for_each_entry(p, head, list) {
+ struct vlan_hdr *vhdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+ if (compare_vlan_header(vhdr, vhdr2))
+ NAPI_GRO_CB(p)->same_flow = 0;
+ }
+
+ skb_gro_pull(skb, sizeof(*vhdr));
+ skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ skb_gro_flush_final(skb, pp, flush);
+
+ return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+ __be16 type = vhdr->h_vlan_encapsulated_proto;
+ struct packet_offload *ptype;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ ptype = gro_find_complete_by_type(type);
+ if (ptype)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+ rcu_read_unlock();
+ return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+ {
+ .type = cpu_to_be16(ETH_P_8021Q),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+ {
+ .type = cpu_to_be16(ETH_P_8021AD),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+};
+
+static int __init vlan_offload_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+ dev_add_offload(&vlan_packet_offloads[i]);
+
+ return 0;
+}
+
+fs_initcall(vlan_offload_init);
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f75816f58107..c386e6981416 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -22,7 +22,6 @@
config BATMAN_ADV
tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
depends on NET
- select CRC16
select LIBCRC32C
help
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
@@ -48,6 +47,7 @@ config BATMAN_ADV_BATMAN_V
config BATMAN_ADV_BLA
bool "Bridge Loop Avoidance"
depends on BATMAN_ADV && INET
+ select CRC16
default y
help
This option enables BLA (Bridge Loop Avoidance), a mechanism
@@ -82,6 +82,7 @@ config BATMAN_ADV_NC
config BATMAN_ADV_MCAST
bool "Multicast optimisation"
depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
+ default y
help
This option enables the multicast optimisation which aims to
reduce the air overhead while improving the reliability of
@@ -100,12 +101,13 @@ config BATMAN_ADV_DEBUGFS
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
- depends on BATMAN_ADV_DEBUGFS
+ depends on BATMAN_ADV
help
This is an option for use by developers; most people should
say N here. This enables compilation of support for
- outputting debugging information to the kernel log. The
- output is controlled via the module parameter debug.
+ outputting debugging information to the debugfs log or tracing
+ buffer. The output is controlled via the batadv netdev specific
+ log_level setting.
config BATMAN_ADV_TRACING
bool "B.A.T.M.A.N. tracing support"
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index d2227091029f..f97e566f0402 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -34,7 +34,6 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
@@ -2585,13 +2584,14 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
* batadv_iv_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @gw_node: Gateway to be dumped
*
* Return: Error code, or 0 on success
*/
-static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node)
{
@@ -2611,13 +2611,16 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_GATEWAYS);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
ret = -EMSGSIZE;
if (curr_gw == gw_node)
@@ -2668,13 +2671,15 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx_skip = cb->args[0];
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ cb->seq = bat_priv->gw.generation << 1 | 1;
+
+ hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
- if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, gw_node)) {
+ if (batadv_iv_gw_dump_entry(msg, portid, cb, bat_priv,
+ gw_node)) {
idx_skip = idx - 1;
goto unlock;
}
@@ -2682,7 +2687,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
idx_skip = idx;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->gw.list_lock);
cb->args[0] = idx_skip;
}
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 6baec4e68898..90e33f84d37a 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -27,11 +27,13 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
+#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -915,13 +917,14 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
* batadv_v_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @gw_node: Gateway to be dumped
*
* Return: Error code, or 0 on success
*/
-static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node)
{
@@ -941,13 +944,16 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_GATEWAYS);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
ret = -EMSGSIZE;
if (curr_gw == gw_node) {
@@ -1018,13 +1024,15 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx_skip = cb->args[0];
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ cb->seq = bat_priv->gw.generation << 1 | 1;
+
+ hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
- if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, gw_node)) {
+ if (batadv_v_gw_dump_entry(msg, portid, cb, bat_priv,
+ gw_node)) {
idx_skip = idx - 1;
goto unlock;
}
@@ -1032,7 +1040,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
idx_skip = idx;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->gw.list_lock);
cb->args[0] = idx_skip;
}
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 9f481cfdf77d..e8090f099eb8 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -352,19 +352,21 @@ out:
*/
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
{
+ static const size_t tvlv_padding = sizeof(__be32);
struct batadv_elp_packet *elp_packet;
unsigned char *elp_buff;
u32 random_seqno;
size_t size;
int res = -ENOMEM;
- size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
+ size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding;
hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
if (!hard_iface->bat_v.elp_skb)
goto out;
skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
- elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+ elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb,
+ BATADV_ELP_HLEN + tvlv_padding);
elp_packet = (struct batadv_elp_packet *)elp_buff;
elp_packet->packet_type = BATADV_ELP;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f1aeeded0e3..5fdde2947802 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2094,14 +2094,15 @@ out:
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
* @claim: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
struct batadv_bla_claim *claim)
{
@@ -2111,13 +2112,16 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
void *hdr;
int ret = -EINVAL;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_BLA_CLAIM);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
is_own = batadv_compare_eth(claim->backbone_gw->orig,
primary_addr);
@@ -2153,28 +2157,33 @@ out:
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: always 0.
*/
static int
-batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
- struct hlist_head *head, int *idx_skip)
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_skip)
{
struct batadv_bla_claim *claim;
int idx = 0;
int ret = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(claim, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_skip)
continue;
- ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+ ret = batadv_bla_claim_dump_entry(msg, portid, cb,
primary_if, claim);
if (ret) {
*idx_skip = idx - 1;
@@ -2184,7 +2193,7 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
*idx_skip = 0;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return ret;
}
@@ -2204,7 +2213,6 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -2230,11 +2238,8 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_bla_claim_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq,
- primary_if, head, &idx))
+ if (batadv_bla_claim_dump_bucket(msg, portid, cb, primary_if,
+ hash, bucket, &idx))
break;
bucket++;
}
@@ -2325,14 +2330,15 @@ out:
* netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
* @backbone_gw: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
struct batadv_bla_backbone_gw *backbone_gw)
{
@@ -2343,13 +2349,16 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
void *hdr;
int ret = -EINVAL;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_BLA_BACKBONE);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
spin_lock_bh(&backbone_gw->crc_lock);
@@ -2386,28 +2395,33 @@ out:
* a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: always 0.
*/
static int
-batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
- struct hlist_head *head, int *idx_skip)
+ struct batadv_hashtable *hash,
+ unsigned int bucket, int *idx_skip)
{
struct batadv_bla_backbone_gw *backbone_gw;
int idx = 0;
int ret = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(backbone_gw, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_skip)
continue;
- ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+ ret = batadv_bla_backbone_dump_entry(msg, portid, cb,
primary_if, backbone_gw);
if (ret) {
*idx_skip = idx - 1;
@@ -2417,7 +2431,7 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
*idx_skip = 0;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return ret;
}
@@ -2437,7 +2451,6 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -2463,11 +2476,8 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_bla_backbone_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq,
- primary_if, head, &idx))
+ if (batadv_bla_backbone_dump_bucket(msg, portid, cb, primary_if,
+ hash, bucket, &idx))
break;
bucket++;
}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 8b608a2e2653..d4a7702e48d8 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "debugfs.h"
#include "main.h"
+#include <asm/current.h>
#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/err.h>
@@ -27,6 +28,7 @@
#include <linux/fs.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/stddef.h>
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a60bacf7120b..b9ffe1826527 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -863,23 +863,27 @@ out:
* netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @dat_entry: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_dat_entry *dat_entry)
{
int msecs;
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_DAT_CACHE);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
@@ -901,27 +905,31 @@ batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: 0 or error code.
*/
static int
-batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
- struct hlist_head *head, int *idx_skip)
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_skip)
{
struct batadv_dat_entry *dat_entry;
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(dat_entry, &hash->table[bucket], hash_entry) {
if (idx < *idx_skip)
goto skip;
- if (batadv_dat_cache_dump_entry(msg, portid, seq,
- dat_entry)) {
- rcu_read_unlock();
+ if (batadv_dat_cache_dump_entry(msg, portid, cb, dat_entry)) {
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_skip = idx;
return -EMSGSIZE;
@@ -930,7 +938,7 @@ batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
skip:
idx++;
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return 0;
}
@@ -951,7 +959,6 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -977,10 +984,7 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_dat_cache_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq, head,
+ if (batadv_dat_cache_dump_bucket(msg, portid, cb, hash, bucket,
&idx))
break;
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0fddc17106bd..5b71a289d04f 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -275,7 +275,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
kfree(entry);
packet = (struct batadv_frag_packet *)skb_out->data;
- size = ntohs(packet->total_size);
+ size = ntohs(packet->total_size) + hdr_size;
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 140c61a3f1ec..9d8e5eda2314 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -377,6 +377,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
kref_get(&gw_node->refcount);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
+ bat_priv->gw.generation++;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n",
@@ -472,6 +473,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
if (!hlist_unhashed(&gw_node->list)) {
hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_put(gw_node);
+ bat_priv->gw.generation++;
}
spin_unlock_bh(&bat_priv->gw.list_lock);
@@ -518,6 +520,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
&bat_priv->gw.gateway_list, list) {
hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_put(gw_node);
+ bat_priv->gw.generation++;
}
spin_unlock_bh(&bat_priv->gw.list_lock);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 781c5b6e6e8e..508f4416dfc9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -951,6 +951,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
batadv_check_known_mac_addr(hard_iface->net_dev);
kref_get(&hard_iface->refcount);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
+ batadv_hardif_generation++;
return hard_iface;
@@ -993,6 +994,7 @@ void batadv_hardif_remove_interfaces(void)
list_for_each_entry_safe(hard_iface, hard_iface_tmp,
&batadv_hardif_list, list) {
list_del_rcu(&hard_iface->list);
+ batadv_hardif_generation++;
batadv_hardif_remove_interface(hard_iface);
}
rtnl_unlock();
@@ -1054,6 +1056,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
case NETDEV_PRE_TYPE_CHANGE:
list_del_rcu(&hard_iface->list);
+ batadv_hardif_generation++;
batadv_hardif_remove_interface(hard_iface);
break;
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 7b49e4001778..9194f4d891b1 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -32,6 +32,8 @@ static void batadv_hash_init(struct batadv_hashtable *hash)
INIT_HLIST_HEAD(&hash->table[i]);
spin_lock_init(&hash->list_locks[i]);
}
+
+ atomic_set(&hash->generation, 0);
}
/**
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 9490a7ca2ba6..0e36fa1c7c3e 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -21,6 +21,7 @@
#include "main.h"
+#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/rculist.h>
@@ -58,6 +59,9 @@ struct batadv_hashtable {
/** @size: size of hashtable */
u32 size;
+
+ /** @generation: current (generation) sequence number */
+ atomic_t generation;
};
/* allocates and clears the hash */
@@ -112,6 +116,7 @@ static inline int batadv_hash_add(struct batadv_hashtable *hash,
/* no duplicate found in list, add new element */
hlist_add_head_rcu(data_node, head);
+ atomic_inc(&hash->generation);
ret = 0;
@@ -154,6 +159,7 @@ static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
data_save = node;
hlist_del_rcu(node);
+ atomic_inc(&hash->generation);
break;
}
spin_unlock_bh(&hash->list_locks[index]);
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 6beb5f067810..02e55b78132f 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -43,6 +43,8 @@
#include "debugfs.h"
#include "trace.h"
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
@@ -92,33 +94,6 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
return 0;
}
-/**
- * batadv_debug_log() - Add debug log entry
- * @bat_priv: the bat priv with all the soft interface information
- * @fmt: format string
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
- jiffies_to_msecs(jiffies), &vaf);
-
- trace_batadv_dbg(bat_priv, &vaf);
-
- va_end(args);
-
- return 0;
-}
-
static int batadv_log_open(struct inode *inode, struct file *file)
{
if (!try_module_get(THIS_MODULE))
@@ -259,3 +234,34 @@ void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
kfree(bat_priv->debug_log);
bat_priv->debug_log = NULL;
}
+
+#endif /* CONFIG_BATMAN_ADV_DEBUGFS */
+
+/**
+ * batadv_debug_log() - Add debug log entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @fmt: format string
+ *
+ * The formatted message is always handed to the batadv_dbg tracepoint. When
+ * CONFIG_BATMAN_ADV_DEBUGFS is enabled it is additionally passed, prefixed
+ * with the current jiffies converted to milliseconds, to batadv_fdebug_log()
+ * together with bat_priv->debug_log.
+ *
+ * Return: always 0; the result of batadv_fdebug_log() is not propagated
+ */
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+	va_list ap;
+	struct va_format msg = {
+		.fmt = fmt,
+		.va = &ap,
+	};
+
+	va_start(ap, fmt);
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+	batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
+			  jiffies_to_msecs(jiffies), &msg);
+#endif
+
+	trace_batadv_dbg(bat_priv, &msg);
+
+	va_end(ap);
+
+	return 0;
+}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 69c0d85bceb3..d1ed839fd32b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -74,6 +74,7 @@
* list traversals just rcu-locked
*/
struct list_head batadv_hardif_list;
+unsigned int batadv_hardif_generation;
static int (*batadv_rx_handler[256])(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -186,6 +187,8 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);
+ bat_priv->gw.generation = 0;
+
ret = batadv_v_mesh_init(bat_priv);
if (ret < 0)
goto err;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2002b70e18db..b572066325e4 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.4"
+#define BATADV_SOURCE_VERSION "2019.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -247,6 +247,7 @@ static inline int batadv_print_vid(unsigned short vid)
}
extern struct list_head batadv_hardif_list;
+extern unsigned int batadv_hardif_generation;
extern unsigned char batadv_broadcast_addr[];
extern struct workqueue_struct *batadv_event_workqueue;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 86725d792e15..69244e4598f5 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1365,22 +1365,26 @@ int batadv_mcast_mesh_info_put(struct sk_buff *msg,
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @orig_node: originator to dump the multicast flags of
*
* Return: 0 or error code.
*/
static int
-batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_orig_node *orig_node)
{
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_MCAST_FLAGS);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
orig_node->orig)) {
genlmsg_cancel(msg, hdr);
@@ -1405,21 +1409,26 @@ batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* table to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: 0 or error code.
*/
static int
-batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
- struct hlist_head *head, long *idx_skip)
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
+ struct batadv_hashtable *hash,
+ unsigned int bucket, long *idx_skip)
{
struct batadv_orig_node *orig_node;
long idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(orig_node, &hash->table[bucket], hash_entry) {
if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
&orig_node->capa_initialized))
continue;
@@ -1427,9 +1436,8 @@ batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
if (idx < *idx_skip)
goto skip;
- if (batadv_mcast_flags_dump_entry(msg, portid, seq,
- orig_node)) {
- rcu_read_unlock();
+ if (batadv_mcast_flags_dump_entry(msg, portid, cb, orig_node)) {
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_skip = idx;
return -EMSGSIZE;
@@ -1438,7 +1446,7 @@ batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
skip:
idx++;
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return 0;
}
@@ -1447,7 +1455,7 @@ skip:
* __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: the bat priv with all the soft interface information
* @bucket: current bucket to dump
* @idx: index in current bucket to the next entry to dump
@@ -1455,19 +1463,17 @@ skip:
* Return: 0 or error code.
*/
static int
-__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv, long *bucket, long *idx)
{
struct batadv_hashtable *hash = bat_priv->orig_hash;
long bucket_tmp = *bucket;
- struct hlist_head *head;
long idx_tmp = *idx;
while (bucket_tmp < hash->size) {
- head = &hash->table[bucket_tmp];
-
- if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
- &idx_tmp))
+ /* Dump the bucket the loop cursor is on (as the removed code did
+ * via hash->table[bucket_tmp]); *bucket is only the start position
+ * and is not advanced inside this loop.
+ */
+ if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash,
+ bucket_tmp, &idx_tmp))
break;
bucket_tmp++;
@@ -1550,8 +1556,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
return ret;
bat_priv = netdev_priv(primary_if->soft_iface);
- ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, bucket, idx);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx);
batadv_hardif_put(primary_if);
return ret;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0d9459b69bdb..2dc3304cee54 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -29,11 +29,11 @@
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/printk.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/types.h>
@@ -445,23 +445,27 @@ out:
* batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @hard_iface: Hard interface to dump
*
* Return: error code, or 0 on success
*/
static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *hard_iface)
{
struct net_device *net_dev = hard_iface->net_dev;
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
BATADV_CMD_GET_HARDIFS);
if (!hdr)
return -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
net_dev->ifindex) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
@@ -498,7 +502,6 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hard_iface *hard_iface;
int ifindex;
int portid = NETLINK_CB(cb->skb).portid;
- int seq = cb->nlh->nlmsg_seq;
int skip = cb->args[0];
int i = 0;
@@ -516,23 +519,24 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
return -ENODEV;
}
- rcu_read_lock();
+ rtnl_lock();
+ cb->seq = batadv_hardif_generation << 1 | 1;
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->soft_iface != soft_iface)
continue;
if (i++ < skip)
continue;
- if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+ if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
hard_iface)) {
i--;
break;
}
}
- rcu_read_unlock();
+ rtnl_unlock();
dev_put(soft_iface);
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
index 3d57f9981f25..8e1024217cff 100644
--- a/net/batman-adv/trace.c
+++ b/net/batman-adv/trace.c
@@ -16,7 +16,5 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/module.h>
-
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 3acda26a30ca..104784be94d7 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -21,7 +21,13 @@
#include "main.h"
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
#include <linux/tracepoint.h>
+#include <linux/types.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM batadv
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d21624c44665..8dcd4968cde7 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1145,14 +1145,15 @@ out:
* batadv_tt_local_dump_entry() - Dump one TT local entry into a message
* @msg :Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @common: tt local & tt global common data
*
* Return: Error code, or 0 on success
*/
static int
-batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_tt_common_entry *common)
{
@@ -1173,12 +1174,14 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
batadv_softif_vlan_put(vlan);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI,
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
BATADV_CMD_GET_TRANSTABLE_LOCAL);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
@@ -1201,34 +1204,39 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
- * @head: Pointer to the list containing the local tt entries
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_s: Number of entries to skip
*
* Return: Error code, or 0 on success
*/
static int
-batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
- struct hlist_head *head, int *idx_s)
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_s)
{
struct batadv_tt_common_entry *common;
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(common, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(common, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_s)
continue;
- if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+ if (batadv_tt_local_dump_entry(msg, portid, cb, bat_priv,
common)) {
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_s = idx - 1;
return -EMSGSIZE;
}
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_s = 0;
return 0;
@@ -1248,7 +1256,6 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_hashtable *hash;
- struct hlist_head *head;
int ret;
int ifindex;
int bucket = cb->args[0];
@@ -1276,10 +1283,8 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
hash = bat_priv->tt.local_hash;
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, head, &idx))
+ if (batadv_tt_local_dump_bucket(msg, portid, cb, bat_priv,
+ hash, bucket, &idx))
break;
bucket++;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 45b5592de816..cbe17da36fcb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1096,12 +1096,15 @@ struct batadv_priv_gw {
/** @gateway_list: list of available gateway nodes */
struct hlist_head gateway_list;
- /** @list_lock: lock protecting gateway_list & curr_gw */
+ /** @list_lock: lock protecting gateway_list, curr_gw, generation */
spinlock_t list_lock;
/** @curr_gw: pointer to currently selected gateway node */
struct batadv_gw_node __rcu *curr_gw;
+ /** @generation: current (generation) sequence number */
+ unsigned int generation;
+
/**
* @mode: gateway operation: off, client or server (see batadv_gw_modes)
*/
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 828e87fe8027..9d79c7de234a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -607,7 +607,7 @@ static void ifup(struct net_device *netdev)
int err;
rtnl_lock();
- err = dev_open(netdev);
+ err = dev_open(netdev, NULL);
if (err < 0)
BT_INFO("iface %s cannot be opened (%d)", netdev->name, err);
rtnl_unlock();
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c89c22c49015..fa2644d276ef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -28,12 +28,13 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
return ret;
}
-static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
+ u32 *time)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
enum bpf_cgroup_storage_type stype;
u64 time_start, time_spent = 0;
- u32 ret = 0, i;
+ u32 i;
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -49,7 +50,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
repeat = 1;
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
- ret = bpf_test_run_one(prog, ctx, storage);
+ *ret = bpf_test_run_one(prog, ctx, storage);
if (need_resched()) {
if (signal_pending(current))
break;
@@ -65,7 +66,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
- return ret;
+ return 0;
}
static int bpf_test_finish(const union bpf_attr *kattr,
@@ -74,8 +75,18 @@ static int bpf_test_finish(const union bpf_attr *kattr,
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
+ u32 copy_size = size;
+
+ /* Clamp copy if the user has provided a size hint, but copy the full
+ * buffer if not to retain old behaviour.
+ */
+ if (kattr->test.data_size_out &&
+ copy_size > kattr->test.data_size_out) {
+ copy_size = kattr->test.data_size_out;
+ err = -ENOSPC;
+ }
- if (data_out && copy_to_user(data_out, data, size))
+ if (data_out && copy_to_user(data_out, data, copy_size))
goto out;
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
@@ -83,7 +94,8 @@ static int bpf_test_finish(const union bpf_attr *kattr,
goto out;
if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
goto out;
- err = 0;
+ if (err != -ENOSPC)
+ err = 0;
out:
return err;
}
@@ -165,7 +177,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
__skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
- retval = bpf_test_run(prog, skb, repeat, &duration);
+ ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+ if (ret) {
+ kfree_skb(skb);
+ kfree(sk);
+ return ret;
+ }
if (!is_l2) {
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -212,11 +229,14 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
- retval = bpf_test_run(prog, &xdp, repeat, &duration);
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+ if (ret)
+ goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
xdp.data_end != xdp.data + size)
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+out:
kfree(data);
return ret;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 360ad66c21e9..a5174e5001d8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -31,6 +31,8 @@
*/
static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+ struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
@@ -56,6 +58,17 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
br_mtu_auto_adjust(br);
break;
+ case NETDEV_PRE_CHANGEADDR:
+ if (br->dev->addr_assign_type == NET_ADDR_SET)
+ break;
+ prechaddr_info = ptr;
+ err = dev_pre_changeaddr_notify(br->dev,
+ prechaddr_info->dev_addr,
+ extack);
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
@@ -175,6 +188,82 @@ static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
+/* br_boolopt_toggle - change user-controlled boolean option
+ *
+ * @br: bridge device
+ * @opt: id of the option to change
+ * @on: new option value
+ * @extack: extack for error messages (not used by the options handled here)
+ *
+ * Maps the user-visible option @opt onto the matching internal bridge option
+ * and sets it to @on. An unsupported @opt only triggers a warning.
+ *
+ * Returns 0 in all cases.
+ */
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+		      struct netlink_ext_ack *extack)
+{
+	switch (opt) {
+	case BR_BOOLOPT_NO_LL_LEARN:
+		br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
+		return 0;
+	default:
+		break;
+	}
+
+	/* shouldn't be called with unsupported options */
+	WARN_ON(1);
+
+	return 0;
+}
+
+/* br_boolopt_get - read the current value of a user-controlled boolean option
+ *
+ * @br: bridge device
+ * @opt: id of the option to read
+ *
+ * Returns the value of the internal bridge option backing @opt. An
+ * unsupported @opt triggers a warning and reads as 0.
+ */
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
+{
+	if (opt == BR_BOOLOPT_NO_LL_LEARN)
+		return br_opt_get(br, BROPT_NO_LL_LEARN);
+
+	/* shouldn't be called with unsupported options */
+	WARN_ON(1);
+
+	return 0;
+}
+
+/* br_boolopt_multi_toggle - apply several boolean options in one call
+ *
+ * @br: bridge device
+ * @bm: bm->optmask selects the option ids to change, bm->optval carries the
+ *      new value of each selected id at the same bit position
+ * @extack: extack passed through to br_boolopt_toggle()
+ *
+ * Walks the set bits of bm->optmask below BR_BOOLOPT_MAX in ascending order
+ * and stops at the first option that fails; options toggled before the
+ * failure are not reverted here.
+ *
+ * Returns 0 if every selected option was applied, otherwise the error
+ * returned by br_boolopt_toggle().
+ */
+int br_boolopt_multi_toggle(struct net_bridge *br,
+			    struct br_boolopt_multi *bm,
+			    struct netlink_ext_ack *extack)
+{
+	unsigned long requested = bm->optmask;
+	int id;
+
+	for_each_set_bit(id, &requested, BR_BOOLOPT_MAX) {
+		bool enable = (bm->optval & BIT(id)) != 0;
+		int err = br_boolopt_toggle(br, id, enable, extack);
+
+		if (!err)
+			continue;
+
+		br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n",
+			 id, br_boolopt_get(br, id), enable, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/* br_boolopt_multi_get - report all user-controlled boolean options at once
+ *
+ * @br: bridge device
+ * @bm: filled in by this function
+ *
+ * bm->optval gets one bit per option id below BR_BOOLOPT_MAX, holding the
+ * value returned by br_boolopt_get() for that id; bm->optmask gets the mask
+ * covering bits 0 .. BR_BOOLOPT_MAX - 1.
+ */
+void br_boolopt_multi_get(const struct net_bridge *br,
+			  struct br_boolopt_multi *bm)
+{
+	u32 values = 0;
+	int id = 0;
+
+	while (id < BR_BOOLOPT_MAX) {
+		values |= (br_boolopt_get(br, id) << id);
+		id++;
+	}
+
+	bm->optval = values;
+	bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0);
+}
+
+/* private bridge options, controlled by the kernel */
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
{
bool cur = !!br_opt_get(br, opt);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index c6abf927f0c9..013323b6dbe4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -131,9 +131,17 @@ static int br_dev_init(struct net_device *dev)
return err;
}
+ err = br_mdb_hash_init(br);
+ if (err) {
+ free_percpu(br->stats);
+ br_fdb_hash_fini(br);
+ return err;
+ }
+
err = br_vlan_init(br);
if (err) {
free_percpu(br->stats);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
return err;
}
@@ -142,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
if (err) {
free_percpu(br->stats);
br_vlan_flush(br);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
}
br_set_lockdep_class(dev);
@@ -156,6 +165,7 @@ static void br_dev_uninit(struct net_device *dev)
br_multicast_dev_del(br);
br_multicast_uninit_stats(br);
br_vlan_flush(br);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
free_percpu(br->stats);
}
@@ -393,6 +403,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
+ .ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e56ba3912a90..fe3c758791ca 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -773,6 +773,32 @@ skip:
return err;
}
+/* br_fdb_get - fill a netlink message with one forwarding database entry
+ *
+ * Looks up the entry for @addr / @vid on the bridge behind @dev inside an
+ * RCU read-side section and, if found, serialises it into @skb as an
+ * RTM_NEWNEIGH message via fdb_fill_info(). @tb is not used here.
+ *
+ * Returns the result of fdb_fill_info(), or -ENOENT (with an extack
+ * message) when no matching entry exists.
+ */
+int br_fdb_get(struct sk_buff *skb,
+	       struct nlattr *tb[],
+	       struct net_device *dev,
+	       const unsigned char *addr,
+	       u16 vid, u32 portid, u32 seq,
+	       struct netlink_ext_ack *extack)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_fdb_entry *entry;
+	int err;
+
+	rcu_read_lock();
+	entry = br_fdb_find_rcu(br, addr, vid);
+	if (entry) {
+		err = fdb_fill_info(skb, br, entry, portid, seq,
+				    RTM_NEWNEIGH, 0);
+	} else {
+		NL_SET_ERR_MSG(extack, "Fdb entry not found");
+		err = -ENOENT;
+	}
+	rcu_read_unlock();
+
+	return err;
+}
+
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const u8 *addr, u16 state, u16 flags, u16 vid,
@@ -1164,3 +1190,23 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
spin_unlock_bh(&br->hash_lock);
}
+
+/* br_fdb_clear_offload - drop the offloaded mark from a port's fdb entries
+ *
+ * @dev: bridge port device
+ * @vid: VLAN id whose entries are affected
+ *
+ * Must be called with RTNL held (asserted). Does nothing if @dev is not a
+ * bridge port. Otherwise, under the bridge's hash_lock, clears ->offloaded
+ * on every fdb entry whose destination is this port and whose VLAN is @vid.
+ */
+void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
+{
+	struct net_bridge_fdb_entry *fdb;
+	struct net_bridge_port *port;
+	struct net_bridge *br;
+
+	ASSERT_RTNL();
+
+	port = br_port_get_rtnl(dev);
+	if (!port)
+		return;
+
+	br = port->br;
+
+	spin_lock_bh(&br->hash_lock);
+	hlist_for_each_entry(fdb, &br->fdb_list, fdb_node) {
+		if (fdb->dst != port || fdb->key.vlan_id != vid)
+			continue;
+
+		fdb->offloaded = 0;
+	}
+	spin_unlock_bh(&br->hash_lock);
+}
+EXPORT_SYMBOL_GPL(br_fdb_clear_offload);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9b46d2dc4c22..41f0a696a65f 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -650,7 +650,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
- err = nbp_vlan_init(p);
+ if (br->dev->addr_assign_type != NET_ADDR_SET) {
+ /* Ask for permission to use this MAC address now, even if we
+ * don't end up choosing it below.
+ */
+ err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
+ if (err)
+ goto err7;
+ }
+
+ err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
goto err7;
@@ -741,3 +750,15 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_NEIGH_SUPPRESS)
br_recalculate_neigh_suppress_enabled(br);
}
+
+/* br_port_flag_is_set - test bridge port flags of a device
+ *
+ * @dev: device to query
+ * @flag: port flag bit(s) to test
+ *
+ * Returns true if @dev is a bridge port and at least one bit of @flag is
+ * set in its port flags; false otherwise, including when @dev is not a
+ * bridge port.
+ */
+bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
+{
+	const struct net_bridge_port *port = br_port_get_rtnl_rcu(dev);
+
+	return port ? (port->flags & flag) != 0 : false;
+}
+EXPORT_SYMBOL_GPL(br_port_flag_is_set);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3ddca11f44c2..5ea7e56119c1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -188,7 +188,9 @@ static void __br_handle_local_finish(struct sk_buff *skb)
u16 vid = 0;
/* check if vlan is allowed, to avoid spoofing */
- if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
+ if ((p->flags & BR_LEARNING) &&
+ !br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
+ br_should_learn(p, skb, &vid))
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index a7ea2d431714..f69c8d91dc81 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -78,82 +78,72 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
+ int idx = 0, s_idx = cb->args[1], err = 0;
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
- int i, err = 0;
- int idx = 0, s_idx = cb->args[1];
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- mdb = rcu_dereference(br->mdb);
- if (!mdb)
- return 0;
-
nest = nla_nest_start(skb, MDBA_MDB);
if (nest == NULL)
return -EMSGSIZE;
- for (i = 0; i < mdb->max; i++) {
- struct net_bridge_mdb_entry *mp;
+ hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port *port;
- hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
- if (idx < s_idx)
- goto skip;
+ if (idx < s_idx)
+ goto skip;
- nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
- if (nest2 == NULL) {
- err = -EMSGSIZE;
- goto out;
- }
+ nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
+ if (!nest2) {
+ err = -EMSGSIZE;
+ break;
+ }
- for (pp = &mp->ports;
- (p = rcu_dereference(*pp)) != NULL;
- pp = &p->next) {
- struct nlattr *nest_ent;
- struct br_mdb_entry e;
-
- port = p->port;
- if (!port)
- continue;
-
- memset(&e, 0, sizeof(e));
- e.ifindex = port->dev->ifindex;
- e.vid = p->addr.vid;
- __mdb_entry_fill_flags(&e, p->flags);
- if (p->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = p->addr.u.ip4;
+ for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ pp = &p->next) {
+ struct nlattr *nest_ent;
+ struct br_mdb_entry e;
+
+ port = p->port;
+ if (!port)
+ continue;
+
+ memset(&e, 0, sizeof(e));
+ e.ifindex = port->dev->ifindex;
+ e.vid = p->addr.vid;
+ __mdb_entry_fill_flags(&e, p->flags);
+ if (p->addr.proto == htons(ETH_P_IP))
+ e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- if (p->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = p->addr.u.ip6;
+ if (p->addr.proto == htons(ETH_P_IPV6))
+ e.addr.u.ip6 = p->addr.u.ip6;
#endif
- e.addr.proto = p->addr.proto;
- nest_ent = nla_nest_start(skb,
- MDBA_MDB_ENTRY_INFO);
- if (!nest_ent) {
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
- if (nla_put_nohdr(skb, sizeof(e), &e) ||
- nla_put_u32(skb,
- MDBA_MDB_EATTR_TIMER,
- br_timer_value(&p->timer))) {
- nla_nest_cancel(skb, nest_ent);
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
- nla_nest_end(skb, nest_ent);
+ e.addr.proto = p->addr.proto;
+ nest_ent = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO);
+ if (!nest_ent) {
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
}
- nla_nest_end(skb, nest2);
- skip:
- idx++;
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb,
+ MDBA_MDB_EATTR_TIMER,
+ br_timer_value(&p->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
+ }
+ nla_nest_end(skb, nest_ent);
}
+ nla_nest_end(skb, nest2);
+skip:
+ idx++;
}
out:
@@ -203,8 +193,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
- /* In theory this could be wrapped to 0... */
- cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
+ cb->seq = net->dev_base_seq;
for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
@@ -297,7 +286,6 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
struct br_mdb_complete_info *data = priv;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port_group *p;
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port *port = data->port;
struct net_bridge *br = port->br;
@@ -306,8 +294,7 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
goto err;
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, &data->ip);
+ mp = br_mdb_ip_get(br, &data->ip);
if (!mp)
goto out;
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
@@ -344,7 +331,7 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
mdb.obj.orig_dev = dev;
switch (type) {
case RTM_NEWMDB:
- switchdev_port_obj_add(lower_dev, &mdb.obj);
+ switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
break;
case RTM_DELMDB:
switchdev_port_obj_del(lower_dev, &mdb.obj);
@@ -394,7 +381,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
__mdb_entry_to_br_ip(entry, &complete_info->ip);
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(port_dev, &mdb.obj))
+ if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL))
kfree(complete_info);
}
} else if (p && port_dev && type == RTM_DELMDB) {
@@ -588,14 +575,12 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct net_bridge_mdb_htable *mdb;
unsigned long now = jiffies;
int err;
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, group);
+ mp = br_mdb_ip_get(br, group);
if (!mp) {
- mp = br_multicast_new_group(br, port, group);
+ mp = br_multicast_new_group(br, group);
err = PTR_ERR_OR_ZERO(mp);
if (err)
return err;
@@ -696,7 +681,6 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
@@ -709,9 +693,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
__mdb_entry_to_br_ip(entry, &ip);
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
-
- mp = br_mdb_ip_get(mdb, &ip);
+ mp = br_mdb_ip_get(br, &ip);
if (!mp)
goto unlock;
@@ -728,7 +710,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- call_rcu_bh(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
err = 0;
if (!mp->ports && !mp->host_joined &&
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6bac0d6b7b94..879cd2315769 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,14 @@
#include "br_private.h"
+static const struct rhashtable_params br_mdb_rht_params = {
+ .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
+ .key_offset = offsetof(struct net_bridge_mdb_entry, addr),
+ .key_len = sizeof(struct br_ip),
+ .automatic_shrinking = true,
+ .locks_mul = 1,
+};
+
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
static void br_multicast_add_router(struct net_bridge *br,
@@ -54,7 +62,6 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
-unsigned int br_mdb_rehash_seq;
static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
{
@@ -73,89 +80,58 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
return 0;
}
-static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
- __u16 vid)
-{
- return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
- const struct in6_addr *ip,
- __u16 vid)
-{
- return jhash_2words(ipv6_addr_hash(ip), vid,
- mdb->secret) & (mdb->max - 1);
-}
-#endif
-
-static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
- struct br_ip *ip)
+static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
+ struct br_ip *dst)
{
- switch (ip->proto) {
- case htons(ETH_P_IP):
- return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
-#if IS_ENABLED(CONFIG_IPV6)
- case htons(ETH_P_IPV6):
- return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
-#endif
- }
- return 0;
+ return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
}
-static struct net_bridge_mdb_entry *__br_mdb_ip_get(
- struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
+struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br,
+ struct br_ip *dst)
{
- struct net_bridge_mdb_entry *mp;
+ struct net_bridge_mdb_entry *ent;
- hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
- if (br_ip_equal(&mp->addr, dst))
- return mp;
- }
-
- return NULL;
-}
+ lockdep_assert_held_once(&br->multicast_lock);
-struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb,
- struct br_ip *dst)
-{
- if (!mdb)
- return NULL;
+ rcu_read_lock();
+ ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
+ rcu_read_unlock();
- return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
+ return ent;
}
-static struct net_bridge_mdb_entry *br_mdb_ip4_get(
- struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
+ __be32 dst, __u16 vid)
{
struct br_ip br_dst;
+ memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip4 = dst;
br_dst.proto = htons(ETH_P_IP);
br_dst.vid = vid;
- return br_mdb_ip_get(mdb, &br_dst);
+ return br_mdb_ip_get(br, &br_dst);
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct net_bridge_mdb_entry *br_mdb_ip6_get(
- struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
- __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
+ const struct in6_addr *dst,
+ __u16 vid)
{
struct br_ip br_dst;
+ memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
br_dst.vid = vid;
- return br_mdb_ip_get(mdb, &br_dst);
+ return br_mdb_ip_get(br, &br_dst);
}
#endif
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid)
{
- struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
struct br_ip ip;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
@@ -164,6 +140,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
if (BR_INPUT_SKB_CB(skb)->igmp)
return NULL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = skb->protocol;
ip.vid = vid;
@@ -180,70 +157,13 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return NULL;
}
- return br_mdb_ip_get(mdb, &ip);
-}
-
-static void br_mdb_free(struct rcu_head *head)
-{
- struct net_bridge_mdb_htable *mdb =
- container_of(head, struct net_bridge_mdb_htable, rcu);
- struct net_bridge_mdb_htable *old = mdb->old;
-
- mdb->old = NULL;
- kfree(old->mhash);
- kfree(old);
-}
-
-static int br_mdb_copy(struct net_bridge_mdb_htable *new,
- struct net_bridge_mdb_htable *old,
- int elasticity)
-{
- struct net_bridge_mdb_entry *mp;
- int maxlen;
- int len;
- int i;
-
- for (i = 0; i < old->max; i++)
- hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver])
- hlist_add_head(&mp->hlist[new->ver],
- &new->mhash[br_ip_hash(new, &mp->addr)]);
-
- if (!elasticity)
- return 0;
-
- maxlen = 0;
- for (i = 0; i < new->max; i++) {
- len = 0;
- hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver])
- len++;
- if (len > maxlen)
- maxlen = len;
- }
-
- return maxlen > elasticity ? -EINVAL : 0;
-}
-
-void br_multicast_free_pg(struct rcu_head *head)
-{
- struct net_bridge_port_group *p =
- container_of(head, struct net_bridge_port_group, rcu);
-
- kfree(p);
-}
-
-static void br_multicast_free_group(struct rcu_head *head)
-{
- struct net_bridge_mdb_entry *mp =
- container_of(head, struct net_bridge_mdb_entry, rcu);
-
- kfree(mp);
+ return br_mdb_ip_get_rcu(br, &ip);
}
static void br_multicast_group_expired(struct timer_list *t)
{
struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
- struct net_bridge_mdb_htable *mdb;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&mp->timer))
@@ -255,12 +175,11 @@ static void br_multicast_group_expired(struct timer_list *t)
if (mp->ports)
goto out;
- mdb = mlock_dereference(br->mdb, br);
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_rcu(&mp->mdb_node);
- hlist_del_rcu(&mp->hlist[mdb->ver]);
- mdb->size--;
-
- call_rcu_bh(&mp->rcu, br_multicast_free_group);
+ kfree_rcu(mp, rcu);
out:
spin_unlock(&br->multicast_lock);
@@ -269,14 +188,11 @@ out:
static void br_multicast_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- mdb = mlock_dereference(br->mdb, br);
-
- mp = br_mdb_ip_get(mdb, &pg->addr);
+ mp = br_mdb_ip_get(br, &pg->addr);
if (WARN_ON(!mp))
return;
@@ -291,7 +207,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
del_timer(&p->timer);
br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
p->flags);
- call_rcu_bh(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
@@ -319,53 +235,6 @@ out:
spin_unlock(&br->multicast_lock);
}
-static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
- int elasticity)
-{
- struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
- struct net_bridge_mdb_htable *mdb;
- int err;
-
- mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
-
- mdb->max = max;
- mdb->old = old;
-
- mdb->mhash = kcalloc(max, sizeof(*mdb->mhash), GFP_ATOMIC);
- if (!mdb->mhash) {
- kfree(mdb);
- return -ENOMEM;
- }
-
- mdb->size = old ? old->size : 0;
- mdb->ver = old ? old->ver ^ 1 : 0;
-
- if (!old || elasticity)
- get_random_bytes(&mdb->secret, sizeof(mdb->secret));
- else
- mdb->secret = old->secret;
-
- if (!old)
- goto out;
-
- err = br_mdb_copy(mdb, old, elasticity);
- if (err) {
- kfree(mdb->mhash);
- kfree(mdb);
- return err;
- }
-
- br_mdb_rehash_seq++;
- call_rcu_bh(&mdb->rcu, br_mdb_free);
-
-out:
- rcu_assign_pointer(*mdbp, mdb);
-
- return 0;
-}
-
static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
__be32 group,
u8 *igmp_type)
@@ -589,111 +458,19 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
return NULL;
}
-static struct net_bridge_mdb_entry *br_multicast_get_group(
- struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, int hash)
-{
- struct net_bridge_mdb_htable *mdb;
- struct net_bridge_mdb_entry *mp;
- unsigned int count = 0;
- unsigned int max;
- int elasticity;
- int err;
-
- mdb = rcu_dereference_protected(br->mdb, 1);
- hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
- count++;
- if (unlikely(br_ip_equal(group, &mp->addr)))
- return mp;
- }
-
- elasticity = 0;
- max = mdb->max;
-
- if (unlikely(count > br->hash_elasticity && count)) {
- if (net_ratelimit())
- br_info(br, "Multicast hash table "
- "chain limit reached: %s\n",
- port ? port->dev->name : br->dev->name);
-
- elasticity = br->hash_elasticity;
- }
-
- if (mdb->size >= max) {
- max *= 2;
- if (unlikely(max > br->hash_max)) {
- br_warn(br, "Multicast hash table maximum of %d "
- "reached, disabling snooping: %s\n",
- br->hash_max,
- port ? port->dev->name : br->dev->name);
- err = -E2BIG;
-disable:
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
- goto err;
- }
- }
-
- if (max > mdb->max || elasticity) {
- if (mdb->old) {
- if (net_ratelimit())
- br_info(br, "Multicast hash table "
- "on fire: %s\n",
- port ? port->dev->name : br->dev->name);
- err = -EEXIST;
- goto err;
- }
-
- err = br_mdb_rehash(&br->mdb, max, elasticity);
- if (err) {
- br_warn(br, "Cannot rehash multicast "
- "hash table, disabling snooping: %s, %d, %d\n",
- port ? port->dev->name : br->dev->name,
- mdb->size, err);
- goto disable;
- }
-
- err = -EAGAIN;
- goto err;
- }
-
- return NULL;
-
-err:
- mp = ERR_PTR(err);
- return mp;
-}
-
struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
- struct net_bridge_port *p,
struct br_ip *group)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
- int hash;
int err;
- mdb = rcu_dereference_protected(br->mdb, 1);
- if (!mdb) {
- err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0);
- if (err)
- return ERR_PTR(err);
- goto rehash;
- }
-
- hash = br_ip_hash(mdb, group);
- mp = br_multicast_get_group(br, p, group, hash);
- switch (PTR_ERR(mp)) {
- case 0:
- break;
-
- case -EAGAIN:
-rehash:
- mdb = rcu_dereference_protected(br->mdb, 1);
- hash = br_ip_hash(mdb, group);
- break;
+ mp = br_mdb_ip_get(br, group);
+ if (mp)
+ return mp;
- default:
- goto out;
+ if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
+ return ERR_PTR(-E2BIG);
}
mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
@@ -703,11 +480,15 @@ rehash:
mp->br = br;
mp->addr = *group;
timer_setup(&mp->timer, br_multicast_group_expired, 0);
+ err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ if (err) {
+ kfree(mp);
+ mp = ERR_PTR(err);
+ } else {
+ hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list);
+ }
- hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
- mdb->size++;
-
-out:
return mp;
}
@@ -768,7 +549,7 @@ static int br_multicast_add_group(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- mp = br_multicast_new_group(br, port, group);
+ mp = br_multicast_new_group(br, group);
err = PTR_ERR(mp);
if (IS_ERR(mp))
goto err;
@@ -837,6 +618,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
if (ipv6_addr_is_ll_all_nodes(group))
return 0;
+ memset(&br_group, 0, sizeof(br_group));
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
@@ -1483,7 +1265,7 @@ static void br_ip4_multicast_query(struct net_bridge *br,
goto out;
}
- mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
+ mp = br_mdb_ip4_get(br, group, vid);
if (!mp)
goto out;
@@ -1567,7 +1349,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
}
- mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
+ mp = br_mdb_ip6_get(br, group, vid);
if (!mp)
goto out;
@@ -1601,7 +1383,6 @@ br_multicast_leave_group(struct net_bridge *br,
struct bridge_mcast_own_query *own_query,
const unsigned char *src)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
unsigned long now;
@@ -1612,8 +1393,7 @@ br_multicast_leave_group(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, group);
+ mp = br_mdb_ip_get(br, group);
if (!mp)
goto out;
@@ -1629,7 +1409,7 @@ br_multicast_leave_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- call_rcu_bh(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
p->flags);
@@ -1961,8 +1741,7 @@ static void br_ip6_multicast_query_expired(struct timer_list *t)
void br_multicast_init(struct net_bridge *br)
{
- br->hash_elasticity = 4;
- br->hash_max = 512;
+ br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
br->multicast_last_member_count = 2;
@@ -1999,6 +1778,7 @@ void br_multicast_init(struct net_bridge *br)
timer_setup(&br->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
+ INIT_HLIST_HEAD(&br->mdb_list);
}
static void __br_multicast_open(struct net_bridge *br,
@@ -2033,40 +1813,20 @@ void br_multicast_stop(struct net_bridge *br)
void br_multicast_dev_del(struct net_bridge *br)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
- struct hlist_node *n;
- u32 ver;
- int i;
+ struct hlist_node *tmp;
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
- if (!mdb)
- goto out;
-
- br->mdb = NULL;
-
- ver = mdb->ver;
- for (i = 0; i < mdb->max; i++) {
- hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
- hlist[ver]) {
- del_timer(&mp->timer);
- call_rcu_bh(&mp->rcu, br_multicast_free_group);
- }
+ hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) {
+ del_timer(&mp->timer);
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_rcu(&mp->mdb_node);
+ kfree_rcu(mp, rcu);
}
-
- if (mdb->old) {
- spin_unlock_bh(&br->multicast_lock);
- rcu_barrier_bh();
- spin_lock_bh(&br->multicast_lock);
- WARN_ON(mdb->old);
- }
-
- mdb->old = mdb;
- call_rcu_bh(&mdb->rcu, br_mdb_free);
-
-out:
spin_unlock_bh(&br->multicast_lock);
+
+ rcu_barrier();
}
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2176,7 +1936,6 @@ static void br_multicast_start_querier(struct net_bridge *br,
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_port *port;
int err = 0;
@@ -2192,21 +1951,6 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
if (!netif_running(br->dev))
goto unlock;
- mdb = mlock_dereference(br->mdb, br);
- if (mdb) {
- if (mdb->old) {
- err = -EEXIST;
-rollback:
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
- goto unlock;
- }
-
- err = br_mdb_rehash(&br->mdb, mdb->max,
- br->hash_elasticity);
- if (err)
- goto rollback;
- }
-
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
__br_multicast_enable_port(port);
@@ -2271,45 +2015,6 @@ unlock:
return 0;
}
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
-{
- int err = -EINVAL;
- u32 old;
- struct net_bridge_mdb_htable *mdb;
-
- spin_lock_bh(&br->multicast_lock);
- if (!is_power_of_2(val))
- goto unlock;
-
- mdb = mlock_dereference(br->mdb, br);
- if (mdb && val < mdb->size)
- goto unlock;
-
- err = 0;
-
- old = br->hash_max;
- br->hash_max = val;
-
- if (mdb) {
- if (mdb->old) {
- err = -EEXIST;
-rollback:
- br->hash_max = old;
- goto unlock;
- }
-
- err = br_mdb_rehash(&br->mdb, br->hash_max,
- br->hash_elasticity);
- if (err)
- goto rollback;
- }
-
-unlock:
- spin_unlock_bh(&br->multicast_lock);
-
- return err;
-}
-
int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
{
/* Currently we support only version 2 and 3 */
@@ -2646,3 +2351,13 @@ void br_multicast_get_stats(const struct net_bridge *br,
}
memcpy(dest, &tdst, sizeof(*dest));
}
+
+int br_mdb_hash_init(struct net_bridge *br)
+{
+ return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+}
+
+void br_mdb_hash_fini(struct net_bridge *br)
+{
+ rhashtable_destroy(&br->mdb_hash_tbl);
+}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index b1b5e8516724..c9383c470a83 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -671,10 +671,8 @@ static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff
return 0;
}
- if (data->vlan_tci) {
- skb->vlan_tci = data->vlan_tci;
- skb->vlan_proto = data->vlan_proto;
- }
+ if (data->vlan_proto)
+ __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
@@ -740,8 +738,13 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
data = this_cpu_ptr(&brnf_frag_data_storage);
- data->vlan_tci = skb->vlan_tci;
- data->vlan_proto = skb->vlan_proto;
+ if (skb_vlan_tag_present(skb)) {
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
+ } else {
+ data->vlan_proto = 0;
+ }
+
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3345f1984542..935495b93a99 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -525,7 +525,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
}
static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
- int cmd, struct bridge_vlan_info *vinfo, bool *changed)
+ int cmd, struct bridge_vlan_info *vinfo, bool *changed,
+ struct netlink_ext_ack *extack)
{
bool curr_change;
int err = 0;
@@ -537,11 +538,11 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
* per-VLAN entry as well
*/
err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
- &curr_change);
+ &curr_change, extack);
} else {
vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
err = br_vlan_add(br, vinfo->vid, vinfo->flags,
- &curr_change);
+ &curr_change, extack);
}
if (curr_change)
*changed = true;
@@ -568,7 +569,8 @@ static int br_process_vlan_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct bridge_vlan_info *vinfo_curr,
struct bridge_vlan_info **vinfo_last,
- bool *changed)
+ bool *changed,
+ struct netlink_ext_ack *extack)
{
if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
return -EINVAL;
@@ -598,7 +600,8 @@ static int br_process_vlan_info(struct net_bridge *br,
sizeof(struct bridge_vlan_info));
for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
tmp_vinfo.vid = v;
- err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
+ err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed,
+ extack);
if (err)
break;
}
@@ -607,13 +610,14 @@ static int br_process_vlan_info(struct net_bridge *br,
return err;
}
- return br_vlan_info(br, p, cmd, vinfo_curr, changed);
+ return br_vlan_info(br, p, cmd, vinfo_curr, changed, extack);
}
static int br_afspec(struct net_bridge *br,
struct net_bridge_port *p,
struct nlattr *af_spec,
- int cmd, bool *changed)
+ int cmd, bool *changed,
+ struct netlink_ext_ack *extack)
{
struct bridge_vlan_info *vinfo_curr = NULL;
struct bridge_vlan_info *vinfo_last = NULL;
@@ -643,7 +647,8 @@ static int br_afspec(struct net_bridge *br,
return -EINVAL;
vinfo_curr = nla_data(attr);
err = br_process_vlan_info(br, p, cmd, vinfo_curr,
- &vinfo_last, changed);
+ &vinfo_last, changed,
+ extack);
if (err)
return err;
break;
@@ -850,7 +855,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
}
/* Change state and parameters on port. */
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
struct nlattr *tb[IFLA_BRPORT_MAX + 1];
@@ -897,7 +903,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
}
if (afspec)
- err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
+ err = br_afspec(br, p, afspec, RTM_SETLINK, &changed, extack);
if (changed)
br_ifinfo_notify(RTM_NEWLINK, br, p);
@@ -923,7 +929,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (!p && !(dev->priv_flags & IFF_EBRIDGE))
return -EINVAL;
- err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+ err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL);
if (changed)
/* Send RTM_NEWLINK because userspace
* expects RTM_NEWLINK for vlan dels
@@ -1035,6 +1041,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
+ [IFLA_BR_MULTI_BOOLOPT] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_boolopt_multi) },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1103,7 +1111,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
- err = __br_vlan_set_default_pvid(br, defpvid);
+ err = __br_vlan_set_default_pvid(br, defpvid, extack);
if (err)
return err;
}
@@ -1187,19 +1195,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
- if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) {
- u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]);
+ if (data[IFLA_BR_MCAST_HASH_ELASTICITY])
+ br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+ RHT_ELASTICITY);
- br->hash_elasticity = val;
- }
-
- if (data[IFLA_BR_MCAST_HASH_MAX]) {
- u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
-
- err = br_multicast_set_hash_max(br, hash_max);
- if (err)
- return err;
- }
+ if (data[IFLA_BR_MCAST_HASH_MAX])
+ br->hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
@@ -1296,6 +1297,15 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
}
#endif
+ if (data[IFLA_BR_MULTI_BOOLOPT]) {
+ struct br_boolopt_multi *bm;
+
+ bm = nla_data(data[IFLA_BR_MULTI_BOOLOPT]);
+ err = br_boolopt_multi_toggle(br, bm, extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -1374,6 +1384,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */
#endif
+ nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */
0;
}
@@ -1387,6 +1398,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 stp_enabled = br->stp_enabled;
u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
u8 vlan_enabled = br_vlan_enabled(br->dev);
+ struct br_boolopt_multi bm;
u64 clockval;
clockval = br_timer_value(&br->hello_timer);
@@ -1403,6 +1415,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
return -EMSGSIZE;
+ br_boolopt_multi_get(br, &bm);
if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
@@ -1420,7 +1433,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) ||
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
br->topology_change_detected) ||
- nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr))
+ nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
+ nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
return -EMSGSIZE;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
@@ -1442,8 +1456,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
- nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
- br->hash_elasticity) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
br->multicast_last_member_count) ||
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2920e06a5403..d240b3e7919f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -31,6 +31,8 @@
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
+#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
@@ -102,12 +104,18 @@ struct br_tunnel_info {
struct metadata_dst *tunnel_dst;
};
+/* private vlan flags */
+enum {
+ BR_VLFLAG_PER_PORT_STATS = BIT(0),
+};
+
/**
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
+ * @priv_flags: private (in-kernel) bridge vlan flags
* @stats: per-cpu VLAN statistics
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
@@ -127,6 +135,7 @@ struct net_bridge_vlan {
struct rhash_head tnode;
u16 vid;
u16 flags;
+ u16 priv_flags;
struct br_vlan_stats __percpu *stats;
union {
struct net_bridge *br;
@@ -206,23 +215,14 @@ struct net_bridge_port_group {
};
struct net_bridge_mdb_entry {
- struct hlist_node hlist[2];
+ struct rhash_head rhnode;
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
bool host_joined;
-};
-
-struct net_bridge_mdb_htable {
- struct hlist_head *mhash;
- struct rcu_head rcu;
- struct net_bridge_mdb_htable *old;
- u32 size;
- u32 max;
- u32 secret;
- u32 ver;
+ struct hlist_node mdb_node;
};
struct net_bridge_port {
@@ -321,6 +321,7 @@ enum net_bridge_opts {
BROPT_NEIGH_SUPPRESS_ENABLED,
BROPT_MTU_SET_BY_USER,
BROPT_VLAN_STATS_PER_PORT,
+ BROPT_NO_LL_LEARN,
};
struct net_bridge {
@@ -373,7 +374,6 @@ struct net_bridge {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- u32 hash_elasticity;
u32 hash_max;
u32 multicast_last_member_count;
@@ -392,7 +392,9 @@ struct net_bridge {
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
- struct net_bridge_mdb_htable __rcu *mdb;
+ struct rhashtable mdb_hash_tbl;
+
+ struct hlist_head mdb_list;
struct hlist_head router_list;
struct timer_list multicast_router_timer;
@@ -500,6 +502,14 @@ static inline int br_opt_get(const struct net_bridge *br,
return test_bit(opt, &br->options);
}
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+ struct netlink_ext_ack *extack);
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt);
+int br_boolopt_multi_toggle(struct net_bridge *br,
+ struct br_boolopt_multi *bm,
+ struct netlink_ext_ack *extack);
+void br_boolopt_multi_get(const struct net_bridge *br,
+ struct br_boolopt_multi *bm);
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
/* br_device.c */
@@ -565,6 +575,9 @@ int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int *idx);
+int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
+ const unsigned char *addr, u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack);
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
@@ -643,7 +656,6 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-extern unsigned int br_mdb_rehash_seq;
int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
struct sk_buff *skb, u16 vid);
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
@@ -668,17 +680,15 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
#endif
struct net_bridge_mdb_entry *
-br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
+br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
struct net_bridge_mdb_entry *
-br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group);
-void br_multicast_free_pg(struct rcu_head *head);
+br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
struct net_bridge_port_group *
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src);
-void br_mdb_init(void);
-void br_mdb_uninit(void);
+int br_mdb_hash_init(struct net_bridge *br);
+void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_ip *group, int type, u8 flags);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
@@ -690,6 +700,8 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+void br_mdb_init(void);
+void br_mdb_uninit(void);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -815,6 +827,15 @@ static inline void br_mdb_uninit(void)
{
}
+static inline int br_mdb_hash_init(struct net_bridge *br)
+{
+ return 0; /* stub: performs no setup, @br is unused, always reports success */
+}
+
+static inline void br_mdb_hash_fini(struct net_bridge *br)
+{ /* stub: intentionally empty, @br is unused */
+}
+
static inline void br_multicast_count(struct net_bridge *br,
const struct net_bridge_port *p,
const struct sk_buff *skb,
@@ -850,7 +871,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb);
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
- bool *changed);
+ bool *changed, struct netlink_ext_ack *extack);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -863,12 +884,13 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+ struct netlink_ext_ack *extack);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed);
+ bool *changed, struct netlink_ext_ack *extack);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
-int nbp_vlan_init(struct net_bridge_port *port);
+int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack);
int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
void br_vlan_get_stats(const struct net_bridge_vlan *v,
struct br_vlan_stats *stats);
@@ -905,7 +927,7 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
int err = 0;
if (skb_vlan_tag_present(skb)) {
- *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
+ *vid = skb_vlan_tag_get_id(skb);
} else {
*vid = 0;
err = -EINVAL;
@@ -953,7 +975,7 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
}
static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
*changed = false;
return -EOPNOTSUPP;
@@ -978,7 +1000,7 @@ static inline int br_vlan_init(struct net_bridge *br)
}
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
*changed = false;
return -EOPNOTSUPP;
@@ -999,7 +1021,8 @@ static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group
return NULL;
}
-static inline int nbp_vlan_init(struct net_bridge_port *port)
+static inline int nbp_vlan_init(struct net_bridge_port *port,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1120,7 +1143,8 @@ int br_netlink_init(void);
void br_netlink_fini(void);
void br_ifinfo_notify(int event, const struct net_bridge *br,
const struct net_bridge_port *port);
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags,
+ struct netlink_ext_ack *extack);
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
u32 filter_mask, int nlflags);
@@ -1155,7 +1179,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long mask);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags);
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+ struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
@@ -1187,7 +1212,8 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
}
static inline int br_switchdev_port_vlan_add(struct net_device *dev,
- u16 vid, u16 flags)
+ u16 vid, u16 flags,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index b993df770675..035ff59d9cbd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -140,7 +140,8 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
}
}
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
@@ -150,7 +151,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
.vid_end = vid,
};
- return switchdev_port_obj_add(dev, &v.obj);
+ return switchdev_port_obj_add(dev, &v.obj, extack);
}
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 60182bef6341..b05b94e9c595 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -328,6 +328,27 @@ static ssize_t flush_store(struct device *d,
}
static DEVICE_ATTR_WO(flush);
+/*
+ * sysfs "show" handler for no_linklocal_learn: looks up the bridge behind @d,
+ * reads its BR_BOOLOPT_NO_LL_LEARN option through br_boolopt_get() and prints
+ * it into @buf as a decimal number followed by a newline. Returns whatever
+ * sprintf() returns. @attr is not used.
+ */
+static ssize_t no_linklocal_learn_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int opt_val = br_boolopt_get(to_bridge(d), BR_BOOLOPT_NO_LL_LEARN);
+
+ return sprintf(buf, "%d\n", opt_val);
+}
+
+/*
+ * Setter callback for the no_linklocal_learn attribute: any non-zero @val
+ * turns BR_BOOLOPT_NO_LL_LEARN on, zero turns it off. No extack is supplied
+ * (NULL). Returns the result of br_boolopt_toggle().
+ */
+static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val)
+{
+ bool enable = val != 0;
+
+ return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, enable, NULL);
+}
+
+static ssize_t no_linklocal_learn_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_no_linklocal_learn); /* all work is delegated; set_no_linklocal_learn is passed as the setter callback, @attr is unused */
+}
+static DEVICE_ATTR_RW(no_linklocal_learn);
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
@@ -403,13 +424,13 @@ static DEVICE_ATTR_RW(multicast_querier);
static ssize_t hash_elasticity_show(struct device *d,
struct device_attribute *attr, char *buf)
{
- struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->hash_elasticity);
+ return sprintf(buf, "%u\n", RHT_ELASTICITY);
}
static int set_elasticity(struct net_bridge *br, unsigned long val)
{
- br->hash_elasticity = val;
+ br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+ RHT_ELASTICITY);
return 0;
}
@@ -428,10 +449,16 @@ static ssize_t hash_max_show(struct device *d, struct device_attribute *attr,
return sprintf(buf, "%u\n", br->hash_max);
}
+static int set_hash_max(struct net_bridge *br, unsigned long val)
+{
+ br->hash_max = val; /* NOTE(review): unsigned long stored into the u32 hash_max field with no range check - confirm truncation is acceptable */
+ return 0; /* unconditional success */
+}
+
static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_hash_max);
+ return store_bridge_parm(d, buf, len, set_hash_max);
}
static DEVICE_ATTR_RW(hash_max);
@@ -841,6 +868,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_gc_timer.attr,
&dev_attr_group_addr.attr,
&dev_attr_flush.attr,
+ &dev_attr_no_linklocal_learn.attr,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
&dev_attr_multicast_router.attr,
&dev_attr_multicast_snooping.attr,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7c87a2fe5248..88715edb119a 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -320,9 +320,6 @@ static ssize_t brport_store(struct kobject *kobj,
if (!rtnl_trylock())
return restart_syscall();
- if (!p->dev || !p->br)
- goto out_unlock;
-
if (brport_attr->store_raw) {
char *buf_copy;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8c9297a01947..4a2f31157ef5 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -80,14 +80,14 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
- u16 vid, u16 flags)
+ u16 vid, u16 flags, struct netlink_ext_ack *extack)
{
int err;
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = br_switchdev_port_vlan_add(dev, vid, flags);
+ err = br_switchdev_port_vlan_add(dev, vid, flags, extack);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, vid);
return err;
@@ -139,7 +139,9 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
/* Returns a master vlan, if it didn't exist it gets created. In all cases a
* a reference is taken to the master vlan before returning.
*/
-static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid)
+static struct net_bridge_vlan *
+br_vlan_get_master(struct net_bridge *br, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *masterv;
@@ -150,7 +152,7 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
bool changed;
/* missing global ctx, create it now */
- if (br_vlan_add(br, vid, 0, &changed))
+ if (br_vlan_add(br, vid, 0, &changed, extack))
return NULL;
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
@@ -197,7 +199,7 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
v = container_of(rcu, struct net_bridge_vlan, rcu);
WARN_ON(br_vlan_is_master(v));
/* if we had per-port stats configured then free them here */
- if (v->brvlan->stats != v->stats)
+ if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
free_percpu(v->stats);
v->stats = NULL;
kfree(v);
@@ -214,7 +216,8 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
* 4. same as 3 but with both master and brentry flags set so the entry
* will be used for filtering in both the port and the bridge
*/
-static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
+static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan *masterv = NULL;
struct net_bridge_port *p = NULL;
@@ -239,7 +242,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
* This ensures tagged traffic enters the bridge when
* promiscuous mode is disabled by br_manage_promisc().
*/
- err = __vlan_vid_add(dev, br, v->vid, flags);
+ err = __vlan_vid_add(dev, br, v->vid, flags, extack);
if (err)
goto out;
@@ -249,12 +252,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
err = br_vlan_add(br, v->vid,
flags | BRIDGE_VLAN_INFO_BRENTRY,
- &changed);
+ &changed, extack);
if (err)
goto out_filt;
}
- masterv = br_vlan_get_master(br, v->vid);
+ masterv = br_vlan_get_master(br, v->vid, extack);
if (!masterv)
goto out_filt;
v->brvlan = masterv;
@@ -264,11 +267,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
err = -ENOMEM;
goto out_filt;
}
+ v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
} else {
v->stats = masterv->stats;
}
} else {
- err = br_switchdev_port_vlan_add(dev, v->vid, flags);
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
goto out;
}
@@ -420,7 +424,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
}
if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
if (p && (p->flags & BR_VLAN_TUNNEL) &&
br_handle_egress_vlan_tunnel(skb, v)) {
@@ -493,8 +497,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
__vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
else
/* Priority-tagged Frame.
- * At this point, We know that skb->vlan_tci had
- * VLAN_TAG_PRESENT bit and its VID field was 0x000.
+ * At this point, we know that skb->vlan_tci VID
+ * field was 0.
* We update only VID field and preserve PCP field.
*/
skb->vlan_tci |= pvid;
@@ -590,11 +594,12 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
static int br_vlan_add_existing(struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan,
- u16 flags, bool *changed)
+ u16 flags, bool *changed,
+ struct netlink_ext_ack *extack)
{
int err;
- err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags);
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -633,7 +638,8 @@ err_flags:
* Must be called with vid in range from 1 to 4094 inclusive.
* changed must be true only if the vlan was created or updated
*/
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *vlan;
@@ -645,7 +651,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
vg = br_vlan_group(br);
vlan = br_vlan_find(vg, vid);
if (vlan)
- return br_vlan_add_existing(br, vg, vlan, flags, changed);
+ return br_vlan_add_existing(br, vg, vlan, flags, changed,
+ extack);
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
if (!vlan)
@@ -662,7 +669,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
vlan->br = br;
if (flags & BRIDGE_VLAN_INFO_BRENTRY)
refcount_set(&vlan->refcnt, 1);
- ret = __vlan_add(vlan, flags);
+ ret = __vlan_add(vlan, flags, extack);
if (ret) {
free_percpu(vlan->stats);
kfree(vlan);
@@ -913,7 +920,8 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br)
br->default_pvid = 0;
}
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+ struct netlink_ext_ack *extack)
{
const struct net_bridge_vlan *pvent;
struct net_bridge_vlan_group *vg;
@@ -945,7 +953,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY,
- &vlchange);
+ &vlchange, extack);
if (err)
goto out;
br_vlan_delete(br, old_pvid);
@@ -965,7 +973,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = nbp_vlan_add(p, pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &vlchange);
+ &vlchange, extack);
if (err)
goto err_port;
nbp_vlan_delete(p, old_pvid);
@@ -987,7 +995,7 @@ err_port:
nbp_vlan_add(p, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &vlchange);
+ &vlchange, NULL);
nbp_vlan_delete(p, pvid);
}
@@ -997,7 +1005,7 @@ err_port:
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY,
- &vlchange);
+ &vlchange, NULL);
br_vlan_delete(br, pvid);
}
goto out;
@@ -1020,7 +1028,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
err = -EPERM;
goto out;
}
- err = __br_vlan_set_default_pvid(br, pvid);
+ err = __br_vlan_set_default_pvid(br, pvid, NULL);
out:
return err;
}
@@ -1046,7 +1054,7 @@ int br_vlan_init(struct net_bridge *br)
rcu_assign_pointer(br->vlgrp, vg);
ret = br_vlan_add(br, 1,
BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY, &changed);
+ BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
if (ret)
goto err_vlan_add;
@@ -1063,7 +1071,7 @@ err_rhtbl:
goto out;
}
-int nbp_vlan_init(struct net_bridge_port *p)
+int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
{
struct switchdev_attr attr = {
.orig_dev = p->br->dev,
@@ -1096,7 +1104,7 @@ int nbp_vlan_init(struct net_bridge_port *p)
ret = nbp_vlan_add(p, p->br->default_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &changed);
+ &changed, extack);
if (ret)
goto err_vlan_add;
}
@@ -1121,7 +1129,7 @@ err_vlan_enabled:
* changed must be true only if the vlan was created or updated
*/
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
struct net_bridge_vlan *vlan;
int ret;
@@ -1132,7 +1140,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
/* Pass the flags to the hardware bridge */
- ret = br_switchdev_port_vlan_add(port->dev, vid, flags);
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
if (ret && ret != -EOPNOTSUPP)
return ret;
*changed = __vlan_add_flags(vlan, flags);
@@ -1146,7 +1154,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
vlan->vid = vid;
vlan->port = port;
- ret = __vlan_add(vlan, flags);
+ ret = __vlan_add(vlan, flags, extack);
if (ret)
kfree(vlan);
else
@@ -1216,9 +1224,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
{
struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
ASSERT_RTNL();
- if (netif_is_bridge_master(dev))
+ p = br_port_get_check_rtnl(dev);
+ if (p)
+ vg = nbp_vlan_group(p);
+ else if (netif_is_bridge_master(dev))
vg = br_vlan_group(netdev_priv(dev));
else
return -EINVAL;
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee82581..3aab7664933f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
} else
ifindex = ro->ifindex;
- if (ro->fd_frames) {
+ dev = dev_get_by_index(sock_net(sk), ifindex);
+ if (!dev)
+ return -ENXIO;
+
+ err = -EINVAL;
+ if (ro->fd_frames && dev->mtu == CANFD_MTU) {
if (unlikely(size != CANFD_MTU && size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
} else {
if (unlikely(size != CAN_MTU))
- return -EINVAL;
+ goto put_dev;
}
- dev = dev_get_by_index(sock_net(sk), ifindex);
- if (!dev)
- return -ENXIO;
-
skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57fcc6b4bf6e..2f126eff275d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -580,9 +580,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
struct bio_vec bvec;
int ret;
- /* sendpage cannot properly handle pages with page_count == 0,
- * we need to fallback to sendmsg if that's the case */
- if (page_count(page) >= 1)
+ /*
+ * sendpage cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
return __ceph_tcp_sendpage(sock, page, offset, size, more);
bvec.bv_page = page;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 57f3a6fcfc1e..4bf62b1afa3b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -728,49 +728,6 @@ fault:
return -EFAULT;
}
-__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
-{
- __sum16 sum;
-
- sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
- if (!skb_shared(skb))
- skb->csum_valid = !sum;
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete_head);
-
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
-{
- __wsum csum;
- __sum16 sum;
-
- csum = skb_checksum(skb, 0, skb->len, 0);
-
- /* skb->csum holds pseudo checksum */
- sum = csum_fold(csum_add(skb->csum, csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
-
- if (!skb_shared(skb)) {
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
- }
-
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete);
-
/**
* skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
@@ -810,7 +767,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(NULL);
+ netdev_rx_csum_fault(NULL, skb);
}
return 0;
fault:
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ffcbdd55fa9..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
@@ -162,6 +163,9 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
@@ -1361,7 +1365,7 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
@@ -1377,7 +1381,7 @@ static int __dev_open(struct net_device *dev)
*/
netpoll_poll_disable(dev);
- ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
ret = notifier_to_errno(ret);
if (ret)
return ret;
@@ -1406,7 +1410,8 @@ static int __dev_open(struct net_device *dev)
/**
* dev_open - prepare an interface for use.
- * @dev: device to open
+ * @dev: device to open
+ * @extack: netlink extended ack
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
@@ -1416,14 +1421,14 @@ static int __dev_open(struct net_device *dev)
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
int ret;
if (dev->flags & IFF_UP)
return 0;
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
if (ret < 0)
return ret;
@@ -1585,6 +1590,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ N(PRE_CHANGEADDR)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1733,6 +1739,18 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+/*
+ * Build a notifier info record carrying @dev and @extack (all other fields
+ * zero-initialized) and pass it, together with event @val, to
+ * call_netdevice_notifiers_info(). That call's result is returned unchanged.
+ */
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_info notifier_info = { .dev = dev, .extack = extack };
+
+ return call_netdevice_notifiers_info(val, &notifier_info);
+}
+
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@ -1744,11 +1762,7 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info = {
- .dev = dev,
- };
-
- return call_netdevice_notifiers_info(val, &info);
+ return call_netdevice_notifiers_extack(val, dev, NULL);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2175,6 +2189,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
return active;
}
+/*
+ * Detach an XPS map from @dev and release it.
+ *
+ * For the rxqs map the xps_rxqs_needed static key is decremented and
+ * dev->xps_rxqs_map is cleared; otherwise dev->xps_cpus_map is cleared.
+ * In both cases the xps_needed static key is then decremented and @dev_maps
+ * is freed through kfree_rcu().
+ *
+ * NOTE(review): the *_cpuslocked static-key helpers are used, so the caller
+ * presumably already holds the CPU hotplug read lock - confirm at call sites.
+ */
+static void reset_xps_maps(struct net_device *dev,
+ struct xps_dev_maps *dev_maps,
+ bool is_rxqs_map)
+{
+ if (!is_rxqs_map) {
+ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ } else {
+ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+ }
+
+ static_key_slow_dec_cpuslocked(&xps_needed);
+ kfree_rcu(dev_maps, rcu);
+}
+
static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
struct xps_dev_maps *dev_maps, unsigned int nr_ids,
u16 offset, u16 count, bool is_rxqs_map)
@@ -2186,18 +2214,15 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
j < nr_ids;)
active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
count);
- if (!active) {
- if (is_rxqs_map) {
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- } else {
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
- for (i = offset + (count - 1); count--; i--)
- netdev_queue_numa_node_write(
- netdev_get_tx_queue(dev, i),
- NUMA_NO_NODE);
+ if (!is_rxqs_map) {
+ for (i = offset + (count - 1); count--; i--) {
+ netdev_queue_numa_node_write(
+ netdev_get_tx_queue(dev, i),
+ NUMA_NO_NODE);
}
- kfree_rcu(dev_maps, rcu);
}
}
@@ -2234,10 +2259,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
false);
out_no_maps:
- if (static_key_enabled(&xps_rxqs_needed))
- static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-
- static_key_slow_dec_cpuslocked(&xps_needed);
mutex_unlock(&xps_map_mutex);
cpus_read_unlock();
}
@@ -2355,9 +2376,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!new_dev_maps)
goto out_no_new_maps;
- static_key_slow_inc_cpuslocked(&xps_needed);
- if (is_rxqs_map)
- static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ if (!dev_maps) {
+ /* Increment static keys at most once per type */
+ static_key_slow_inc_cpuslocked(&xps_needed);
+ if (is_rxqs_map)
+ static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ }
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
@@ -2455,13 +2479,8 @@ out_no_new_maps:
}
/* free map if not active */
- if (!active) {
- if (is_rxqs_map)
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- else
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
- kfree_rcu(dev_maps, rcu);
- }
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
out_no_maps:
mutex_unlock(&xps_map_mutex);
@@ -3091,10 +3110,17 @@ EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
-void netdev_rx_csum_fault(struct net_device *dev)
+void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
if (net_ratelimit()) {
pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ if (dev)
+ pr_err("dev features: %pNF\n", &dev->features);
+ pr_err("skb len=%u data_len=%u pkt_type=%u gso_size=%u gso_type=%u nr_frags=%u ip_summed=%u csum=%x csum_complete_sw=%d csum_valid=%d csum_level=%u\n",
+ skb->len, skb->data_len, skb->pkt_type,
+ skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type,
+ skb_shinfo(skb)->nr_frags, skb->ip_summed, skb->csum,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level);
dump_stack();
}
}
@@ -4520,9 +4546,14 @@ static int netif_rx_internal(struct sk_buff *skb)
int netif_rx(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_rx_entry(skb);
- return netif_rx_internal(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -4537,6 +4568,7 @@ int netif_rx_ni(struct sk_buff *skb)
if (local_softirq_pending())
do_softirq();
preempt_enable();
+ trace_netif_rx_ni_exit(err);
return err;
}
@@ -4889,7 +4921,7 @@ skip_classify:
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
*/
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
}
type = skb->protocol;
@@ -5009,7 +5041,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
- list_del(&skb->list);
+ skb_list_del_init(skb);
__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
@@ -5165,7 +5197,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(netdev_tstamp_prequeue, skb);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
}
@@ -5176,7 +5208,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
rcu_read_lock();
list_for_each_entry_safe(skb, next, head, list) {
xdp_prog = rcu_dereference(skb->dev->xdp_prog);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
list_add_tail(&skb->list, &sublist);
}
@@ -5195,7 +5227,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
if (cpu >= 0) {
/* Will be handled, remove from list */
- list_del(&skb->list);
+ skb_list_del_init(skb);
enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
}
}
@@ -5222,9 +5254,14 @@ static void netif_receive_skb_list_internal(struct list_head *head)
*/
int netif_receive_skb(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_receive_skb_entry(skb);
- return netif_receive_skb_internal(skb);
+ ret = netif_receive_skb_internal(skb);
+ trace_netif_receive_skb_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -5244,9 +5281,12 @@ void netif_receive_skb_list(struct list_head *head)
if (list_empty(head))
return;
- list_for_each_entry(skb, head, list)
- trace_netif_receive_skb_list_entry(skb);
+ if (trace_netif_receive_skb_list_entry_enabled()) {
+ list_for_each_entry(skb, head, list)
+ trace_netif_receive_skb_list_entry(skb);
+ }
netif_receive_skb_list_internal(head);
+ trace_netif_receive_skb_list_exit(0);
}
EXPORT_SYMBOL(netif_receive_skb_list);
@@ -5299,6 +5339,8 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5318,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->callbacks.gro_complete(skb, 0);
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, 0);
break;
}
rcu_read_unlock();
@@ -5357,11 +5401,13 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- u32 i;
+ unsigned long bitmask = napi->gro_bitmask;
+ unsigned int i, base = ~0U;
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- if (test_bit(i, &napi->gro_bitmask))
- __napi_gro_flush_chain(napi, i, flush_old);
+ while ((i = ffs(bitmask)) != 0) {
+ bitmask >>= i;
+ base += i;
+ __napi_gro_flush_chain(napi, base, flush_old);
}
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -5386,7 +5432,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
}
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
+ if (skb_vlan_tag_present(p))
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
@@ -5461,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
napi_gro_complete(oldest);
}
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5510,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ gro_head, skb);
break;
}
rcu_read_unlock();
@@ -5634,12 +5688,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ gro_result_t ret;
+
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
- return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ trace_napi_gro_receive_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -5652,9 +5711,13 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
__skb_pull(skb, skb_headlen(skb));
/* restore the reserve we had after netdev_alloc_skb_ip_align() */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb->dev = napi->dev;
skb->skb_iif = 0;
+
+ /* eth_type_trans() assumes pkt_type is PACKET_HOST */
+ skb->pkt_type = PACKET_HOST;
+
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
@@ -5753,6 +5816,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
+ gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
@@ -5760,7 +5824,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
trace_napi_gro_frags_entry(skb);
- return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ trace_napi_gro_frags_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_frags);
@@ -5776,10 +5843,11 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ /* See comments in __skb_checksum_complete(). */
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
+ netdev_rx_csum_fault(skb->dev, skb);
}
NAPI_GRO_CB(skb)->csum = wsum;
@@ -5966,11 +6034,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6197,8 +6268,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->skb = NULL;
napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT)
- pr_err_once("netif_napi_add() called with weight %d on device %s\n",
- weight, dev->name);
+ netdev_err_once(dev, "%s() called with weight %d\n", __func__,
+ weight);
napi->weight = weight;
list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
@@ -7455,7 +7526,8 @@ unsigned int dev_get_flags(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_flags);
-int __dev_change_flags(struct net_device *dev, unsigned int flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
unsigned int old_flags = dev->flags;
int ret;
@@ -7492,7 +7564,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if (old_flags & IFF_UP)
__dev_close(dev);
else
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
}
if ((flags ^ dev->gflags) & IFF_PROMISC) {
@@ -7552,16 +7624,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
* dev_change_flags - change device settings
* @dev: device
* @flags: device state flags
+ * @extack: netlink extended ack
*
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned int flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
int ret;
unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
- ret = __dev_change_flags(dev, flags);
+ ret = __dev_change_flags(dev, flags, extack);
if (ret < 0)
return ret;
@@ -7694,13 +7768,36 @@ void dev_set_group(struct net_device *dev, int new_group)
EXPORT_SYMBOL(dev_set_group);
/**
+ * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ */
+int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_pre_changeaddr_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .dev_addr = addr,
+ };
+ int rc;
+
+ rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
+ return notifier_to_errno(rc);
+}
+EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
+ * @extack: netlink extended ack
*
* Change the hardware (MAC) address of the device
*/
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -7711,6 +7808,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
+ err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+ if (err)
+ return err;
err = ops->ndo_set_mac_address(dev, sa);
if (err)
return err;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index d884d8f5f0e5..a6723b306717 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -278,6 +278,103 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
EXPORT_SYMBOL(__hw_addr_sync_dev);
/**
+ * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking
+ * into account references
+ * @list: address list to synchronize
+ * @dev: device to sync
+ * @sync: function to call if address or reference on it should be added
+ * @unsync: function to call if address or some reference on it should be removed
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address or references on it
+ * add/remove notifications. The unsync function may be NULL in which case
+ * the addresses or references on it requiring removal will simply be
+ * removed without any notification to the device. It is the responsibility of
+ * the driver to identify and distribute address or references on it between
+ * internal address tables.
+ **/
+int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *,
+ const unsigned char *, int),
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err, ref_cnt;
+
+ /* first go through and flush out any unsynced/stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address is not used */
+ if ((ha->sync_cnt << 1) <= ha->refcount)
+ continue;
+
+ /* if fails defer unsyncing address */
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ if (unsync && unsync(dev, ha->addr, ref_cnt))
+ continue;
+
+ ha->refcount = (ref_cnt << 1) + 1;
+ ha->sync_cnt = ref_cnt;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync updated/new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address added or reused */
+ if ((ha->sync_cnt << 1) >= ha->refcount)
+ continue;
+
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ err = sync(dev, ha->addr, ref_cnt);
+ if (err)
+ return err;
+
+ ha->refcount = ref_cnt << 1;
+ ha->sync_cnt = ref_cnt;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_ref_sync_dev);
+
+/**
+ * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on
+ * it from device
+ * @list: address list to remove synchronized addresses (references on it) from
+ * @dev: device to sync
+ * @unsync: function to call if address and references on it should be removed
+ *
+ * Remove all addresses that were added to the device by
+ * __hw_addr_ref_sync_dev(). This function is intended to be called from the
+ * ndo_stop or ndo_open functions on devices that require explicit address (or
+ * references on it) add/remove notifications. If the unsync function pointer
+ * is NULL then this function can be used to just reset the sync_cnt for the
+ * addresses in the list.
+ **/
+void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr, ha->sync_cnt))
+ continue;
+
+ ha->refcount -= ha->sync_cnt - 1;
+ ha->sync_cnt = 0;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_ref_unsync_dev);
+
+/**
* __hw_addr_unsync_dev - Remove synchronized addresses from device
* @list: address list to remove synchronized addresses from
* @dev: device to sync
@@ -401,6 +498,9 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
ASSERT_RTNL();
+ err = dev_pre_changeaddr_notify(dev, addr, NULL);
+ if (err)
+ return err;
err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 90e8aa36881e..31380fd5a4e2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -234,7 +234,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
switch (cmd) {
case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
+ return dev_change_flags(dev, ifr->ifr_flags, NULL);
case SIOCSIFMETRIC: /* Set the metric on the interface
(currently unused) */
@@ -246,7 +246,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFHWADDR:
if (dev->addr_len > sizeof(struct sockaddr))
return -EINVAL;
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL);
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family != dev->type)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3a4b29a13d31..abb0da9d7b4b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2692,6 +2692,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+ .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d1..f9348806e843 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -296,22 +296,18 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
break;
case SKF_AD_VLAN_TAG:
- case SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, vlan_tci));
- if (skb_field == SKF_AD_VLAN_TAG) {
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
- ~VLAN_TAG_PRESENT);
- } else {
- /* dst_reg >>= 12 */
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
- /* dst_reg &= 1 */
+ break;
+ case SKF_AD_VLAN_TAG_PRESENT:
+ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
+ if (PKT_VLAN_PRESENT_BIT)
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
+ if (PKT_VLAN_PRESENT_BIT < 7)
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
- }
break;
}
@@ -467,7 +463,8 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
bool ldx_off_ok = offset <= S16_MAX;
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
- *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+ if (offset)
+ *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
size, 2 + endian + (!ldx_off_ok * 2));
if (ldx_off_ok) {
@@ -2428,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
.arg4_type = ARG_ANYTHING,
};
+static void sk_msg_shift_left(struct sk_msg *msg, int i)
+{
+ int prev;
+
+ do {
+ prev = i;
+ sk_msg_iter_var_next(i);
+ msg->sg.data[prev] = msg->sg.data[i];
+ } while (i != msg->sg.end);
+
+ sk_msg_iter_prev(msg, end);
+}
+
+static void sk_msg_shift_right(struct sk_msg *msg, int i)
+{
+ struct scatterlist tmp, sge;
+
+ sk_msg_iter_next(msg, end);
+ sge = sk_msg_elem_cpy(msg, i);
+ sk_msg_iter_var_next(i);
+ tmp = sk_msg_elem_cpy(msg, i);
+
+ while (i != msg->sg.end) {
+ msg->sg.data[i] = sge;
+ sk_msg_iter_var_next(i);
+ sge = tmp;
+ tmp = sk_msg_elem_cpy(msg, i);
+ }
+}
+
+BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
+ u32, len, u64, flags)
+{
+ u32 i = 0, l, space, offset = 0;
+ u64 last = start + len;
+ int pop;
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg.start;
+ do {
+ l = sk_msg_elem(msg, i)->length;
+
+ if (start < offset + l)
+ break;
+ offset += l;
+ sk_msg_iter_var_next(i);
+ } while (i != msg->sg.end);
+
+ /* Bounds checks: start and pop must be inside message */
+ if (start >= offset + l || last >= msg->sg.size)
+ return -EINVAL;
+
+ space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+ pop = len;
+ /* --------------| offset
+ * -| start |-------- len -------|
+ *
+ * |----- a ----|-------- pop -------|----- b ----|
+ * |______________________________________________| length
+ *
+ *
+ * a: region at front of scatter element to save
+ * b: region at back of scatter element to save when length > A + pop
+ * pop: region to pop from element, same as input 'pop' here will be
+ * decremented below per iteration.
+ *
+ * Two top-level cases to handle when start != offset, first B is non
+ * zero and second B is zero corresponding to when a pop includes more
+ * than one element.
+ *
+ * Then if B is non-zero AND there is no space allocate space and
+ * compact A, B regions into page. If there is space shift ring to
+ * the right, freeing the next element in ring to place B, leaving
+ * A untouched except to reduce length.
+ */
+ if (start != offset) {
+ struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+ int a = start;
+ int b = sge->length - pop - a;
+
+ sk_msg_iter_var_next(i);
+
+ if (pop < sge->length - a) {
+ if (space) {
+ sge->length = a;
+ sk_msg_shift_right(msg, i);
+ nsge = sk_msg_elem(msg, i);
+ get_page(sg_page(sge));
+ sg_set_page(nsge,
+ sg_page(sge),
+ b, sge->offset + pop + a);
+ } else {
+ struct page *page, *orig;
+ u8 *to, *from;
+
+ page = alloc_pages(__GFP_NOWARN |
+ __GFP_COMP | GFP_ATOMIC,
+ get_order(a + b));
+ if (unlikely(!page))
+ return -ENOMEM;
+
+ sge->length = a;
+ orig = sg_page(sge);
+ from = sg_virt(sge);
+ to = page_address(page);
+ memcpy(to, from, a);
+ memcpy(to + a, from + a + pop, b);
+ sg_set_page(sge, page, a + b, 0);
+ put_page(orig);
+ }
+ pop = 0;
+ } else if (pop >= sge->length - a) {
+ sge->length = a;
+ pop -= (sge->length - a);
+ }
+ }
+
+ /* From above the current layout _must_ be as follows,
+ *
+ * -| offset
+ * -| start
+ *
+ * |---- pop ---|---------------- b ------------|
+ * |____________________________________________| length
+ *
+ * Offset and start of the current msg elem are equal because in the
+ * previous case we handled offset != start and either consumed the
+ * entire element and advanced to the next element OR pop == 0.
+ *
+ * Two cases to handle here are first pop is less than the length
+ * leaving some remainder b above. Simply adjust the element's layout
+ * in this case. Or pop >= length of the element so that b = 0. In this
+ * case advance to next element decrementing pop.
+ */
+ while (pop) {
+ struct scatterlist *sge = sk_msg_elem(msg, i);
+
+ if (pop < sge->length) {
+ sge->length -= pop;
+ sge->offset += pop;
+ pop = 0;
+ } else {
+ pop -= sge->length;
+ sk_msg_shift_left(msg, i);
+ }
+ sk_msg_iter_var_next(i);
+ }
+
+ sk_mem_uncharge(msg->sk, len - pop);
+ msg->sg.size -= (len - pop);
+ sk_msg_compute_data_pointers(msg);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pop_data_proto = {
+ .func = bpf_msg_pop_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -3908,6 +4073,26 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
+ struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+ return -EINVAL;
+
+ return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
+ .func = bpf_sockopt_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -4825,47 +5010,40 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
- struct sk_buff *skb, u8 family, u8 proto)
+ int dif, int sdif, u8 family, u8 proto)
{
bool refcounted = false;
struct sock *sk = NULL;
- int dif = 0;
-
- if (skb->dev)
- dif = skb->dev->ifindex;
if (family == AF_INET) {
__be32 src4 = tuple->ipv4.saddr;
__be32 dst4 = tuple->ipv4.daddr;
- int sdif = inet_sdif(skb);
if (proto == IPPROTO_TCP)
- sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+ sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
dif, sdif, &refcounted);
else
sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
- dif, sdif, &udp_table, skb);
+ dif, sdif, &udp_table, NULL);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
- u16 hnum = ntohs(tuple->ipv6.dport);
- int sdif = inet6_sdif(skb);
if (proto == IPPROTO_TCP)
- sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+ sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
else if (likely(ipv6_bpf_stub))
sk = ipv6_bpf_stub->udp6_lib_lookup(net,
src6, tuple->ipv6.sport,
- dst6, hnum,
+ dst6, tuple->ipv6.dport,
dif, sdif,
- &udp_table, skb);
+ &udp_table, NULL);
#endif
}
@@ -4882,31 +5060,34 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
* callers to satisfy BPF_CALL declarations.
*/
static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- u8 proto, u64 netns_id, u64 flags)
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
{
- struct net *caller_net;
struct sock *sk = NULL;
u8 family = AF_UNSPEC;
struct net *net;
+ int sdif;
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
- if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+ if (unlikely(family == AF_UNSPEC || flags ||
+ !((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
- if (skb->dev)
- caller_net = dev_net(skb->dev);
+ if (family == AF_INET)
+ sdif = inet_sdif(skb);
else
- caller_net = sock_net(skb->sk);
- if (netns_id) {
+ sdif = inet6_sdif(skb);
+
+ if ((s32)netns_id < 0) {
+ net = caller_net;
+ sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
+ } else {
net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net))
goto out;
- sk = sk_lookup(net, tuple, skb, family, proto);
+ sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
put_net(net);
- } else {
- net = caller_net;
- sk = sk_lookup(net, tuple, skb, family, proto);
}
if (sk)
@@ -4915,6 +5096,25 @@ out:
return (unsigned long) sk;
}
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct net *caller_net;
+ int ifindex;
+
+ if (skb->dev) {
+ caller_net = dev_net(skb->dev);
+ ifindex = skb->dev->ifindex;
+ } else {
+ caller_net = sock_net(skb->sk);
+ ifindex = 0;
+ }
+
+ return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
+ proto, netns_id, flags);
+}
+
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
@@ -4964,6 +5164,87 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET,
};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+ IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+ .func = bpf_xdp_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+ .func = bpf_xdp_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+ .func = bpf_sock_addr_sk_lookup_tcp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+ IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+ .func = bpf_sock_addr_sk_lookup_udp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -4986,6 +5267,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_msg_push_data ||
+ func == bpf_msg_pop_data ||
func == bpf_xdp_adjust_tail ||
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes ||
@@ -5070,6 +5352,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sock_addr_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sock_addr_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
}
@@ -5214,6 +5504,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_xdp_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_xdp_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -5240,6 +5538,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_ops_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_sockopt_event_output_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5264,6 +5564,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pull_data_proto;
case BPF_FUNC_msg_push_data:
return &bpf_msg_push_data_proto;
+ case BPF_FUNC_msg_pop_data:
+ return &bpf_msg_pop_data_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5436,8 +5738,12 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != size_default)
return false;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
- if (size != sizeof(struct bpf_flow_keys *))
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ if (size != sizeof(__u64))
+ return false;
+ break;
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ if (size != sizeof(__u64))
return false;
break;
default:
@@ -5465,8 +5771,10 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -5490,7 +5798,8 @@ static bool cg_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5505,6 +5814,10 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ if (!capable(CAP_SYS_ADMIN))
+ return false;
+ break;
default:
return false;
}
@@ -5531,7 +5844,9 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -5741,6 +6056,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
break;
default:
return false;
@@ -5757,7 +6073,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
}
@@ -5959,7 +6275,9 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -6040,12 +6358,14 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
- case bpf_ctx_range(struct __sk_buff, flow_keys):
+ case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
info->reg_type = PTR_TO_FLOW_KEYS;
break;
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -6140,19 +6460,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, vlan_present):
- case offsetof(struct __sk_buff, vlan_tci):
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ *target_size = 1;
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+ PKT_VLAN_PRESENT_OFFSET());
+ if (PKT_VLAN_PRESENT_BIT)
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
+ if (PKT_VLAN_PRESENT_BIT < 7)
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+ break;
+ case offsetof(struct __sk_buff, vlan_tci):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, vlan_tci, 2,
target_size));
- if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
- ~VLAN_TAG_PRESENT);
- } else {
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
- }
break;
case offsetof(struct __sk_buff, cb[0]) ...
@@ -6355,6 +6675,33 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break;
+
+ case offsetof(struct __sk_buff, tstamp):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
+ break;
+
+ case offsetof(struct __sk_buff, wire_len):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, wire_len);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, pkt_len);
+ *target_size = 4;
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
}
return insn - insn_buf;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 56d1e9b73142..2e8d91e54179 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1165,8 +1165,8 @@ ip_proto_again:
break;
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+ !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 41954e42a2de..fb4372cb1de1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -118,21 +118,77 @@ unsigned long neigh_rand_reach_time(unsigned long base)
}
EXPORT_SYMBOL(neigh_rand_reach_time);
+/* Flag @n as dead and make sure it no longer sits on its table's gc list.
+ * The table's gc_entries counter is dropped only when the entry was actually
+ * linked on the list.
+ */
+static void neigh_mark_dead(struct neighbour *n)
+{
+	n->dead = 1;
+
+	/* Entries that were never queued for gc have nothing to undo. */
+	if (list_empty(&n->gc_list))
+		return;
+
+	list_del_init(&n->gc_list);
+	atomic_dec(&n->tbl->gc_entries);
+}
+
+/* Reconcile @n's membership on its table's gc_list with its current state.
+ *
+ * An entry that is NUD_PERMANENT or flagged NTF_EXT_LEARNED is taken off the
+ * list; any other live entry is appended to it. tbl->gc_entries is adjusted
+ * to match. Takes n->tbl->lock (with BHs disabled) and then n->lock, both for
+ * writing, and releases them before returning.
+ */
+static void neigh_update_gc_list(struct neighbour *n)
+{
+	bool on_gc_list, exempt_from_gc;
+
+	write_lock_bh(&n->tbl->lock);
+	write_lock(&n->lock);
+
+	/* neigh_mark_dead() has already unlinked a dead entry and dropped its
+	 * gc_entries count. __neigh_update() can still get here for a dead
+	 * neighbour, so never put such an entry back on the gc list.
+	 */
+	if (n->dead)
+		goto out;
+
+	/* remove from the gc list if new state is permanent or if neighbor
+	 * is externally learned; otherwise entry should be on the gc list
+	 */
+	exempt_from_gc = (n->nud_state & NUD_PERMANENT) ||
+			 (n->flags & NTF_EXT_LEARNED);
+	on_gc_list = !list_empty(&n->gc_list);
+
+	if (exempt_from_gc && on_gc_list) {
+		list_del_init(&n->gc_list);
+		atomic_dec(&n->tbl->gc_entries);
+	} else if (!exempt_from_gc && !on_gc_list) {
+		/* add entries to the tail; cleaning removes from the front */
+		list_add_tail(&n->gc_list, &n->tbl->gc_list);
+		atomic_inc(&n->tbl->gc_entries);
+	}
+
+out:
+	write_unlock(&n->lock);
+	write_unlock_bh(&n->tbl->lock);
+}
+
+/* Bring NTF_EXT_LEARNED on @neigh in line with an administrative update.
+ *
+ * Nothing happens unless @flags carries NEIGH_UPDATE_F_ADMIN. When the
+ * requested state (NEIGH_UPDATE_F_EXT_LEARNED set or clear) differs from the
+ * entry's current NTF_EXT_LEARNED bit, the bit is flipped and *@notify is set
+ * to 1.
+ *
+ * Returns true if the flag was changed, false otherwise.
+ */
+static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+				     int *notify)
+{
+	bool want_ext_learned, is_ext_learned;
+
+	if (!(flags & NEIGH_UPDATE_F_ADMIN))
+		return false;
+
+	want_ext_learned = !!(flags & NEIGH_UPDATE_F_EXT_LEARNED);
+	is_ext_learned = !!(neigh->flags & NTF_EXT_LEARNED);
+	if (want_ext_learned == is_ext_learned)
+		return false;
+
+	if (want_ext_learned)
+		neigh->flags |= NTF_EXT_LEARNED;
+	else
+		neigh->flags &= ~NTF_EXT_LEARNED;
+
+	*notify = 1;
+	return true;
+}
-static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
- struct neighbour __rcu **np, struct neigh_table *tbl)
+static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
+ struct neigh_table *tbl)
{
bool retval = false;
write_lock(&n->lock);
- if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
- !(n->flags & flags)) {
+ if (refcount_read(&n->refcnt) == 1) {
struct neighbour *neigh;
neigh = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));
rcu_assign_pointer(*np, neigh);
- n->dead = 1;
+ neigh_mark_dead(n);
retval = true;
}
write_unlock(&n->lock);
@@ -158,7 +214,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock)))) {
if (n == ndel)
- return neigh_del(n, 0, 0, np, tbl);
+ return neigh_del(n, np, tbl);
np = &n->next;
}
return false;
@@ -166,32 +222,29 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
static int neigh_forced_gc(struct neigh_table *tbl)
{
+ int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
+ unsigned long tref = jiffies - 5 * HZ;
+ struct neighbour *n, *tmp;
int shrunk = 0;
- int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- /* Neighbour record may be discarded if:
- * - nobody refers to it.
- * - it is not permanent
- */
- if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
- tbl)) {
- shrunk = 1;
- continue;
- }
- np = &n->next;
+ list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
+ if (refcount_read(&n->refcnt) == 1) {
+ bool remove = false;
+
+ write_lock(&n->lock);
+ if ((n->nud_state == NUD_FAILED) ||
+ time_after(tref, n->updated))
+ remove = true;
+ write_unlock(&n->lock);
+
+ if (remove && neigh_remove_one(n, tbl))
+ shrunk++;
+ if (shrunk >= max_clean)
+ break;
}
}
@@ -260,8 +313,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
- n->dead = 1;
-
+ neigh_mark_dead(n);
if (refcount_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
We must destroy neighbour entry,
@@ -321,13 +373,18 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
}
EXPORT_SYMBOL(neigh_ifdown);
-static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl,
+ struct net_device *dev,
+ bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
int entries;
- entries = atomic_inc_return(&tbl->entries) - 1;
+ if (exempt_from_gc)
+ goto do_alloc;
+
+ entries = atomic_inc_return(&tbl->gc_entries) - 1;
if (entries >= tbl->gc_thresh3 ||
(entries >= tbl->gc_thresh2 &&
time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -340,6 +397,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
}
}
+do_alloc:
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
if (!n)
goto out_entries;
@@ -358,11 +416,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
n->tbl = tbl;
refcount_set(&n->refcnt, 1);
n->dead = 1;
+ INIT_LIST_HEAD(&n->gc_list);
+
+ atomic_inc(&tbl->entries);
out:
return n;
out_entries:
- atomic_dec(&tbl->entries);
+ if (!exempt_from_gc)
+ atomic_dec(&tbl->gc_entries);
goto out;
}
@@ -505,13 +567,15 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
- struct net_device *dev, bool want_ref)
+static struct neighbour *___neigh_create(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *dev,
+ bool exempt_from_gc, bool want_ref)
{
+ struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
u32 hash_val;
unsigned int key_len = tbl->key_len;
int error;
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
if (!n) {
@@ -574,6 +638,9 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
n->dead = 0;
+ if (!exempt_from_gc)
+ list_add_tail(&n->gc_list, &n->tbl->gc_list);
+
if (want_ref)
neigh_hold(n);
rcu_assign_pointer(n->next,
@@ -591,6 +658,12 @@ out_neigh_release:
neigh_release(n);
goto out;
}
+
+/* Exported entry point: create a neighbour entry through ___neigh_create()
+ * with exempt_from_gc fixed to false.
+ */
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+				 struct net_device *dev, bool want_ref)
+{
+	const bool exempt_from_gc = false;
+
+	return ___neigh_create(tbl, pkey, dev, exempt_from_gc, want_ref);
+}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
@@ -854,7 +927,7 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
- n->dead = 1;
+ neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
@@ -1137,9 +1210,11 @@ static void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags, u32 nlmsg_pid)
+static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
+ u8 new, u32 flags, u32 nlmsg_pid,
+ struct netlink_ext_ack *extack)
{
+ bool ext_learn_change = false;
u8 old;
int err;
int notify = 0;
@@ -1155,10 +1230,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
- if (neigh->dead)
+ if (neigh->dead) {
+ NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
goto out;
+ }
- neigh_update_ext_learned(neigh, flags, &notify);
+ ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1193,8 +1270,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
use it, otherwise discard the request.
*/
err = -EINVAL;
- if (!(old & NUD_VALID))
+ if (!(old & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack, "No link layer address given");
goto out;
+ }
lladdr = neigh->ha;
}
@@ -1302,11 +1381,20 @@ out:
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
+ if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ neigh_update_gc_list(neigh);
+
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
return err;
}
+
+/* Exported wrapper around __neigh_update() for callers that have no netlink
+ * extended-ack context; a NULL extack is passed through.
+ */
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+		 u32 flags, u32 nlmsg_pid)
+{
+	struct netlink_ext_ack *no_extack = NULL;
+
+	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, no_extack);
+}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
@@ -1571,6 +1659,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
unsigned long phsize;
INIT_LIST_HEAD(&tbl->parms_list);
+ INIT_LIST_HEAD(&tbl->gc_list);
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
@@ -1678,8 +1767,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
- if (dst_attr == NULL)
+ if (!dst_attr) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
@@ -1694,8 +1785,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < (int)tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
goto out;
+ }
if (ndm->ndm_flags & NTF_PROXY) {
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
@@ -1711,10 +1804,9 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- err = neigh_update(neigh, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN,
- NETLINK_CB(skb).portid);
+ err = __neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid, extack);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
neigh_remove_one(neigh, tbl);
@@ -1736,6 +1828,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev = NULL;
struct neighbour *neigh;
void *dst, *lladdr;
+ u8 protocol = 0;
int err;
ASSERT_RTNL();
@@ -1744,8 +1837,10 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
err = -EINVAL;
- if (tb[NDA_DST] == NULL)
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
@@ -1755,19 +1850,32 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+ if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Invalid link address");
goto out;
+ }
}
tbl = neigh_find_table(ndm->ndm_family);
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
goto out;
+ }
+
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+ if (tb[NDA_PROTOCOL]) {
+ if (nla_len(tb[NDA_PROTOCOL]) != sizeof(u8)) {
+ NL_SET_ERR_MSG(extack, "Invalid protocol attribute");
+ goto out;
+ }
+ protocol = nla_get_u8(tb[NDA_PROTOCOL]);
+ }
+
if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
@@ -1775,22 +1883,30 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
pn->flags = ndm->ndm_flags;
+ if (protocol)
+ pn->protocol = protocol;
err = 0;
}
goto out;
}
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device not specified");
goto out;
+ }
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
+ bool exempt_from_gc;
+
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
- neigh = __neigh_lookup_errno(tbl, dst, dev);
+ exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
+ ndm->ndm_flags & NTF_EXT_LEARNED;
+ neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1817,8 +1933,12 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid);
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+
+ if (protocol)
+ neigh->protocol = protocol;
+
neigh_release(neigh);
out:
@@ -2312,6 +2432,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
+ if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2343,6 +2466,9 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
+ if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2631,7 +2757,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
- n->dead = 1;
+ neigh_mark_dead(n);
} else
np = &n->next;
write_unlock(&n->lock);
@@ -2998,7 +3124,8 @@ static inline size_t neigh_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ nla_total_size(sizeof(struct nda_cacheinfo))
- + nla_total_size(4); /* NDA_PROBES */
+ + nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(1); /* NDA_PROTOCOL */
}
static void __neigh_notify(struct neighbour *n, int type, int flags,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bd67c4d0fcfd..ff9fd2bb4ce4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -337,7 +337,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *dev, unsigned long new_flags)
{
- return dev_change_flags(dev, (unsigned int)new_flags);
+ return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}
static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index fefe72774aeb..05b23b285058 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -669,6 +669,7 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_NSID] = { .type = NLA_S32 },
[NETNSA_PID] = { .type = NLA_U32 },
[NETNSA_FD] = { .type = NLA_U32 },
+ [NETNSA_TARGET_NSID] = { .type = NLA_S32 },
};
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -735,23 +736,38 @@ static int rtnl_net_get_size(void)
{
return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+ nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+ + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
;
}
-static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, int nsid)
+struct net_fill_args { /* parameters consumed by rtnl_net_fill() */
+ u32 portid; /* passed to nlmsg_put() as the message port id */
+ u32 seq; /* passed to nlmsg_put() as the sequence number */
+ int flags; /* passed to nlmsg_put() as the nlmsg flags */
+ int cmd; /* passed to nlmsg_put() as the message type */
+ int nsid; /* value emitted as NETNSA_NSID */
+ bool add_ref; /* when true, NETNSA_CURRENT_NSID is emitted as well */
+ int ref_nsid; /* value emitted as NETNSA_CURRENT_NSID (only if add_ref) */
+};
+
+static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
+ args->flags);
if (!nlh)
return -EMSGSIZE;
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- if (nla_put_s32(skb, NETNSA_NSID, nsid))
+ if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
+ goto nla_put_failure;
+
+ if (args->add_ref &&
+ nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -767,10 +783,15 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ struct net_fill_args fillargs = {
+ .portid = NETLINK_CB(skb).portid,
+ .seq = nlh->nlmsg_seq,
+ .cmd = RTM_NEWNSID,
+ };
+ struct net *peer, *target = net;
struct nlattr *nla;
struct sk_buff *msg;
- struct net *peer;
- int err, id;
+ int err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy, extack);
@@ -782,6 +803,11 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
} else if (tb[NETNSA_FD]) {
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
nla = tb[NETNSA_FD];
+ } else if (tb[NETNSA_NSID]) {
+ peer = get_net_ns_by_id(net, nla_get_u32(tb[NETNSA_NSID]));
+ if (!peer)
+ peer = ERR_PTR(-ENOENT);
+ nla = tb[NETNSA_NSID];
} else {
NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
return -EINVAL;
@@ -793,15 +819,29 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
+ if (tb[NETNSA_TARGET_NSID]) {
+ int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
+
+ target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
+ if (IS_ERR(target)) {
+ NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
+ NL_SET_ERR_MSG(extack,
+ "Target netns reference is invalid");
+ err = PTR_ERR(target);
+ goto out;
+ }
+ fillargs.add_ref = true;
+ fillargs.ref_nsid = peernet2id(net, peer);
+ }
+
msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
if (!msg) {
err = -ENOMEM;
goto out;
}
- id = peernet2id(net, peer);
- err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_NEWNSID, net, id);
+ fillargs.nsid = peernet2id(target, peer);
+ err = rtnl_net_fill(msg, &fillargs);
if (err < 0)
goto err_out;
@@ -811,14 +851,17 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
err_out:
nlmsg_free(msg);
out:
+ if (fillargs.add_ref)
+ put_net(target);
put_net(peer);
return err;
}
struct rtnl_net_dump_cb {
- struct net *net;
+ struct net *tgt_net;
+ struct net *ref_net;
struct sk_buff *skb;
- struct netlink_callback *cb;
+ struct net_fill_args fillargs;
int idx;
int s_idx;
};
@@ -831,9 +874,10 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data)
if (net_cb->idx < net_cb->s_idx)
goto cont;
- ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
- net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWNSID, net_cb->net, id);
+ net_cb->fillargs.nsid = id;
+ if (net_cb->fillargs.add_ref)
+ net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
+ ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
if (ret < 0)
return ret;
@@ -842,33 +886,96 @@ cont:
return 0;
}
+/* Strictly parse a netns id dump request and record its options in @net_cb.
+ *
+ * NETNSA_TARGET_NSID is the only attribute accepted. When present, the
+ * namespace it names is looked up with rtnl_get_net_ns_capable(); the
+ * previous net_cb->tgt_net becomes net_cb->ref_net, the looked-up namespace
+ * becomes the new tgt_net and fillargs.add_ref is set.
+ *
+ * Returns 0 on success, the parse error, the lookup error, or -EINVAL when
+ * any other attribute is present (extack is filled in for the latter two).
+ */
+static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
+				   struct rtnl_net_dump_cb *net_cb,
+				   struct netlink_callback *cb)
+{
+	struct netlink_ext_ack *extack = cb->extack;
+	struct nlattr *tb[NETNSA_MAX + 1];
+	int type, rc;
+
+	rc = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+				rtnl_net_policy, extack);
+	if (rc < 0)
+		return rc;
+
+	for (type = 0; type <= NETNSA_MAX; type++) {
+		struct nlattr *nla = tb[type];
+		struct net *target;
+
+		if (!nla)
+			continue;
+
+		/* Anything other than the target nsid is rejected outright. */
+		if (type != NETNSA_TARGET_NSID) {
+			NL_SET_BAD_ATTR(extack, nla);
+			NL_SET_ERR_MSG(extack,
+				       "Unsupported attribute in dump request");
+			return -EINVAL;
+		}
+
+		target = rtnl_get_net_ns_capable(sk, nla_get_s32(nla));
+		if (IS_ERR(target)) {
+			NL_SET_BAD_ATTR(extack, nla);
+			NL_SET_ERR_MSG(extack,
+				       "Invalid target network namespace id");
+			return PTR_ERR(target);
+		}
+
+		/* The requester's netns becomes the reference for ids. */
+		net_cb->fillargs.add_ref = true;
+		net_cb->ref_net = net_cb->tgt_net;
+		net_cb->tgt_net = target;
+	}
+
+	return 0;
+}
+
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
struct rtnl_net_dump_cb net_cb = {
- .net = net,
+ .tgt_net = sock_net(skb->sk),
.skb = skb,
- .cb = cb,
+ .fillargs = {
+ .portid = NETLINK_CB(cb->skb).portid,
+ .seq = cb->nlh->nlmsg_seq,
+ .flags = NLM_F_MULTI,
+ .cmd = RTM_NEWNSID,
+ },
.idx = 0,
.s_idx = cb->args[0],
};
+ int err = 0;
- if (cb->strict_check &&
- nlmsg_attrlen(cb->nlh, sizeof(struct rtgenmsg))) {
- NL_SET_ERR_MSG(cb->extack, "Unknown data in network namespace id dump request");
- return -EINVAL;
+ if (cb->strict_check) {
+ err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
+ if (err < 0)
+ goto end;
}
- spin_lock_bh(&net->nsid_lock);
- idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_bh(&net->nsid_lock);
+ spin_lock_bh(&net_cb.tgt_net->nsid_lock);
+ if (net_cb.fillargs.add_ref &&
+ !net_eq(net_cb.ref_net, net_cb.tgt_net) &&
+ !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) {
+ spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
+ err = -EAGAIN;
+ goto end;
+ }
+ idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+ if (net_cb.fillargs.add_ref &&
+ !net_eq(net_cb.ref_net, net_cb.tgt_net))
+ spin_unlock_bh(&net_cb.ref_net->nsid_lock);
+ spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
cb->args[0] = net_cb.idx;
- return skb->len;
+end:
+ if (net_cb.fillargs.add_ref)
+ put_net(net_cb.tgt_net);
+ return err < 0 ? err : skb->len;
}
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
+ struct net_fill_args fillargs = {
+ .cmd = cmd,
+ .nsid = id,
+ };
struct sk_buff *msg;
int err = -ENOMEM;
@@ -876,7 +983,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id)
if (!msg)
goto out;
- err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
+ err = rtnl_net_fill(msg, &fillargs);
if (err < 0)
goto err_out;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2b9fdbc43205..36a2b63ffd6d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -663,7 +663,7 @@ int netpoll_setup(struct netpoll *np)
np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
- err = dev_open(ndev);
+ err = dev_open(ndev, NULL);
if (err) {
np_err(np, "failed to open %s\n", ndev->name);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 86f2d9cbdae3..baf2685b4da2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -59,7 +59,7 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
-#define RTNL_MAX_TYPE 49
+#define RTNL_MAX_TYPE 50
#define RTNL_SLAVE_MAX_TYPE 36
struct rtnl_link {
@@ -2444,7 +2444,7 @@ static int do_setlink(const struct sk_buff *skb,
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
- err = dev_set_mac_address(dev, sa);
+ err = dev_set_mac_address(dev, sa, extack);
kfree(sa);
if (err)
goto errout;
@@ -2489,7 +2489,8 @@ static int do_setlink(const struct sk_buff *skb,
}
if (ifm->ifi_flags || ifm->ifi_change) {
- err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+ err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+ extack);
if (err < 0)
goto errout;
}
@@ -2870,7 +2871,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
old_flags = dev->flags;
if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
- err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+ err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+ NULL);
if (err < 0)
return err;
}
@@ -2971,20 +2973,24 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attr, struct netlink_ext_ack *extack)
{
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+ const struct rtnl_link_ops *m_ops = NULL;
+ struct net_device *master_dev = NULL;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- const struct rtnl_link_ops *m_ops = NULL;
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net *dest_net, *link_net;
+ struct nlattr **slave_data;
+ char kind[MODULE_NAME_LEN];
struct net_device *dev;
- struct net_device *master_dev = NULL;
struct ifinfomsg *ifm;
- char kind[MODULE_NAME_LEN];
char ifname[IFNAMSIZ];
- struct nlattr *tb[IFLA_MAX+1];
- struct nlattr *linkinfo[IFLA_INFO_MAX+1];
- unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr **data;
int err;
#ifdef CONFIG_MODULES
@@ -3040,195 +3046,200 @@ replay:
ops = NULL;
}
- if (1) {
- struct nlattr *attr[RTNL_MAX_TYPE + 1];
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
- struct nlattr **data = NULL;
- struct nlattr **slave_data = NULL;
- struct net *dest_net, *link_net = NULL;
-
- if (ops) {
- if (ops->maxtype > RTNL_MAX_TYPE)
- return -EINVAL;
+ data = NULL;
+ if (ops) {
+ if (ops->maxtype > RTNL_MAX_TYPE)
+ return -EINVAL;
- if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested(attr, ops->maxtype,
- linkinfo[IFLA_INFO_DATA],
- ops->policy, extack);
- if (err < 0)
- return err;
- data = attr;
- }
- if (ops->validate) {
- err = ops->validate(tb, data, extack);
- if (err < 0)
- return err;
- }
+ if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
+ err = nla_parse_nested(attr, ops->maxtype,
+ linkinfo[IFLA_INFO_DATA],
+ ops->policy, extack);
+ if (err < 0)
+ return err;
+ data = attr;
+ }
+ if (ops->validate) {
+ err = ops->validate(tb, data, extack);
+ if (err < 0)
+ return err;
}
+ }
- if (m_ops) {
- if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
- return -EINVAL;
+ slave_data = NULL;
+ if (m_ops) {
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+ return -EINVAL;
- if (m_ops->slave_maxtype &&
- linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested(slave_attr,
- m_ops->slave_maxtype,
- linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy,
- extack);
- if (err < 0)
- return err;
- slave_data = slave_attr;
- }
+ if (m_ops->slave_maxtype &&
+ linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ err = nla_parse_nested(slave_attr, m_ops->slave_maxtype,
+ linkinfo[IFLA_INFO_SLAVE_DATA],
+ m_ops->slave_policy, extack);
+ if (err < 0)
+ return err;
+ slave_data = slave_attr;
}
+ }
- if (dev) {
- int status = 0;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
+ if (dev) {
+ int status = 0;
- if (linkinfo[IFLA_INFO_DATA]) {
- if (!ops || ops != dev->rtnl_link_ops ||
- !ops->changelink)
- return -EOPNOTSUPP;
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
- err = ops->changelink(dev, tb, data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
- }
+ if (linkinfo[IFLA_INFO_DATA]) {
+ if (!ops || ops != dev->rtnl_link_ops ||
+ !ops->changelink)
+ return -EOPNOTSUPP;
- if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
- if (!m_ops || !m_ops->slave_changelink)
- return -EOPNOTSUPP;
+ err = ops->changelink(dev, tb, data, extack);
+ if (err < 0)
+ return err;
+ status |= DO_SETLINK_NOTIFY;
+ }
- err = m_ops->slave_changelink(master_dev, dev,
- tb, slave_data,
- extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
- }
+ if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ if (!m_ops || !m_ops->slave_changelink)
+ return -EOPNOTSUPP;
- return do_setlink(skb, dev, ifm, extack, tb, ifname,
- status);
+ err = m_ops->slave_changelink(master_dev, dev, tb,
+ slave_data, extack);
+ if (err < 0)
+ return err;
+ status |= DO_SETLINK_NOTIFY;
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
- return rtnl_group_changelink(skb, net,
+ return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
+ }
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, extack, tb);
- return -ENODEV;
- }
+ return -ENODEV;
+ }
- if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
- return -EOPNOTSUPP;
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
+ return -EOPNOTSUPP;
- if (!ops) {
+ if (!ops) {
#ifdef CONFIG_MODULES
- if (kind[0]) {
- __rtnl_unlock();
- request_module("rtnl-link-%s", kind);
- rtnl_lock();
- ops = rtnl_link_ops_get(kind);
- if (ops)
- goto replay;
- }
-#endif
- NL_SET_ERR_MSG(extack, "Unknown device type");
- return -EOPNOTSUPP;
+ if (kind[0]) {
+ __rtnl_unlock();
+ request_module("rtnl-link-%s", kind);
+ rtnl_lock();
+ ops = rtnl_link_ops_get(kind);
+ if (ops)
+ goto replay;
}
+#endif
+ NL_SET_ERR_MSG(extack, "Unknown device type");
+ return -EOPNOTSUPP;
+ }
- if (!ops->setup)
- return -EOPNOTSUPP;
-
- if (!ifname[0]) {
- snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- name_assign_type = NET_NAME_ENUM;
- }
+ if (!ops->setup)
+ return -EOPNOTSUPP;
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
+ if (!ifname[0]) {
+ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- }
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
- name_assign_type, ops, tb, extack);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ err = -EINVAL;
goto out;
}
+ err = -EPERM;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+ } else {
+ link_net = NULL;
+ }
- dev->ifindex = ifm->ifi_index;
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb, extack);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ goto out;
+ }
- if (ops->newlink) {
- err = ops->newlink(link_net ? : net, dev, tb, data,
- extack);
- /* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure after registration
- * so that device could be finally freed in rtnl_unlock.
- */
- if (err < 0) {
- /* If device is not registered at all, free it now */
- if (dev->reg_state == NETREG_UNINITIALIZED)
- free_netdev(dev);
- goto out;
- }
- } else {
- err = register_netdevice(dev);
- if (err < 0) {
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink) {
+ err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ /* Drivers should call free_netdev() in ->destructor
+ * and unregister it on failure after registration
+ * so that device could be finally freed in rtnl_unlock.
+ */
+ if (err < 0) {
+ /* If device is not registered at all, free it now */
+ if (dev->reg_state == NETREG_UNINITIALIZED)
free_netdev(dev);
- goto out;
- }
+ goto out;
+ }
+ } else {
+ err = register_netdevice(dev);
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
}
- err = rtnl_configure_link(dev, ifm);
+ }
+ err = rtnl_configure_link(dev, ifm);
+ if (err < 0)
+ goto out_unregister;
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
if (err < 0)
goto out_unregister;
- if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
- if (err < 0)
- goto out_unregister;
- }
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
- extack);
- if (err)
- goto out_unregister;
- }
+ }
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto out_unregister;
+ }
out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
- return err;
+ if (link_net)
+ put_net(link_net);
+ put_net(dest_net);
+ return err;
out_unregister:
- if (ops->newlink) {
- LIST_HEAD(list_kill);
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- } else {
- unregister_netdevice(dev);
- }
- goto out;
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
}
+ goto out;
+}
+
+/* Allocate the RTNL_MAX_TYPE + 1 entry attribute table on the heap, hand it
+ * to __rtnl_newlink() and free it again afterwards.
+ *
+ * Returns -ENOMEM if the table cannot be allocated, otherwise whatever
+ * __rtnl_newlink() returns.
+ */
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
+{
+	struct nlattr **attr;
+	int ret = -ENOMEM;
+
+	attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
+	if (attr) {
+		ret = __rtnl_newlink(skb, nlh, attr, extack);
+		kfree(attr);
+	}
+
+	return ret;
}
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3449,6 +3460,18 @@ void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
new_nsid, new_ifindex);
}
+static const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NLA_U32 },
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_PORT] = { .type = NLA_U16 },
+ [NDA_VNI] = { .type = NLA_U32 },
+ [NDA_IFINDEX] = { .type = NLA_U32 },
+ [NDA_MASTER] = { .type = NLA_U32 },
+};
+
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
@@ -3808,6 +3831,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
{
int err;
+ if (dev->type != ARPHRD_ETHER)
+ return -EINVAL;
+
netif_addr_lock_bh(dev);
err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
if (err)
@@ -4007,6 +4033,160 @@ out:
return skb->len;
}
+static int valid_fdb_get_strict(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u8 *ndm_flags,
+ int *br_idx, int *brport_idx, u8 **addr,
+ u16 *vid, struct netlink_ext_ack *extack)
+{
+ struct ndmsg *ndm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for fdb get request");
+ return -EINVAL;
+ }
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ nda_policy, extack);
+ if (err < 0)
+ return err;
+
+ *ndm_flags = ndm->ndm_flags;
+ *brport_idx = ndm->ndm_ifindex;
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NDA_MASTER:
+ *br_idx = nla_get_u32(tb[i]);
+ break;
+ case NDA_LLADDR:
+ if (nla_len(tb[i]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Invalid address in fdb get request");
+ return -EINVAL;
+ }
+ *addr = nla_data(tb[i]);
+ break;
+ case NDA_VLAN:
+ err = fdb_vid_parse(tb[i], vid, extack);
+ if (err)
+ return err;
+ break;
+ case NDA_VNI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb get request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * rtnl_fdb_get - answer a PF_BRIDGE RTM_GETNEIGH request for a single fdb entry
+ * @in_skb: request skb; supplies the netns and the requester's portid
+ * @nlh: netlink header of the request
+ * @extack: extended ack used to report why a request was rejected
+ *
+ * The target is selected either by ndm_ifindex or by NDA_MASTER; the two are
+ * mutually exclusive. With ndm_ifindex and no flags (or NTF_MASTER) the
+ * device must be a bridge port and its master is queried; otherwise NTF_SELF
+ * is required and the device itself is queried. The lookup is delegated to
+ * the selected device's ->ndo_fdb_get(), which fills the reply skb that is
+ * then unicast back to the requester.
+ *
+ * Returns the result of rtnl_unicast() on success or a negative errno.
+ */
+static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+			struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = NULL, *br_dev = NULL;
+	const struct net_device_ops *ops = NULL;
+	struct net *net = sock_net(in_skb->sk);
+	struct nlattr *tb[NDA_MAX + 1];
+	struct sk_buff *skb;
+	int brport_idx = 0;
+	u8 ndm_flags = 0;
+	int br_idx = 0;
+	u8 *addr = NULL;
+	u16 vid = 0;
+	int err;
+
+	err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx,
+				   &brport_idx, &addr, &vid, extack);
+	if (err < 0)
+		return err;
+
+	/* NDA_LLADDR is the lookup key: when the attribute is absent addr is
+	 * still NULL and must not be handed to ->ndo_fdb_get().
+	 */
+	if (!addr) {
+		NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request");
+		return -EINVAL;
+	}
+
+	if (brport_idx) {
+		dev = __dev_get_by_index(net, brport_idx);
+		if (!dev) {
+			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+			return -ENODEV;
+		}
+	}
+
+	if (br_idx) {
+		if (dev) {
+			NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive");
+			return -EINVAL;
+		}
+
+		br_dev = __dev_get_by_index(net, br_idx);
+		if (!br_dev) {
+			NL_SET_ERR_MSG(extack, "Invalid master ifindex");
+			return -EINVAL;
+		}
+		ops = br_dev->netdev_ops;
+	}
+
+	if (dev) {
+		if (!ndm_flags || (ndm_flags & NTF_MASTER)) {
+			/* Default / NTF_MASTER: query the bridge above the port */
+			if (!(dev->priv_flags & IFF_BRIDGE_PORT)) {
+				NL_SET_ERR_MSG(extack, "Device is not a bridge port");
+				return -EINVAL;
+			}
+			br_dev = netdev_master_upper_dev_get(dev);
+			if (!br_dev) {
+				NL_SET_ERR_MSG(extack, "Master of device not found");
+				return -EINVAL;
+			}
+			ops = br_dev->netdev_ops;
+		} else {
+			/* Any other flag combination must include NTF_SELF */
+			if (!(ndm_flags & NTF_SELF)) {
+				NL_SET_ERR_MSG(extack, "Missing NTF_SELF");
+				return -EINVAL;
+			}
+			ops = dev->netdev_ops;
+		}
+	}
+
+	if (!br_dev && !dev) {
+		NL_SET_ERR_MSG(extack, "No device specified");
+		return -ENODEV;
+	}
+
+	if (!ops || !ops->ndo_fdb_get) {
+		NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device");
+		return -EOPNOTSUPP;
+	}
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOBUFS;
+
+	/* When a master was resolved, it is the device the handler operates on */
+	if (br_dev)
+		dev = br_dev;
+	err = ops->ndo_fdb_get(skb, tb, dev, addr, vid,
+			       NETLINK_CB(in_skb).portid,
+			       nlh->nlmsg_seq, extack);
+	if (err)
+		goto out;
+
+	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+	kfree_skb(skb);
+	return err;
+}
+
static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
unsigned int attrnum, unsigned int flag)
{
@@ -4318,7 +4498,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
+ err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags,
+ extack);
if (err)
goto out;
@@ -4330,7 +4511,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EOPNOTSUPP;
else
err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
- flags);
+ flags,
+ extack);
if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
@@ -5065,7 +5247,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b4ee5c8b928f..40552547c69a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1089,7 +1089,7 @@ void sock_zerocopy_put(struct ubuf_info *uarg)
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put);
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
if (uarg) {
struct sock *sk = skb_from_uarg(uarg)->sk;
@@ -1097,7 +1097,8 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg)
atomic_dec(&sk->sk_zckey);
uarg->len--;
- sock_zerocopy_put(uarg);
+ if (have_uref)
+ sock_zerocopy_put(uarg);
}
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
@@ -1105,6 +1106,12 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, size_t length);
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
+{
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
+
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
@@ -1131,7 +1138,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return err;
}
- skb_zcopy_set(skb, uarg);
+ skb_zcopy_set(skb, uarg, NULL);
return skb->len - orig_len;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
@@ -1151,7 +1158,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
if (skb_copy_ubufs(nskb, GFP_ATOMIC))
return -EIO;
}
- skb_zcopy_set(nskb, skb_uarg(orig));
+ skb_zcopy_set(nskb, skb_uarg(orig), NULL);
}
return 0;
}
@@ -1925,8 +1932,6 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
struct sk_buff *insp = NULL;
do {
- BUG_ON(!list);
-
if (list->len <= eat) {
/* Eaten as whole. */
eat -= list->len;
@@ -2366,19 +2371,6 @@ error:
}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);
-/* Send skb data on a socket. */
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
-{
- int ret = 0;
-
- lock_sock(sk);
- ret = skb_send_sock_locked(sk, skb, offset, len);
- release_sock(sk);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(skb_send_sock);
-
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
@@ -2645,6 +2637,65 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
+{
+ __sum16 sum;
+
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
+ /* See comments in __skb_checksum_complete(). */
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev, skb);
+ }
+ if (!skb_shared(skb))
+ skb->csum_valid = !sum;
+ return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+/* This function assumes skb->csum already holds pseudo header's checksum,
+ * which has been changed from the hardware checksum, for example, by
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
+ *
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
+ * zero. The new checksum is stored back into skb->csum unless the skb is
+ * shared.
+ */
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+ __wsum csum;
+ __sum16 sum;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+
+ sum = csum_fold(csum_add(skb->csum, csum));
+ /* This check is inverted, because we already knew the hardware
+ * checksum is invalid before calling this function. So, if the
+ * re-computed checksum is valid instead, then we have a mismatch
+ * between the original skb->csum and skb_checksum(). This means either
+ * the original hardware checksum is incorrect or we screw up skb->csum
+ * when moving skb->data around.
+ */
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev, skb);
+ }
+
+ if (!skb_shared(skb)) {
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+ }
+
+ return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
{
net_warn_ratelimited(
@@ -2962,28 +3013,6 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
}
EXPORT_SYMBOL(skb_append);
-/**
- * skb_insert - insert a buffer
- * @old: buffer to insert before
- * @newsk: buffer to insert
- * @list: list to use
- *
- * Place a packet before a given packet in a list. The list locks are
- * taken and this function is atomic with respect to other list locked
- * calls.
- *
- * A buffer cannot be placed on two lists at the same time.
- */
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&list->lock, flags);
- __skb_insert(newsk, old->prev, old, list);
- spin_unlock_irqrestore(&list->lock, flags);
-}
-EXPORT_SYMBOL(skb_insert);
-
static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, const int pos)
@@ -4854,6 +4883,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
nf_reset(skb);
nf_reset_trace(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+ skb->offload_l3_fwd_mark = 0;
+#endif
+
if (!xnet)
return;
@@ -5123,7 +5157,7 @@ int skb_vlan_pop(struct sk_buff *skb)
int err;
if (likely(skb_vlan_tag_present(skb))) {
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
} else {
if (unlikely(!eth_type_vlan(skb->protocol)))
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 6d7e189e3cd9..f00902c532cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -700,6 +700,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_DONTROUTE:
sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
+ sk_dst_reset(sk);
break;
case SO_BROADCAST:
sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -1018,7 +1019,10 @@ set_rcvbuf:
case SO_ZEROCOPY:
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
- if (sk->sk_protocol != IPPROTO_TCP)
+ if (!((sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP) ||
+ (sk->sk_type == SOCK_DGRAM &&
+ sk->sk_protocol == IPPROTO_UDP)))
ret = -ENOTSUPP;
} else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ba5cba56f574..d8fe3e549373 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -187,6 +187,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
+EXPORT_SYMBOL(reuseport_add_sock);
void reuseport_detach_sock(struct sock *sk)
{
diff --git a/net/core/stream.c b/net/core/stream.c
index 7d329fb1f553..e94bb02a5629 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -32,7 +32,7 @@ void sk_stream_write_space(struct sock *sk)
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;
- if (sk_stream_is_writeable(sk) && sock) {
+ if (__sk_stream_is_writeable(sk, 1) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 658cd32bb7b3..be0b223aa862 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1141,6 +1141,9 @@ static int __init dccp_init(void)
goto out_fail;
rc = -ENOBUFS;
inet_hashinfo_init(&dccp_hashinfo);
+ inet_hashinfo2_init(&dccp_hashinfo, "dccp_listen_portaddr_hash",
+ INET_LHTABLE_SIZE, 21, /* one slot per 2 MB*/
+ 0, 64 * 1024);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7d6ff983ba2c..7aab5d088c72 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -192,7 +192,7 @@ static int check_port(__le16 port)
static unsigned short port_alloc(struct sock *sk)
{
struct dn_scp *scp = DN_SK(sk);
-static unsigned short port = 0x2000;
+ static unsigned short port = 0x2000;
unsigned short i_port = port;
while(check_port(cpu_to_le16(++port)) != 0) {
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 48c41918fb35..91e52973ee13 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -44,6 +44,10 @@ config NET_DSA_TAG_GSWIP
config NET_DSA_TAG_KSZ
bool
+config NET_DSA_TAG_KSZ9477
+ bool
+ select NET_DSA_TAG_KSZ
+
config NET_DSA_TAG_LAN9303
bool
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a69c1790bbfc..aee909bcddc4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -55,8 +55,8 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = &gswip_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = &ksz9477_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
@@ -91,8 +91,8 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = "gswip",
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = "ksz",
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = "ksz9477",
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = "lan9303",
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9e4fd04ab53c..026a05774bf7 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -210,7 +210,7 @@ extern const struct dsa_device_ops edsa_netdev_ops;
extern const struct dsa_device_ops gswip_netdev_ops;
/* tag_ksz.c */
-extern const struct dsa_device_ops ksz_netdev_ops;
+extern const struct dsa_device_ops ksz9477_netdev_ops;
/* tag_lan9303.c */
extern const struct dsa_device_ops lan9303_netdev_ops;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c90ee3227dea..71bb15f491c8 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -158,8 +158,59 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
cpu_dp->orig_ethtool_ops = NULL;
}
+static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *dev = to_net_dev(d);
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ return sprintf(buf, "%s\n",
+ dsa_tag_protocol_to_str(cpu_dp->tag_ops));
+}
+static DEVICE_ATTR_RO(tagging);
+
+static struct attribute *dsa_slave_attrs[] = {
+ &dev_attr_tagging.attr,
+ NULL
+};
+
+static const struct attribute_group dsa_group = {
+ .name = "dsa",
+ .attrs = dsa_slave_attrs,
+};
+
+static void dsa_master_set_mtu(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+ unsigned int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+ int err;
+
+ rtnl_lock();
+ if (mtu <= dev->max_mtu) {
+ err = dev_set_mtu(dev, mtu);
+ if (err)
+ netdev_dbg(dev, "Unable to set MTU to include for DSA overheads\n");
+ }
+ rtnl_unlock();
+}
+
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+ int err;
+
+ rtnl_lock();
+ err = dev_set_mtu(dev, ETH_DATA_LEN);
+ if (err)
+ netdev_dbg(dev,
+ "Unable to reset MTU to exclude DSA overheads\n");
+ rtnl_unlock();
+}
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
+ int ret;
+
+ dsa_master_set_mtu(dev, cpu_dp);
+
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
* sent to the tag format's receive function.
@@ -168,12 +219,22 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
dev->dsa_ptr = cpu_dp;
- return dsa_master_ethtool_setup(dev);
+ ret = dsa_master_ethtool_setup(dev);
+ if (ret)
+ return ret;
+
+ ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
+ if (ret)
+ dsa_master_ethtool_teardown(dev);
+
+ return ret;
}
void dsa_master_teardown(struct net_device *dev)
{
+ sysfs_remove_group(&dev->dev.kobj, &dsa_group);
dsa_master_ethtool_teardown(dev);
+ dsa_master_reset_mtu(dev);
dev->dsa_ptr = NULL;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed0595459df1..2d7e01b23572 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -252,9 +252,6 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- if (netif_is_bridge_master(vlan->obj.orig_dev))
- return -EOPNOTSUPP;
-
if (br_vlan_enabled(dp->bridge_dev))
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7d0c19e7edcf..a3fcc1d01615 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1050,35 +1050,12 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_attr_get = dsa_slave_port_attr_get,
.switchdev_port_attr_set = dsa_slave_port_attr_set,
- .switchdev_port_obj_add = dsa_slave_port_obj_add,
- .switchdev_port_obj_del = dsa_slave_port_obj_del,
};
static struct device_type dsa_type = {
.name = "dsa",
};
-static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
- char *buf)
-{
- struct net_device *dev = to_net_dev(d);
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- return sprintf(buf, "%s\n",
- dsa_tag_protocol_to_str(dp->cpu_dp->tag_ops));
-}
-static DEVICE_ATTR_RO(tagging);
-
-static struct attribute *dsa_slave_attrs[] = {
- &dev_attr_tagging.attr,
- NULL
-};
-
-static const struct attribute_group dsa_group = {
- .name = "dsa",
- .attrs = dsa_slave_attrs,
-};
-
static void dsa_slave_phylink_validate(struct net_device *dev,
unsigned long *supported,
struct phylink_link_state *state)
@@ -1374,14 +1351,8 @@ int dsa_slave_create(struct dsa_port *port)
goto out_phy;
}
- ret = sysfs_create_group(&slave_dev->dev.kobj, &dsa_group);
- if (ret)
- goto out_unreg;
-
return 0;
-out_unreg:
- unregister_netdev(slave_dev);
out_phy:
rtnl_lock();
phylink_disconnect_phy(p->dp->pl);
@@ -1405,7 +1376,6 @@ void dsa_slave_destroy(struct net_device *slave_dev)
rtnl_unlock();
dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
- sysfs_remove_group(&slave_dev->dev.kobj, &dsa_group);
unregister_netdev(slave_dev);
phylink_destroy(dp->pl);
free_percpu(p->stats64);
@@ -1557,6 +1527,44 @@ err_fdb_work_init:
return NOTIFY_BAD;
}
+static int
+dsa_slave_switchdev_port_obj_event(unsigned long event,
+ struct net_device *netdev,
+ struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = dsa_slave_port_obj_add(netdev, port_obj_info->obj,
+ port_obj_info->trans);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = dsa_slave_port_obj_del(netdev, port_obj_info->obj);
+ break;
+ }
+
+ port_obj_info->handled = true;
+ return notifier_from_errno(err);
+}
+
+static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD: /* fall through */
+ case SWITCHDEV_PORT_OBJ_DEL:
+ return dsa_slave_switchdev_port_obj_event(event, dev, ptr);
+ }
+
+ return NOTIFY_DONE;
+}
+
static struct notifier_block dsa_slave_nb __read_mostly = {
.notifier_call = dsa_slave_netdevice_event,
};
@@ -1565,8 +1573,13 @@ static struct notifier_block dsa_slave_switchdev_notifier = {
.notifier_call = dsa_slave_switchdev_event,
};
+static struct notifier_block dsa_slave_switchdev_blocking_notifier = {
+ .notifier_call = dsa_slave_switchdev_blocking_event,
+};
+
int dsa_slave_register_notifier(void)
{
+ struct notifier_block *nb;
int err;
err = register_netdevice_notifier(&dsa_slave_nb);
@@ -1577,8 +1590,15 @@ int dsa_slave_register_notifier(void)
if (err)
goto err_switchdev_nb;
+ nb = &dsa_slave_switchdev_blocking_notifier;
+ err = register_switchdev_blocking_notifier(nb);
+ if (err)
+ goto err_switchdev_blocking_nb;
+
return 0;
+err_switchdev_blocking_nb:
+ unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
err_switchdev_nb:
unregister_netdevice_notifier(&dsa_slave_nb);
return err;
@@ -1586,8 +1606,14 @@ err_switchdev_nb:
void dsa_slave_unregister_notifier(void)
{
+ struct notifier_block *nb;
int err;
+ nb = &dsa_slave_switchdev_blocking_notifier;
+ err = unregister_switchdev_blocking_notifier(nb);
+ if (err)
+ pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err);
+
err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
if (err)
pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2b06bb91318b..4aa1d368a5ae 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -174,6 +174,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops brcm_netdev_ops = {
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
+ .overhead = BRCM_TAG_LEN,
};
#endif
@@ -196,5 +197,6 @@ static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
const struct dsa_device_ops brcm_prepend_netdev_ops = {
.xmit = brcm_tag_xmit_prepend,
.rcv = brcm_tag_rcv_prepend,
+ .overhead = BRCM_TAG_LEN,
};
#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cd13cfc542ce..8b2f92e3f3a2 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -149,4 +149,5 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops dsa_netdev_ops = {
.xmit = dsa_xmit,
.rcv = dsa_rcv,
+ .overhead = DSA_HLEN,
};
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 4083326b806e..f5b87ee5c94e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -168,4 +168,5 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops edsa_netdev_ops = {
.xmit = edsa_xmit,
.rcv = edsa_rcv,
+ .overhead = EDSA_HLEN,
};
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 49e9b73f1be3..cb6f82ffe5eb 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -106,4 +106,5 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
const struct dsa_device_ops gswip_netdev_ops = {
.xmit = gswip_tag_xmit,
.rcv = gswip_tag_rcv,
+ .overhead = GSWIP_RX_HEADER_LEN,
};
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 0f62effad88f..da71b9e2af52 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -14,34 +14,18 @@
#include <net/dsa.h>
#include "dsa_priv.h"
-/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : Prioritization (not used now)
- * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
- *
- * For Egress (KSZ -> Host), 1 byte is added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
- */
-
-#define KSZ_INGRESS_TAG_LEN 2
-#define KSZ_EGRESS_TAG_LEN 1
+/* Typically only one byte is used for tail tag. */
+#define KSZ_EGRESS_TAG_LEN 1
-static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
+ struct net_device *dev, int len)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
- u8 *tag;
padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
- if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+ if (skb_tailroom(skb) >= padlen + len) {
/* Let dsa_slave_xmit() free skb */
if (__skb_put_padto(skb, skb->len + padlen, false))
return NULL;
@@ -49,7 +33,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
nskb = skb;
} else {
nskb = alloc_skb(NET_IP_ALIGN + skb->len +
- padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+ padlen + len, GFP_ATOMIC);
if (!nskb)
return NULL;
skb_reserve(nskb, NET_IP_ALIGN);
@@ -70,33 +54,88 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
}
- tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
- tag[0] = 0;
- tag[1] = 1 << dp->index; /* destination port */
-
return nskb;
}
-static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int port, unsigned int len)
{
- u8 *tag;
- int source_port;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
- tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ pskb_trim_rcsum(skb, skb->len - len);
- source_port = tag[0] & 7;
+ return skb;
+}
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
- if (!skb->dev)
+/*
+ * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ9477_INGRESS_TAG_LEN 2
+#define KSZ9477_PTP_TAG_LEN 4
+#define KSZ9477_PTP_TAG_INDICATION 0x80
+
+#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
+#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
+
+static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct sk_buff *nskb;
+ u16 *tag;
+ u8 *addr;
+
+ nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
+ if (!nskb)
return NULL;
- pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
+ /* Tag encoding */
+ tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
+ addr = skb_mac_header(nskb);
- return skb;
+ *tag = BIT(dp->index);
+
+ if (is_link_local_ether_addr(addr))
+ *tag |= KSZ9477_TAIL_TAG_OVERRIDE;
+
+ *tag = cpu_to_be16(*tag);
+
+ return nskb;
+}
+
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* Tag decoding */
+ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ unsigned int port = tag[0] & 7;
+ unsigned int len = KSZ_EGRESS_TAG_LEN;
+
+ /* Extra 4-bytes PTP timestamp */
+ if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
+ len += KSZ9477_PTP_TAG_LEN;
+
+ return ksz_common_rcv(skb, dev, port, len);
}
-const struct dsa_device_ops ksz_netdev_ops = {
- .xmit = ksz_xmit,
- .rcv = ksz_rcv,
+const struct dsa_device_ops ksz9477_netdev_ops = {
+ .xmit = ksz9477_xmit,
+ .rcv = ksz9477_rcv,
+ .overhead = KSZ9477_INGRESS_TAG_LEN,
};
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 548c00254c07..f48889e46ff7 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -140,4 +140,5 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops lan9303_netdev_ops = {
.xmit = lan9303_xmit,
.rcv = lan9303_rcv,
+ .overhead = LAN9303_TAG_LEN,
};
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 11535bc70743..f39f4dfeda34 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -109,4 +109,5 @@ const struct dsa_device_ops mtk_netdev_ops = {
.xmit = mtk_tag_xmit,
.rcv = mtk_tag_rcv,
.flow_dissect = mtk_tag_flow_dissect,
+ .overhead = MTK_HDR_LEN,
};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 613f4ee97771..ed4f6dc26365 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -101,4 +101,5 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops qca_netdev_ops = {
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
+ .overhead = QCA_HDR_LEN,
};
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 56197f0d9608..b40756ed6e57 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -84,4 +84,5 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops trailer_netdev_ops = {
.xmit = trailer_xmit,
.rcv = trailer_rcv,
+ .overhead = 4,
};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fd8faa0dfa61..4c520110b04f 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -47,6 +47,7 @@
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
+#include <linux/nvmem-consumer.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
@@ -165,15 +166,17 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
- if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
+ if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
+ dev->dev_addr))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+ if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
}
- else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr)))
- skb->pkt_type = PACKET_OTHERHOST;
/*
* Some variants of DSA tagging don't have an ethertype field
@@ -548,3 +551,40 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
return 0;
}
EXPORT_SYMBOL(eth_platform_get_mac_address);
+
+/**
+ * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named
+ * 'mac-address' associated with given device.
+ *
+ * @dev: Device with which the mac-address cell is associated.
+ * @addrbuf: Buffer to which the MAC address will be copied on success.
+ *
+ * Return: 0 on success or a negative error number on failure.
+ */
+int nvmem_get_mac_address(struct device *dev, void *addrbuf)
+{
+ struct nvmem_cell *cell;
+ const void *mac;
+ size_t len;
+
+ cell = nvmem_cell_get(dev, "mac-address");
+ if (IS_ERR(cell))
+ return PTR_ERR(cell);
+
+ mac = nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
+ }
+
+ ether_addr_copy(addrbuf, mac);
+ kfree(mac);
+
+ return 0;
+}
+EXPORT_SYMBOL(nvmem_get_mac_address);
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index b231e40f006a..0c25c0bcc4da 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -242,7 +242,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
* dev_set_mac_address require RTNL_LOCK
*/
rtnl_lock();
- rc = dev_set_mac_address(dev, &addr);
+ rc = dev_set_mac_address(dev, &addr, NULL);
rtnl_unlock();
if (rc)
goto dev_unregister;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 326c422c22f8..0dfb72c46671 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1385,6 +1385,10 @@ out:
}
EXPORT_SYMBOL(inet_gso_segment);
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+ struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1494,7 +1498,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -1556,6 +1561,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
@@ -1581,7 +1588,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
- err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+ err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+ tcp4_gro_complete, udp4_gro_complete,
+ skb, nhoff + sizeof(*iph));
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index a34602ae27de..5b9b6d497f71 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1100,7 +1100,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr->ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 0d0ad19ecb87..0c9f171fb085 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -1061,6 +1061,13 @@ static int gue_err(struct sk_buff *skb, u32 info)
if (validate_gue_flags(guehdr, optlen))
return -EINVAL;
+ /* Handling exceptions for direct UDP encapsulation in GUE would lead to
+ * recursion. Besides, this kind of encapsulation can't even be
+ * configured currently. Discard this.
+ */
+ if (guehdr->proto_ctype == IPPROTO_UDP)
+ return -EOPNOTSUPP;
+
skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bcb11f3a27c0..760a9e52e02b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- void *arg)
+ void *arg,
+ struct inet_frag_queue **prev)
{
struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
- int err;
q = inet_frag_alloc(nf, f, arg);
- if (!q)
+ if (!q) {
+ *prev = ERR_PTR(-ENOMEM);
return NULL;
-
+ }
mod_timer(&q->timer, jiffies + nf->timeout);
- err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
- f->rhash_params);
- if (err < 0) {
+ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ &q->node, f->rhash_params);
+ if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
inet_frag_kill(q);
inet_frag_destroy(q);
@@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_queue *fq;
+ struct inet_frag_queue *fq = NULL, *prev;
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
rcu_read_lock();
- fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
- if (fq) {
+ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (!prev)
+ fq = inet_frag_create(nf, key, &prev);
+ if (prev && !IS_ERR(prev)) {
+ fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
- rcu_read_unlock();
- return fq;
}
rcu_read_unlock();
-
- return inet_frag_create(nf, key);
+ return fq;
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 13890d5bfc34..2445614de6a7 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -234,24 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
- struct inet_sock *inet = inet_sk(sk);
- bool dev_match;
- if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
!ipv6_only_sock(sk)) {
- __be32 rcv_saddr = inet->inet_rcv_saddr;
- score = sk->sk_family == PF_INET ? 2 : 1;
- if (rcv_saddr) {
- if (rcv_saddr != daddr)
- return -1;
- score += 4;
- }
- dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif);
- if (!dev_match)
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
+
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score += 4;
+ score = sk->sk_family == PF_INET ? 2 : 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -307,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -335,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with INADDR_ANY */
-
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, htonl(INADDR_ANY), hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr,
- dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
@@ -829,6 +785,7 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
h->lhash2[i].count = 0;
}
}
+EXPORT_SYMBOL_GPL(inet_hashinfo2_init);
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 32662e9e5d21..06ee4696703c 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -69,6 +69,13 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d6ee343fdb86..aa0b22697998 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct rb_node *rbn;
int len;
int ihlen;
+ int delta;
int err;
u8 ecn;
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
if (len > 65535)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_nomem;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(qp->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 76a9a5f7a40e..c7a7bd58a23c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1341,12 +1341,6 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
-bool is_gretap_dev(const struct net_device *dev)
-{
- return dev->netdev_ops == &gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_gretap_dev);
-
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 72250b4e466d..26921f6b3b92 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -546,7 +546,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -593,7 +593,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip_rcv_core(skb, net);
if (skb == NULL)
continue;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f230..ab6618036afe 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -867,6 +867,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
+ struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
struct ip_options *opt = cork->opt;
@@ -880,8 +881,8 @@ static int __ip_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
+ bool paged, extra_uref;
u32 tskey = 0;
- bool paged;
skb = skb_peek_tail(queue);
@@ -916,6 +917,20 @@ static int __ip_append_data(struct sock *sk,
(!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = true;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
cork->length += length;
/* So, what's going on in the loop below?
@@ -939,7 +954,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -956,6 +971,7 @@ alloc_new_skb:
if (datalen > mtu - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
@@ -1000,12 +1016,6 @@ alloc_new_skb:
skb->csum = 0;
skb_reserve(skb, hh_len);
- /* only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes.
*/
@@ -1038,6 +1048,13 @@ alloc_new_skb:
exthdrlen = 0;
csummode = CHECKSUM_NONE;
+ /* only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1067,7 +1084,7 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1097,6 +1114,10 @@ alloc_new_skb:
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1109,6 +1130,8 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index dde671e97829..c857ec6b9784 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
@@ -120,7 +120,7 @@ int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
}
skb_clear_hash_if_not_l4(skb);
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb_set_queue_mapping(skb, 0);
skb_scrub_packet(skb, xnet);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 88212615bf4c..208a5b4419c6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -220,7 +220,7 @@ static int __init ic_open_devs(void)
for_each_netdev(&init_net, dev) {
if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev))
continue;
- if (dev_change_flags(dev, dev->flags | IFF_UP) < 0)
+ if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0)
pr_err("IP-Config: Failed to open %s\n", dev->name);
}
@@ -238,7 +238,7 @@ static int __init ic_open_devs(void)
if (ic_proto_enabled && !able)
continue;
oflags = dev->flags;
- if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
+ if (dev_change_flags(dev, oflags | IFF_UP, NULL) < 0) {
pr_err("IP-Config: Failed to open %s\n",
dev->name);
continue;
@@ -315,7 +315,7 @@ static void __init ic_close_devs(void)
dev = d->dev;
if (d != ic_dev && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
- dev_change_flags(dev, d->flags);
+ dev_change_flags(dev, d->flags, NULL);
}
kfree(d);
}
@@ -1361,18 +1361,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, void *v)
}
return 0;
}
-
-static int ntp_servers_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ntp_servers_seq_show, NULL);
-}
-
-static const struct file_operations ntp_servers_seq_fops = {
- .open = ntp_servers_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
#endif /* CONFIG_PROC_FS */
/*
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a6defbec4f1b..75c654924532 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -506,7 +506,7 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
dev->flags |= IFF_MULTICAST;
if (!ipmr_init_vif_indev(dev))
goto failure;
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
}
@@ -589,7 +589,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
if (!ipmr_init_vif_indev(dev))
goto failure;
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
@@ -1802,7 +1802,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
struct vif_device *out_vif = &mrt->vif_table[out_vifi];
struct vif_device *in_vif = &mrt->vif_table[in_vifi];
- if (!skb->offload_mr_fwd_mark)
+ if (!skb->offload_l3_fwd_mark)
return false;
if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
return false;
@@ -1820,8 +1820,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- int in_vifi, struct sk_buff *skb,
- struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2027,7 +2026,7 @@ forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, c, psend);
+ skb2, psend);
}
psend = ct;
}
@@ -2039,9 +2038,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- c, psend);
+ psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
return;
}
}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
+ if (ret)
+ return ret;
- if (ret == 0)
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ xt_unregister_target(&masquerade_tg_reg);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..41327bb99093 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
/* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 70289682a670..c3610b37bb4c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -219,6 +219,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
+ SNMP_MIB_ITEM("TCPBacklogCoalesce", LINUX_MIB_TCPBACKLOGCOALESCE),
SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
SNMP_MIB_ITEM("TCPDSACKRecv", LINUX_MIB_TCPDSACKRECV),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index fb1f02015a15..076f51646d26 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1132,6 +1132,7 @@ void __init raw_proc_exit(void)
{
unregister_pernet_subsys(&raw_net_ops);
}
+#endif /* CONFIG_PROC_FS */
static void raw_sysctl_init_net(struct net *net)
{
@@ -1156,4 +1157,3 @@ void __init raw_init(void)
if (register_pernet_subsys(&raw_sysctl_ops))
panic("RAW: failed to init sysctl parameters.\n");
}
-#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c0a9d26c06ce..c4ddbc5f01fc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1677,7 +1677,7 @@ static void ip_handle_martian_source(struct net_device *dev,
print_hex_dump(KERN_WARNING, "ll header: ",
DUMP_PREFIX_OFFSET, 16, 1,
skb_mac_header(skb),
- dev->hard_header_len, true);
+ dev->hard_header_len, false);
}
}
#endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9e6bc4d6daa7..27e2f6837062 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1423,7 +1423,7 @@ do_error:
if (copied + copied_syn)
goto out;
out_err:
- sock_zerocopy_put_abort(uarg);
+ sock_zerocopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -2088,7 +2088,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
}
continue;
- found_ok_skb:
+found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
@@ -2147,7 +2147,7 @@ skip_copy:
sk_eat_skb(sk, skb);
continue;
- found_fin_ok:
+found_fin_ok:
/* Process the FIN. */
++*seq;
if (!(flags & MSG_PEEK))
@@ -2241,10 +2241,6 @@ void tcp_set_state(struct sock *sk, int state)
* socket sitting in hash tables.
*/
inet_sk_state_store(sk, state);
-
-#ifdef STATE_TRACE
- SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-#endif
}
EXPORT_SYMBOL_GPL(tcp_set_state);
@@ -3246,6 +3242,7 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
+ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
0;
}
@@ -3299,6 +3296,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
TCP_NLA_PAD);
nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
+ nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
return stats;
}
@@ -3658,8 +3656,11 @@ bool tcp_alloc_md5sig_pool(void)
if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool_populated)
+ if (!tcp_md5sig_pool_populated) {
__tcp_alloc_md5sig_pool();
+ if (tcp_md5sig_pool_populated)
+ static_key_slow_inc(&tcp_md5_needed);
+ }
mutex_unlock(&tcp_md5sig_mutex);
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 9277abdd822a..0f497fc49c3f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -128,7 +128,12 @@ static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
-/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
+ * In order to help drive the network toward lower queues and low latency while
+ * maintaining high utilization, the average pacing rate aims to be slightly
+ * lower than the estimated bandwidth. This is an important aspect of the
+ * design.
+ */
static const int bbr_pacing_margin_percent = 1;
/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
@@ -247,13 +252,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
-/* Pace using current bw estimate and a gain factor. In order to help drive the
- * network toward lower queues while maintaining high utilization and low
- * latency, the average pacing rate aims to be slightly (~1%) lower than the
- * estimated bandwidth. This is an important aspect of the design. In this
- * implementation this slightly lower pacing rate is achieved implicitly by not
- * including link-layer headers in the packet size used for the pacing rate.
- */
+/* Pace using current bw estimate and a gain factor. */
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3b45fe530f91..a47c1cdf90fc 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -289,12 +289,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
{
bool cork = false, enospc = msg->sg.start == msg->sg.end;
struct sock *sk_redir;
- u32 tosend;
+ u32 tosend, delta = 0;
int ret;
more_data:
- if (psock->eval == __SK_NONE)
+ if (psock->eval == __SK_NONE) {
+ /* Track how much the verdict shrank the msg so that on SK_DROP
+ * the cut bytes are also subtracted from the copied size
+ * returned to the user. This ensures the user doesn't get a
+ * positive return code with msg_cut_data and SK_DROP verdict.
+ */
+ delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+ if (msg->sg.size < delta)
+ delta -= msg->sg.size;
+ else
+ delta = 0;
+ }
if (msg->cork_bytes &&
msg->cork_bytes > msg->sg.size && !enospc) {
@@ -350,7 +361,7 @@ more_data:
default:
sk_msg_free_partial(sk, msg, tosend);
sk_msg_apply_bytes(psock, tosend);
- *copied -= tosend;
+ *copied -= (tosend + delta);
return -EACCES;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce52..76858b14ebe9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -1863,16 +1865,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */
-static void tcp_add_reno_sack(struct sock *sk)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 prior_sacked = tp->sacked_out;
+ if (num_dupack) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+ s32 delivered;
- tp->sacked_out++;
- tcp_check_reno_reordering(sk, 0);
- if (tp->sacked_out > prior_sacked)
- tp->delivered++; /* Some out-of-order packet is delivered */
- tcp_verify_left_out(tp);
+ tp->sacked_out += num_dupack;
+ tcp_check_reno_reordering(sk, 0);
+ delivered = tp->sacked_out - prior_sacked;
+ if (delivered > 0)
+ tp->delivered += delivered;
+ tcp_verify_left_out(tp);
+ }
}
/* Account for ACK, ACKing some data in Reno Recovery phase. */
@@ -2457,8 +2463,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
- !(flag & FLAG_LOST_RETRANS)) {
+ } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
+ FLAG_RETRANS_DATA_ACKED) {
sndcnt = min_t(int, delta,
max_t(int, tp->prr_delivered - tp->prr_out,
newly_acked_sacked) + 1);
@@ -2634,7 +2640,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
int *rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2653,7 +2659,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
return;
if (after(tp->snd_nxt, tp->high_seq)) {
- if (flag & FLAG_DATA_SACKED || is_dupack)
+ if (flag & FLAG_DATA_SACKED || num_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
@@ -2679,8 +2685,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
*/
- if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
+ tcp_add_reno_sack(sk, num_dupack);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
@@ -2757,13 +2763,13 @@ static bool tcp_force_fast_retransmit(struct sock *sk)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
- bool is_dupack, int *ack_flag, int *rexmit)
+ int num_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
- bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- tcp_force_fast_retransmit(sk));
+ bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
+ tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
@@ -2810,8 +2816,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
- if (tcp_is_reno(tp) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (tcp_is_reno(tp))
+ tcp_add_reno_sack(sk, num_dupack);
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
@@ -2826,7 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
- tcp_process_loss(sk, flag, is_dupack, rexmit);
+ tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
@@ -2837,8 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
- if (is_dupack)
- tcp_add_reno_sack(sk);
+ tcp_add_reno_sack(sk, num_dupack);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -2910,9 +2915,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -3558,7 +3565,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- bool is_dupack = false;
+ int num_dupack = 0;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
@@ -3610,7 +3617,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (flag & FLAG_UPDATE_TS_RECENT)
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
- if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+ if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
+ FLAG_SND_UNA_ADVANCED) {
/* Window is constant, pure forward advance.
* No more checks are required.
* Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3668,8 +3676,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
- is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ num_dupack = 1;
+ /* Consider if pure acks were aggregated in tcp_add_backlog() */
+ if (!(flag & FLAG_DATA))
+ num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ }
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
}
@@ -3687,7 +3700,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
}
@@ -3712,7 +3725,7 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
tcp_xmit_recovery(sk, rexmit);
@@ -4268,7 +4281,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4363,6 +4376,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -4601,13 +4615,12 @@ end:
}
}
-static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
- bool *fragstolen)
+static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+ bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
- __skb_pull(skb, hdrlen);
eaten = (tail &&
tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
@@ -4658,7 +4671,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
- if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
+ if (tcp_queue_rcv(sk, skb, &fragstolen)) {
WARN_ON_ONCE(fragstolen); /* should not happen */
__kfree_skb(skb);
}
@@ -4718,7 +4731,7 @@ queue_and_out:
goto drop;
}
- eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5188,7 +5201,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
@@ -5584,8 +5607,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- &fragstolen);
+ __skb_pull(skb, tcp_header_len);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
tcp_event_data_recv(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a336787d75e5..efc6fef692ff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -542,7 +542,6 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
skb = tcp_rtx_queue_head(sk);
- BUG_ON(!skb);
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
@@ -971,10 +970,13 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
* We need to maintain these in the sk structure.
*/
+struct static_key tcp_md5_needed __read_mostly;
+EXPORT_SYMBOL(tcp_md5_needed);
+
/* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1012,7 +1014,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
}
return best_match;
}
-EXPORT_SYMBOL(tcp_md5_do_lookup);
+EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
@@ -1620,12 +1622,14 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
-
- /* Only socket owner can try to collapse/prune rx queues
- * to reduce memory overhead, so add a little headroom here.
- * Few sockets backlog are possibly concurrently non empty.
- */
- limit += 64*1024;
+ struct skb_shared_info *shinfo;
+ const struct tcphdr *th;
+ struct tcphdr *thtail;
+ struct sk_buff *tail;
+ unsigned int hdrlen;
+ bool fragstolen;
+ u32 gso_segs;
+ int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
* we can fix skb->truesize to its real value to avoid future drops.
@@ -1635,6 +1639,86 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
*/
skb_condense(skb);
+ skb_dst_drop(skb);
+
+ if (unlikely(tcp_checksum_complete(skb))) {
+ bh_unlock_sock(sk);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ return true;
+ }
+
+ /* Attempt coalescing to last skb in backlog, even if we are
+ * above the limits.
+ * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
+ */
+ th = (const struct tcphdr *)skb->data;
+ hdrlen = th->doff * 4;
+ shinfo = skb_shinfo(skb);
+
+ if (!shinfo->gso_size)
+ shinfo->gso_size = skb->len - hdrlen;
+
+ if (!shinfo->gso_segs)
+ shinfo->gso_segs = 1;
+
+ tail = sk->sk_backlog.tail;
+ if (!tail)
+ goto no_coalesce;
+ thtail = (struct tcphdr *)tail->data;
+
+ if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
+ TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
+ ((TCP_SKB_CB(tail)->tcp_flags |
+ TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+ ((TCP_SKB_CB(tail)->tcp_flags ^
+ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
+#ifdef CONFIG_TLS_DEVICE
+ tail->decrypted != skb->decrypted ||
+#endif
+ thtail->doff != th->doff ||
+ memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+ goto no_coalesce;
+
+ __skb_pull(skb, hdrlen);
+ if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+ thtail->window = th->window;
+
+ TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
+ TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+
+ TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ TCP_SKB_CB(tail)->has_rxtstamp = true;
+ tail->tstamp = skb->tstamp;
+ skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+ }
+
+ /* Not as strict as GRO. We only need to carry mss max value */
+ skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
+ skb_shinfo(tail)->gso_size);
+
+ gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
+ skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+
+ sk->sk_backlog.len += delta;
+ __NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPBACKLOGCOALESCE);
+ kfree_skb_partial(skb, fragstolen);
+ return false;
+ }
+ __skb_push(skb, hdrlen);
+
+no_coalesce:
+ /* Only socket owner can try to collapse/prune rx queues
+ * to reduce memory overhead, so add a little headroom here.
+ * Few sockets backlog are possibly concurrently non empty.
+ */
+ limit += 64*1024;
+
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
@@ -2575,8 +2659,8 @@ static int __net_init tcp_sk_init(struct net *net)
* which are too large can cause TCP streams to be bursty.
*/
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
- /* Default TSQ limit of four TSO segments */
- net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+ /* Default TSQ limit of 16 TSO segments */
+ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
/* rfc5961 challenge ack rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 870b0a335061..0fbf7d4df9da 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -10,6 +10,7 @@
* TCPv4 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/protocol.h>
@@ -305,7 +306,8 @@ int tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
-static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -318,7 +320,7 @@ static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *
return tcp_gro_receive(head, skb);
}
-static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..730bc44dbad9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
@@ -233,16 +233,14 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
- (*rcv_wscale) = 0;
+ *rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
- space >>= 1;
- (*rcv_wscale)++;
- }
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+ 0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -596,7 +594,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -732,7 +731,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -1904,24 +1904,27 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
* This algorithm is from John Heffner.
*/
static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
- bool *is_cwnd_limited, u32 max_segs)
+ bool *is_cwnd_limited,
+ bool *is_rwnd_limited,
+ u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 age, send_win, cong_win, limit, in_flight;
+ u32 send_win, cong_win, limit, in_flight;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *head;
int win_divisor;
-
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- goto send_now;
+ s64 delta;
if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
/* Avoid bursty behavior by allowing defer
- * only if the last write was recent.
+ * only if the last write was recent (1 ms).
+ * Note that tp->tcp_wstamp_ns can be in the future if we have
+ * packets waiting in a qdisc or device for EDT delivery.
*/
- if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
+ delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
+ if (delta > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1968,15 +1971,33 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
+ delta = tp->tcp_clock_cache - head->tstamp;
/* If next ACK is likely to come too late (half srtt), do not defer */
- if (age < (tp->srtt_us >> 4))
+ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
goto send_now;
- /* Ok, it looks like it is advisable to defer. */
+ /* Ok, it looks like it is advisable to defer.
+ * Three cases are tracked :
+ * 1) We are cwnd-limited
+ * 2) We are rwnd-limited
+ * 3) We are application limited.
+ */
+ if (cong_win < send_win) {
+ if (cong_win <= skb->len) {
+ *is_cwnd_limited = true;
+ return true;
+ }
+ } else {
+ if (send_win <= skb->len) {
+ *is_rwnd_limited = true;
+ return true;
+ }
+ }
- if (cong_win < send_win && cong_win <= skb->len)
- *is_cwnd_limited = true;
+ /* If this packet won't get more data, do not wait. */
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
+ TCP_SKB_CB(skb)->eor)
+ goto send_now;
return true;
@@ -2212,8 +2233,9 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
sk->sk_pacing_rate >> sk->sk_pacing_shift);
- limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
@@ -2356,7 +2378,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
} else {
if (!push_one &&
tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
- max_segs))
+ &is_rwnd_limited, max_segs))
break;
}
@@ -2494,15 +2516,18 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
}
skb = skb_rb_last(&sk->tcp_rtx_queue);
+ if (unlikely(!skb)) {
+ WARN_ONCE(tp->packets_out,
+ "invalid inflight: %u state %u cwnd %u mss %d\n",
+ tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+ inet_csk(sk)->icsk_pending = 0;
+ return;
+ }
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
goto rearm_timer;
- /* Retransmit last segment. */
- if (WARN_ON(!skb))
- goto rearm_timer;
-
if (skb_still_in_host_queue(sk, skb))
goto rearm_timer;
@@ -2920,7 +2945,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
}
return err;
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..f87dbc78b6bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
start_ts = tcp_retransmit_stamp(sk);
if (!icsk->icsk_user_timeout || !start_ts)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
timeout = jiffies_to_msecs(timeout);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -376,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk)
return;
}
- if (icsk->icsk_probes_out > max_probes) {
+ if (icsk->icsk_probes_out >= max_probes) {
abort: tcp_write_err(sk);
} else {
/* Only send another probe if we didn't close things up. */
@@ -482,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk)
goto out_reset_timer;
}
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
if (tcp_write_timeout(sk))
goto out;
if (icsk->icsk_retransmits == 0) {
- int mib_idx;
+ int mib_idx = 0;
if (icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp))
@@ -501,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk)
mib_idx = LINUX_MIB_TCPSACKFAILURES;
else
mib_idx = LINUX_MIB_TCPRENOFAILURES;
- } else {
- mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
- __NET_INC_STATS(sock_net(sk), mib_idx);
+ if (mib_idx)
+ __NET_INC_STATS(sock_net(sk), mib_idx);
}
tcp_enter_loss(sk);
@@ -740,7 +742,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6f8890c5bc7e..3fb0ed5e4789 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -380,15 +380,12 @@ static int compute_score(struct sock *sk, struct net *net,
ipv6_only_sock(sk))
return -1;
- score = (sk->sk_family == PF_INET) ? 2 : 1;
- inet = inet_sk(sk);
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
- if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
- return -1;
- score += 4;
- }
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+ inet = inet_sk(sk);
if (inet->inet_daddr) {
if (inet->inet_daddr != saddr)
return -1;
@@ -464,65 +461,30 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
bool exact_dif = udp_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif,
+ exact_dif, hslot2, skb);
+ if (!result) {
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
+ htonl(INADDR_ANY), hnum, dif, sdif,
exact_dif, hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2, skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = 0;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
@@ -587,7 +549,7 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
void udp_encap_enable(void)
{
- static_branch_enable(&udp_encap_needed_key);
+ static_branch_inc(&udp_encap_needed_key);
}
EXPORT_SYMBOL(udp_encap_enable);
@@ -2524,7 +2486,7 @@ void udp_destroy_sock(struct sock *sk)
encap_destroy(sk);
}
if (up->encap_enabled)
- static_branch_disable(&udp_encap_needed_key);
+ static_branch_dec(&udp_encap_needed_key);
}
}
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0646d61f4fa8..64f9715173ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>
+#include <net/inet_common.h>
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
@@ -391,6 +392,8 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return NULL;
}
+INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup)
{
@@ -402,7 +405,8 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct sock *sk;
rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
if (!sk)
goto out_unlock;
@@ -451,8 +455,8 @@ out_unlock:
}
EXPORT_SYMBOL(udp_gro_receive);
-static struct sk_buff *udp4_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -502,7 +506,8 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
if (sk && udp_sk(sk)->gro_enabled) {
err = udp_gro_complete_segment(skb);
} else if (sk && udp_sk(sk)->gro_complete) {
@@ -525,7 +530,7 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
}
EXPORT_SYMBOL(udp_gro_complete);
-static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index d0c412fc56ad..be8b5b2157d8 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -20,6 +20,23 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
+ if (cfg->bind_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, cfg->bind_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ dev->name, strlen(dev->name) + 1);
+ dev_put(dev);
+
+ if (err < 0)
+ goto error;
+ }
+
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 63a808d5af15..521e471f1cf9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -2820,7 +2820,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
dev = __dev_get_by_name(net, p.name);
if (!dev)
goto err_exit;
- err = dev_open(dev);
+ err = dev_open(dev, NULL);
}
}
#endif
@@ -3439,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_change_info *change_info;
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
struct net *net = dev_net(dev);
@@ -3513,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
break;
}
- if (idev) {
+ if (!IS_ERR_OR_NULL(idev)) {
if (idev->if_flags & IF_READY) {
/* device is already configured -
* but resend MLD reports, we might
@@ -3521,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ change_info = ptr;
+ if (change_info->flags_changed & IFF_NOARP)
+ addrconf_dad_run(idev, true);
rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
@@ -3555,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
- addrconf_dad_run(idev);
+ addrconf_dad_run(idev, false);
/* Device has an address by now */
rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4173,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
addrconf_verify_rtnl();
}
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
{
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
list_for_each_entry(ifp, &idev->addr_list, if_list) {
spin_lock(&ifp->lock);
- if (ifp->flags & IFA_F_TENTATIVE &&
- ifp->state == INET6_IFADDR_STATE_DAD)
+ if ((ifp->flags & IFA_F_TENTATIVE &&
+ ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+ if (restart)
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
addrconf_dad_kick(ifp);
+ }
spin_unlock(&ifp->lock);
}
read_unlock_bh(&idev->lock);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 5eeeba7181a1..f3515ebe9b3a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -99,23 +99,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
- bool dev_match;
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
- score = 1;
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
- dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
- dif, sdif);
- if (!dev_match)
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score++;
+ score = 1;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -164,26 +157,12 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet6_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -192,33 +171,12 @@ struct sock *inet6_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with in6addr_any */
-
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, &in6addr_any, hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 81b69bcee714..229e55c99021 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1885,12 +1885,6 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
-bool is_ip6gretap_dev(const struct net_device *dev)
-{
- return dev->netdev_ops == &ip6gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
-
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 3c06cc9e9b79..c7ed2b6d5a1d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
list_for_each_entry_safe(skb, next, head, list) {
struct dst_entry *dst;
- list_del(&skb->list);
+ skb_list_del_init(skb);
/* if ingress device is enslaved to an L3 master device pass the
* skb to its handler for processing
*/
@@ -296,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
struct net_device *dev = skb->dev;
struct net *net = dev_net(dev);
- list_del(&skb->list);
+ skb_list_del_init(skb);
skb = ip6_rcv_core(skb, dev, net);
if (skb == NULL)
continue;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 70f525c33cb6..5c045691c302 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -20,6 +20,23 @@
#include "ip6_offload.h"
+/* All GRO functions are always builtin, except UDP over ipv6, which lives in
+ * ipv6 module, as it depends on UDPv6 lookup function, so we need special care
+ * when ipv6 is built as a module
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+
+#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
+})
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -164,8 +181,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
return len;
}
-static struct sk_buff *ipv6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff *pp = NULL;
@@ -260,7 +281,8 @@ not_same_flow:
skb_gro_postpull_rcsum(skb, iph, nlen);
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -301,7 +323,9 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
return inet_gro_receive(head, skb);
}
-static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
@@ -320,7 +344,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->callbacks.gro_complete(skb, nhoff);
+ err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
+ udp6_gro_complete, skb, nhoff);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89e0d5118afe..9d55ee33b7f9 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
int seg_len = skb->len;
int hlimit = -1;
u32 mtu;
- if (opt) {
- unsigned int head_room;
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ if (opt)
+ head_room += opt->opt_nflen + opt->opt_flen;
- /* First: exthdrs may take lots of space (~8K for now)
- MAX_HEADER is not enough.
- */
- head_room = opt->opt_nflen + opt->opt_flen;
- seg_len += head_room;
- head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
-
- if (skb_headroom(skb) < head_room) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
+ if (unlikely(skb_headroom(skb) < head_room)) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+ if (!skb2) {
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return -ENOBUFS;
}
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+ consume_skb(skb);
+ skb = skb2;
+ }
+
+ if (opt) {
+ seg_len += opt->opt_nflen + opt->opt_flen;
+
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
+
if (opt->opt_nflen)
ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
&fl6->saddr);
@@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
return dst_output(net, sk, skb);
}
@@ -1245,6 +1252,7 @@ static int __ip6_append_data(struct sock *sk,
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
@@ -1257,7 +1265,7 @@ static int __ip6_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged;
+ bool paged, extra_uref;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1322,6 +1330,20 @@ emsgsize:
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = true;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1354,7 +1376,7 @@ emsgsize:
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1378,6 +1400,7 @@ alloc_new_skb:
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
@@ -1439,12 +1462,6 @@ alloc_new_skb:
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- /* Only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes
*/
@@ -1476,6 +1493,13 @@ alloc_new_skb:
exthdrlen = 0;
dst_exthdrlen = 0;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1505,7 +1529,7 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1535,6 +1559,10 @@ alloc_new_skb:
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1547,6 +1575,8 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index b283f293ee4a..3965d5396b0a 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -31,6 +31,22 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
}
+ if (cfg->bind_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, cfg->bind_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ dev->name, strlen(dev->name) + 1);
+ dev_put(dev);
+
+ if (err < 0)
+ goto error;
+ }
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e2ea691e42c6..34b8a90e6be2 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -655,7 +655,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
return NULL;
}
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
@@ -1968,7 +1968,7 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
*/
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+ struct sk_buff *skb, int vifi)
{
struct ipv6hdr *ipv6h;
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2134,15 +2134,14 @@ forward:
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2,
- c, psend);
+ ip6mr_forward2(net, mrt, skb2, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, c, psend);
+ ip6mr_forward2(net, mrt, skb, psend);
return;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5ae8e1c51079..8b075f0bc351 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
unsigned int hh_len;
struct dst_entry *dst;
struct flowi6 fl6 = {
- .flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+ .flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+ rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a..29c7f1915a96 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
int err;
err = xt_register_target(&masquerade_tg6_reg);
- if (err == 0)
- nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ return err;
+
+ err = nf_nat_masquerade_ipv6_register_notifier();
+ if (err)
+ xt_unregister_target(&masquerade_tg6_reg);
return err;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d219979c3e52..181da2c40f9a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
return false;
}
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
return false;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 3e4bf2286abe..0ad0da5a2600 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
-static int masq_inet_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
@@ -171,30 +171,53 @@ static int masq_inet_event(struct notifier_block *this,
return NOTIFY_DONE;
}
-static struct notifier_block masq_inet_notifier = {
- .notifier_call = masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier is already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
+
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
+
+ ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ if (ret)
+ goto err_unregister;
- register_netdevice_notifier(&masq_dev_notifier);
- register_inet6addr_notifier(&masq_inet_notifier);
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
void nf_nat_masquerade_ipv6_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
- unregister_inet6addr_notifier(&masq_inet_notifier);
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffe..e06c82e9dfcd 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv6_register_notifier();
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv6_type);
return ret;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c3c92713096..aa26c45486d9 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len;
+ int payload_len, delta;
unsigned int nhoff;
int sum_truesize;
u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
+ delta = - head->truesize;
+
/* Head of list must not be cloned. */
if (skb_unclone(head, GFP_ATOMIC))
goto out_oom;
+ delta += head->truesize;
+ if (delta)
+ add_frag_mem_limit(fq->q.net, delta);
+
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
* and the second, holding only fragments. */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b2447b7c7303..194bc162866d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2232,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt) {
rcu_read_lock();
if (rt->rt6i_flags & RTF_CACHE) {
- if (dst_hold_safe(&rt->dst))
- rt6_remove_exception_rt(rt);
+ rt6_remove_exception_rt(rt);
} else {
struct fib6_info *from;
struct fib6_node *fn;
@@ -2360,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ int oif = sk->sk_bound_dev_if;
struct dst_entry *dst;
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
+ if (!oif && skb->dev)
+ oif = l3mdev_master_ifindex(skb->dev);
+
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3215,8 +3217,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
if (cfg->fc_flags & RTF_GATEWAY &&
!ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
goto out;
- if (dst_hold_safe(&rt->dst))
- rc = rt6_remove_exception_rt(rt);
+
+ rc = rt6_remove_exception_rt(rt);
out:
return rc;
}
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index a8854dd3e9c5..8181ee7e1e27 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -347,6 +347,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
+ memset(&fl6, 0, sizeof(fl6));
fl6.daddr = hdr->daddr;
fl6.saddr = hdr->saddr;
fl6.flowlabel = ip6_flowinfo(hdr);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a3f559162521..b81eb7cb815e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -737,6 +737,7 @@ static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
+ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
@@ -744,7 +745,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
- if (!sk_listener->sk_bound_dev_if &&
+ if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index e72947c99454..3179c425d7ff 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -9,14 +9,15 @@
*
* TCPv6 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *tcp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -29,7 +30,7 @@ static struct sk_buff *tcp6_gro_receive(struct list_head *head,
return tcp_gro_receive(head, skb);
}
-static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0c0cb1611aef..9cbf363172bd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -125,6 +125,9 @@ static int compute_score(struct sock *sk, struct net *net,
sk->sk_family != PF_INET6)
return -1;
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+
score = 0;
inet = inet_sk(sk);
@@ -134,12 +137,6 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
-
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
@@ -197,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
- struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
+ struct sock *result;
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif, exact_dif,
+ hslot2, skb);
+ if (!result) {
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
+
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif,
- hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2,
- skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = -1;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
+ &in6addr_any, hnum, dif, sdif,
+ exact_dif, hslot2,
+ skb);
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -326,6 +289,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
+ struct udp_mib *mib;
int is_udp4;
if (flags & MSG_ERRQUEUE)
@@ -349,6 +313,7 @@ try_again:
msg->msg_flags |= MSG_TRUNC;
is_udp4 = (skb->protocol == htons(ETH_P_IP));
+ mib = __UDPX_MIB(sk, is_udp4);
/*
* If checksum is needed at all, try to do it while copying the
@@ -377,24 +342,13 @@ try_again:
if (unlikely(err)) {
if (!peeked) {
atomic_inc(&sk->sk_drops);
- if (is_udp4)
- UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- is_udplite);
- else
- UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- is_udplite);
+ SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
return err;
}
- if (!peeked) {
- if (is_udp4)
- UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
- is_udplite);
- else
- UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
- is_udplite);
- }
+ if (!peeked)
+ SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
sock_recv_ts_and_drops(msg, sk, skb);
@@ -443,17 +397,8 @@ try_again:
csum_copy_err:
if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
udp_skb_destructor)) {
- if (is_udp4) {
- UDP_INC_STATS(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP_INC_STATS(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
- } else {
- UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
- }
+ SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
+ SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
@@ -466,7 +411,7 @@ csum_copy_err:
DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
- static_branch_enable(&udpv6_encap_needed_key);
+ static_branch_inc(&udpv6_encap_needed_key);
}
EXPORT_SYMBOL(udpv6_encap_enable);
@@ -1597,7 +1542,7 @@ void udpv6_destroy_sock(struct sock *sk)
encap_destroy(sk);
}
if (up->encap_enabled)
- static_branch_disable(&udpv6_encap_needed_key);
+ static_branch_dec(&udpv6_encap_needed_key);
}
inet6_destroy_sock(sk);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 828b2457f97b..83b11d0ac091 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -11,6 +11,7 @@
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/udp.h>
@@ -114,8 +115,8 @@ out:
return segs;
}
-static struct sk_buff *udp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -142,7 +143,7 @@ flush:
return NULL;
}
-static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 0bed4cc20603..78ea5a739d10 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1873,30 +1873,26 @@ static void iucv_callback_txdone(struct iucv_path *path,
struct sock *sk = path->private;
struct sk_buff *this = NULL;
struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
- struct sk_buff *list_skb = list->next;
+ struct sk_buff *list_skb;
unsigned long flags;
bh_lock_sock(sk);
- if (!skb_queue_empty(list)) {
- spin_lock_irqsave(&list->lock, flags);
- while (list_skb != (struct sk_buff *)list) {
- if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
- this = list_skb;
- break;
- }
- list_skb = list_skb->next;
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_walk(list, list_skb) {
+ if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
+ this = list_skb;
+ break;
}
- if (this)
- __skb_unlink(this, list);
-
- spin_unlock_irqrestore(&list->lock, flags);
+ }
+ if (this)
+ __skb_unlink(this, list);
+ spin_unlock_irqrestore(&list->lock, flags);
- if (this) {
- kfree_skb(this);
- /* wake up any process waiting for sending */
- iucv_sock_wake_msglim(sk);
- }
+ if (this) {
+ kfree_skb(this);
+ /* wake up any process waiting for sending */
+ iucv_sock_wake_msglim(sk);
}
if (sk->sk_state == IUCV_CLOSING) {
@@ -2284,11 +2280,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
list = &iucv->send_skb_q;
spin_lock_irqsave(&list->lock, flags);
- if (skb_queue_empty(list))
- goto out_unlock;
- list_skb = list->next;
- nskb = list_skb->next;
- while (list_skb != (struct sk_buff *)list) {
+ skb_queue_walk_safe(list, list_skb, nskb) {
if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
switch (n) {
case TX_NOTIFY_OK:
@@ -2321,10 +2313,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
}
break;
}
- list_skb = nskb;
- nskb = nskb->next;
}
-out_unlock:
spin_unlock_irqrestore(&list->lock, flags);
if (sk->sk_state == IUCV_CLOSING) {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 82cdf9020b53..26f1d435696a 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1490,12 +1490,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
goto err_sock;
}
- sk = sock->sk;
-
- sock_hold(sk);
- tunnel->sock = sk;
tunnel->l2tp_net = net;
-
pn = l2tp_pernet(net);
spin_lock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1510,6 +1505,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+ sk = sock->sk;
+ sock_hold(sk);
+ tunnel->sock = sk;
+
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 04d9946dcdba..c03c6461f236 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -236,6 +236,10 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
skb->data[1] == PPP_UI)
skb_pull(skb, 2);
+ /* Decompress protocol field if PFC is enabled */
+ if ((*skb->data) & 0x1)
+ *(u8 *)skb_push(skb, 1) = 0;
+
if (sk->sk_state & PPPOX_BOUND) {
struct pppox_sock *po;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8da86ceca33d..309dee76724e 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -47,6 +47,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
/**
+ * l3mdev_master_upper_ifindex_by_index_rcu - get index of upper l3
+ * master device
+ * @net: network namespace for device index lookup
+ * @ifindex: targeted interface
+ */
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ while (dev && !netif_is_l3_master(dev))
+ dev = netdev_master_upper_dev_get(dev);
+
+ return dev ? dev->ifindex : 0;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
+
+/**
* l3mdev_fib_table - get FIB table id associated with an L3
* master interface
* @dev: targeted interface
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 51622333d460..818aa0060349 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2891,7 +2891,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
beacon->proberesp_ies_len + beacon->assocresp_ies_len +
- beacon->probe_resp_len;
+ beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len;
new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
if (!new_beacon)
@@ -2934,8 +2934,9 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
pos += beacon->probe_resp_len;
}
- if (beacon->ftm_responder)
- new_beacon->ftm_responder = beacon->ftm_responder;
+
+ /* might copy -1, meaning no changes requested */
+ new_beacon->ftm_responder = beacon->ftm_responder;
if (beacon->lci) {
new_beacon->lci_len = beacon->lci_len;
new_beacon->lci = pos;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5836ddeac9e3..5f3c81e705c7 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1015,6 +1015,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
+ sdata->vif.bss_conf.beacon_int = 0;
+
/*
* If the interface goes down while suspended, presumably because
* the device was unplugged and that happens before our resume,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d2bc8d57c87e..bcf5ffc1567a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2766,6 +2766,7 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct sta_info *sta;
+ bool result = true;
sdata_info(sdata, "authenticated\n");
ifmgd->auth_data->done = true;
@@ -2778,15 +2779,18 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
sta = sta_info_get(sdata, bssid);
if (!sta) {
WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
- return false;
+ result = false;
+ goto out;
}
if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
sdata_info(sdata, "failed moving %pM to auth\n", bssid);
- return false;
+ result = false;
+ goto out;
}
- mutex_unlock(&sdata->local->sta_mtx);
- return true;
+out:
+ mutex_unlock(&sdata->local->sta_mtx);
+ return result;
}
static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3bd3b5769797..428f7ad5f9b5 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1403,6 +1403,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
if (ieee80211_is_ctl(hdr->frame_control) ||
+ ieee80211_is_nullfunc(hdr->frame_control) ||
ieee80211_is_qos_nullfunc(hdr->frame_control) ||
is_multicast_ether_addr(hdr->addr1))
return RX_CONTINUE;
@@ -3063,7 +3064,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
&sta_opmode,
- GFP_KERNEL);
+ GFP_ATOMIC);
goto handled;
}
case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
@@ -3100,7 +3101,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
cfg80211_sta_opmode_change_notify(sdata->dev,
rx->sta->addr,
&sta_opmode,
- GFP_KERNEL);
+ GFP_ATOMIC);
goto handled;
}
default:
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index aa4afbf0abaf..a794ca729000 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -964,6 +964,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
/* Track when last TDLS packet was ACKed */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
sta->status_stats.last_tdls_pkt_time = jiffies;
+ } else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+ return;
} else {
ieee80211_lost_packet(sta, info);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e0ccee23fbcd..1f536ba573b4 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -439,8 +439,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL))
info->hw_queue = tx->sdata->vif.cab_queue;
- /* no stations in PS mode */
- if (!atomic_read(&ps->num_sta_ps))
+ /* no stations in PS mode and no buffered packets */
+ if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf))
return TX_CONTINUE;
info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1dae77c54009..87505600dbb2 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -73,10 +73,15 @@ enum {
#define NCSI_OEM_MFR_BCM_ID 0x113d
/* Broadcom specific OEM Command */
#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
+/* Mellanox specific OEM Command */
+#define NCSI_OEM_MLX_CMD_GMA 0x00 /* CMD ID for Get MAC */
+#define NCSI_OEM_MLX_CMD_GMA_PARAM 0x1b /* Parameter for GMA */
/* OEM Command payload lengths*/
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
+#define NCSI_OEM_MLX_CMD_GMA_LEN 8
/* Mac address offset in OEM response */
#define BCM_MAC_ADDR_OFFSET 28
+#define MLX_MAC_ADDR_OFFSET 8
struct ncsi_channel_version {
@@ -222,6 +227,10 @@ struct ncsi_package {
unsigned int channel_num; /* Number of channels */
struct list_head channels; /* List of chanels */
struct list_head node; /* Form list of packages */
+
+ bool multi_channel; /* Enable multiple channels */
+ u32 channel_whitelist; /* Channels to configure */
+ struct ncsi_channel *preferred_channel; /* Primary channel */
};
struct ncsi_request {
@@ -287,16 +296,16 @@ struct ncsi_dev_priv {
#define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */
#define NCSI_DEV_HWA 2 /* Enabled HW arbitration */
#define NCSI_DEV_RESHUFFLE 4
+#define NCSI_DEV_RESET 8 /* Reset state of NC */
unsigned int gma_flag; /* OEM GMA flag */
spinlock_t lock; /* Protect the NCSI device */
#if IS_ENABLED(CONFIG_IPV6)
unsigned int inet6_addr_num; /* Number of IPv6 addresses */
#endif
+ unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
- struct ncsi_package *force_package; /* Force a specific package */
- struct ncsi_channel *force_channel; /* Force a specific channel */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
@@ -309,6 +318,9 @@ struct ncsi_dev_priv {
struct list_head node; /* Form NCSI device list */
#define NCSI_MAX_VLAN_VIDS 15
struct list_head vlan_vids; /* List of active VLAN IDs */
+
+ bool multi_package; /* Enable multiple packages */
+ u32 package_whitelist; /* Packages to configure */
};
struct ncsi_cmd_arg {
@@ -341,6 +353,7 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
+int ncsi_reset_dev(struct ncsi_dev *nd);
void ncsi_start_channel_monitor(struct ncsi_channel *nc);
void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
@@ -361,6 +374,13 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
void ncsi_free_request(struct ncsi_request *nr);
struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
+bool ncsi_channel_has_link(struct ncsi_channel *channel);
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *channel);
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+ struct ncsi_package *np,
+ struct ncsi_channel *disable,
+ struct ncsi_channel *enable);
/* Packet handlers */
u32 ncsi_calculate_checksum(unsigned char *data, int len);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 25e483e8278b..26d67e27551f 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -50,13 +50,15 @@ static int ncsi_validate_aen_pkt(struct ncsi_aen_pkt_hdr *h,
static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
struct ncsi_aen_pkt_hdr *h)
{
- struct ncsi_aen_lsc_pkt *lsc;
- struct ncsi_channel *nc;
+ struct ncsi_channel *nc, *tmp;
struct ncsi_channel_mode *ncm;
- bool chained;
- int state;
unsigned long old_data, data;
+ struct ncsi_aen_lsc_pkt *lsc;
+ struct ncsi_package *np;
+ bool had_link, has_link;
unsigned long flags;
+ bool chained;
+ int state;
/* Find the NCSI channel */
ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
@@ -73,6 +75,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
+ had_link = !!(old_data & 0x1);
+ has_link = !!(data & 0x1);
+
netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
nc->id, data & 0x1 ? "up" : "down");
@@ -80,22 +85,60 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
state = nc->state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!((old_data ^ data) & 0x1) || chained)
- return 0;
- if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) &&
- !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
+ if (state == NCSI_CHANNEL_INACTIVE)
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: Inactive channel %u received AEN!\n",
+ nc->id);
+
+ if ((had_link == has_link) || chained)
return 0;
- if (!(ndp->flags & NCSI_DEV_HWA) &&
- state == NCSI_CHANNEL_ACTIVE)
- ndp->flags |= NCSI_DEV_RESHUFFLE;
+ if (!ndp->multi_package && !nc->package->multi_channel) {
+ if (had_link) {
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return ncsi_process_next_channel(ndp);
+ }
+ /* Configured channel came up */
+ return 0;
+ }
- ncsi_stop_channel_monitor(nc);
- spin_lock_irqsave(&ndp->lock, flags);
- list_add_tail_rcu(&nc->link, &ndp->channel_queue);
- spin_unlock_irqrestore(&ndp->lock, flags);
+ if (had_link) {
+ ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
+ if (ncsi_channel_is_last(ndp, nc)) {
+ /* No channels left, reconfigure */
+ return ncsi_reset_dev(&ndp->ndev);
+ } else if (ncm->enable) {
+ /* Need to failover Tx channel */
+ ncsi_update_tx_channel(ndp, nc->package, nc, NULL);
+ }
+ } else if (has_link && nc->package->preferred_channel == nc) {
+ /* Return Tx to preferred channel */
+ ncsi_update_tx_channel(ndp, nc->package, NULL, nc);
+ } else if (has_link) {
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, tmp) {
+ /* Enable Tx on this channel if the current Tx
+ * channel is down.
+ */
+ ncm = &tmp->modes[NCSI_MODE_TX_ENABLE];
+ if (ncm->enable &&
+ !ncsi_channel_has_link(tmp)) {
+ ncsi_update_tx_channel(ndp, nc->package,
+ tmp, nc);
+ break;
+ }
+ }
+ }
+ }
- return ncsi_process_next_channel(ndp);
+ /* Leave configured channels active in a multi-channel scenario so
+ * AEN events are still received.
+ */
+ return 0;
}
static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index bfc43b28c7a6..31359d5e14ad 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -28,6 +28,29 @@
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
+bool ncsi_channel_has_link(struct ncsi_channel *channel)
+{ /* Returns true when bit 0 of the channel's NCSI_MODE_LINK data[2] word is set. */
+ return !!(channel->modes[NCSI_MODE_LINK].data[2] & 0x1);
+}
+
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *channel)
+{ /* Returns true when no channel other than @channel is both ACTIVE and has link. */
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (nc == channel)
+ continue; /* the channel under test does not count */
+ if (nc->state == NCSI_CHANNEL_ACTIVE &&
+ ncsi_channel_has_link(nc))
+ return false; /* another usable channel exists */
+ }
+
+ return true; /* every other channel is non-ACTIVE or has no link */
+}
+
static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -52,7 +75,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
continue;
}
- if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+ if (ncsi_channel_has_link(nc)) {
spin_unlock_irqrestore(&nc->lock, flags);
nd->link_up = 1;
goto report;
@@ -113,10 +136,8 @@ static void ncsi_channel_monitor(struct timer_list *t)
default:
netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
nc->id);
- if (!(ndp->flags & NCSI_DEV_HWA)) {
- ncsi_report_link(ndp, true);
- ndp->flags |= NCSI_DEV_RESHUFFLE;
- }
+ ncsi_report_link(ndp, true);
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
ncsi_stop_channel_monitor(nc);
@@ -269,6 +290,7 @@ struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp,
np->ndp = ndp;
spin_lock_init(&np->lock);
INIT_LIST_HEAD(&np->channels);
+ np->channel_whitelist = UINT_MAX;
spin_lock_irqsave(&ndp->lock, flags);
tmp = ncsi_find_package(ndp, id);
@@ -442,12 +464,14 @@ static void ncsi_request_timeout(struct timer_list *t)
static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
- struct ncsi_package *np = ndp->active_package;
- struct ncsi_channel *nc = ndp->active_channel;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc, *tmp;
struct ncsi_cmd_arg nca;
unsigned long flags;
int ret;
+ np = ndp->active_package;
+ nc = ndp->active_channel;
nca.ndp = ndp;
nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
switch (nd->state) {
@@ -523,6 +547,15 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
if (ret)
goto error;
+ NCSI_FOR_EACH_CHANNEL(np, tmp) {
+ /* If there is another channel active on this package
+ * do not deselect the package.
+ */
+ if (tmp != nc && tmp->state == NCSI_CHANNEL_ACTIVE) {
+ nd->state = ncsi_dev_state_suspend_done;
+ break;
+ }
+ }
break;
case ncsi_dev_state_suspend_deselect:
ndp->pending_req_num = 1;
@@ -541,8 +574,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
spin_lock_irqsave(&nc->lock, flags);
nc->state = NCSI_CHANNEL_INACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
- ncsi_process_next_channel(ndp);
-
+ if (ndp->flags & NCSI_DEV_RESET)
+ ncsi_reset_dev(nd);
+ else
+ ncsi_process_next_channel(ndp);
break;
default:
netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
@@ -675,12 +710,38 @@ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
return ret;
}
+static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
+{ /* Builds the Mellanox OEM Get-MAC payload in @nca and transmits it; returns the ncsi_xmit_cmd() result. */
+ union {
+ u8 data_u8[NCSI_OEM_MLX_CMD_GMA_LEN];
+ u32 data_u32[NCSI_OEM_MLX_CMD_GMA_LEN / sizeof(u32)];
+ } u; /* same bytes viewed as u8 (per-byte fields) or u32 (manufacturer ID word) */
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN;
+
+ memset(&u, 0, sizeof(u)); /* bytes not assigned below stay zero */
+ u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID); /* NOTE(review): outgoing data, htonl() would state the intent; same byte swap - confirm */
+ u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA; /* command ID goes in byte 5 */
+ u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM; /* command parameter goes in byte 6 */
+
+ nca->data = u.data_u8; /* NOTE(review): points at this stack buffer; presumably ncsi_xmit_cmd() copies it before returning - confirm */
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+ return ret; /* a transmit failure is logged above and passed back unchanged */
+}
+
/* OEM Command handlers initialization */
static struct ncsi_oem_gma_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_cmd_arg *nca);
} ncsi_oem_gma_handlers[] = {
- { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }
+ { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
+ { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
};
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
@@ -717,13 +778,144 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+/* Determine if a given channel @nc from the channel_queue should be used for Tx; returns true if it should */
+static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *nc)
+{
+ struct ncsi_channel_mode *ncm;
+ struct ncsi_channel *channel;
+ struct ncsi_package *np;
+
+ /* Check if any channel already has Tx enabled (nc itself is not skipped
+ * by this scan); a channel may have been configured and dequeued already.
+ */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (!ndp->multi_package && np != nc->package)
+ continue; /* without multi_package only nc's own package is scanned */
+ NCSI_FOR_EACH_CHANNEL(np, channel) {
+ ncm = &channel->modes[NCSI_MODE_TX_ENABLE];
+ if (ncm->enable)
+ return false; /* Tx is already assigned */
+ }
+ }
+
+ /* The first queued channel whose package has a preferred channel with link decides: true only if that preferred channel is nc */
+ list_for_each_entry_rcu(channel, &ndp->channel_queue, link) {
+ np = channel->package;
+ if (np->preferred_channel &&
+ ncsi_channel_has_link(np->preferred_channel)) {
+ return np->preferred_channel == nc;
+ }
+ }
+
+ /* No preferred channel with link was found above, so nc gets Tx if it has link */
+ if (ncsi_channel_has_link(nc))
+ return true;
+
+ list_for_each_entry_rcu(channel, &ndp->channel_queue, link)
+ if (ncsi_channel_has_link(channel))
+ return false; /* nc has no link but a queued channel does; leave Tx to that one */
+
+ /* No other channel has link; default to this one */
+ return true;
+}
+
+/* Change the active Tx channel in a multi-channel setup: send DCNT to @disable and ECNT to @enable; either may be NULL to have it looked up. Returns -1 if there is nothing to switch to, else the ECNT transmit result. */
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+ struct ncsi_package *package,
+ struct ncsi_channel *disable,
+ struct ncsi_channel *enable)
+{
+ struct ncsi_cmd_arg nca;
+ struct ncsi_channel *nc;
+ struct ncsi_package *np;
+ int ret = 0;
+
+ if (!package->multi_channel && !ndp->multi_package)
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: Trying to update Tx channel in single-channel mode\n");
+ nca.ndp = ndp; /* the warning above is advisory only; the update still proceeds */
+ nca.req_flags = 0;
+
+ /* Find current channel with Tx enabled (skipped entirely when the caller supplied @disable) */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (disable)
+ break; /* also ends the package walk once the inner loop has found a channel */
+ if (!ndp->multi_package && np != package)
+ continue; /* without multi_package only @package is searched */
+
+ NCSI_FOR_EACH_CHANNEL(np, nc)
+ if (nc->modes[NCSI_MODE_TX_ENABLE].enable) {
+ disable = nc;
+ break; /* leaves the channel loop only */
+ }
+ }
+
+ /* Find a suitable channel for Tx (skipped entirely when the caller supplied @enable) */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (enable)
+ break; /* also ends the package walk once a candidate has been chosen */
+ if (!ndp->multi_package && np != package)
+ continue;
+ if (!(ndp->package_whitelist & (0x1 << np->id)))
+ continue; /* package is not whitelisted */
+
+ if (np->preferred_channel &&
+ ncsi_channel_has_link(np->preferred_channel)) {
+ enable = np->preferred_channel; /* a preferred channel with link wins over any other channel */
+ break;
+ }
+
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (!(np->channel_whitelist & 0x1 << nc->id))
+ continue; /* channel is not whitelisted */
+ if (nc->state != NCSI_CHANNEL_ACTIVE)
+ continue; /* only ACTIVE channels are candidates */
+ if (ncsi_channel_has_link(nc)) {
+ enable = nc;
+ break; /* leaves the channel loop only */
+ }
+ }
+ }
+
+ if (disable == enable)
+ return -1; /* same channel on both sides (or both NULL): nothing to change */
+
+ if (!enable)
+ return -1; /* no candidate channel was found */
+
+ if (disable) {
+ nca.channel = disable->id;
+ nca.package = disable->package->id;
+ nca.type = NCSI_PKT_CMD_DCNT;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "Error %d sending DCNT\n",
+ ret);
+ } /* a DCNT failure is only logged; the ECNT below is still sent */
+
+ netdev_info(ndp->ndev.dev, "NCSI: channel %u enables Tx\n", enable->id);
+
+ nca.channel = enable->id;
+ nca.package = enable->package->id;
+ nca.type = NCSI_PKT_CMD_ECNT;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "Error %d sending ECNT\n",
+ ret);
+
+ return ret; /* reflects the ECNT transmit only; an earlier DCNT error is overwritten */
+}
+
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_dev *nd = &ndp->ndev;
- struct net_device *dev = nd->dev;
struct ncsi_package *np = ndp->active_package;
struct ncsi_channel *nc = ndp->active_channel;
struct ncsi_channel *hot_nc = NULL;
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct net_device *dev = nd->dev;
struct ncsi_cmd_arg nca;
unsigned char index;
unsigned long flags;
@@ -845,20 +1037,29 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
} else if (nd->state == ncsi_dev_state_config_ebf) {
nca.type = NCSI_PKT_CMD_EBF;
nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
- nd->state = ncsi_dev_state_config_ecnt;
+ if (ncsi_channel_is_tx(ndp, nc))
+ nd->state = ncsi_dev_state_config_ecnt;
+ else
+ nd->state = ncsi_dev_state_config_ec;
#if IS_ENABLED(CONFIG_IPV6)
if (ndp->inet6_addr_num > 0 &&
(nc->caps[NCSI_CAP_GENERIC].cap &
NCSI_CAP_GENERIC_MC))
nd->state = ncsi_dev_state_config_egmf;
- else
- nd->state = ncsi_dev_state_config_ecnt;
} else if (nd->state == ncsi_dev_state_config_egmf) {
nca.type = NCSI_PKT_CMD_EGMF;
nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
- nd->state = ncsi_dev_state_config_ecnt;
+ if (ncsi_channel_is_tx(ndp, nc))
+ nd->state = ncsi_dev_state_config_ecnt;
+ else
+ nd->state = ncsi_dev_state_config_ec;
#endif /* CONFIG_IPV6 */
} else if (nd->state == ncsi_dev_state_config_ecnt) {
+ if (np->preferred_channel &&
+ nc != np->preferred_channel)
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Tx failed over to channel %u\n",
+ nc->id);
nca.type = NCSI_PKT_CMD_ECNT;
nd->state = ncsi_dev_state_config_ec;
} else if (nd->state == ncsi_dev_state_config_ec) {
@@ -889,6 +1090,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
nc->id);
spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_ACTIVE;
+
+ if (ndp->flags & NCSI_DEV_RESET) {
+ /* A reset event happened during config, start it now */
+ nc->reconfigure_needed = false;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ ncsi_reset_dev(nd);
+ break;
+ }
+
if (nc->reconfigure_needed) {
/* This channel's configuration has been updated
* part-way during the config state - start the
@@ -909,10 +1120,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
hot_nc = nc;
- nc->state = NCSI_CHANNEL_ACTIVE;
} else {
hot_nc = NULL;
- nc->state = NCSI_CHANNEL_INACTIVE;
netdev_dbg(ndp->ndev.dev,
"NCSI: channel %u link down after config\n",
nc->id);
@@ -940,43 +1149,35 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_package *np, *force_package;
- struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
+ struct ncsi_channel *nc, *found, *hot_nc;
struct ncsi_channel_mode *ncm;
- unsigned long flags;
+ unsigned long flags, cflags;
+ struct ncsi_package *np;
+ bool with_link;
spin_lock_irqsave(&ndp->lock, flags);
hot_nc = ndp->hot_channel;
- force_channel = ndp->force_channel;
- force_package = ndp->force_package;
spin_unlock_irqrestore(&ndp->lock, flags);
- /* Force a specific channel whether or not it has link if we have been
- * configured to do so
- */
- if (force_package && force_channel) {
- found = force_channel;
- ncm = &found->modes[NCSI_MODE_LINK];
- if (!(ncm->data[2] & 0x1))
- netdev_info(ndp->ndev.dev,
- "NCSI: Channel %u forced, but it is link down\n",
- found->id);
- goto out;
- }
-
- /* The search is done once an inactive channel with up
- * link is found.
+ /* By default the search is done once an inactive channel with up
+ * link is found, unless a preferred channel is set.
+ * If multi_package or multi_channel are configured all channels in the
+ * whitelist are added to the channel queue.
*/
found = NULL;
+ with_link = false;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
- if (ndp->force_package && np != ndp->force_package)
+ if (!(ndp->package_whitelist & (0x1 << np->id)))
continue;
NCSI_FOR_EACH_CHANNEL(np, nc) {
- spin_lock_irqsave(&nc->lock, flags);
+ if (!(np->channel_whitelist & (0x1 << nc->id)))
+ continue;
+
+ spin_lock_irqsave(&nc->lock, cflags);
if (!list_empty(&nc->link) ||
nc->state != NCSI_CHANNEL_INACTIVE) {
- spin_unlock_irqrestore(&nc->lock, flags);
+ spin_unlock_irqrestore(&nc->lock, cflags);
continue;
}
@@ -988,32 +1189,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
ncm = &nc->modes[NCSI_MODE_LINK];
if (ncm->data[2] & 0x1) {
- spin_unlock_irqrestore(&nc->lock, flags);
found = nc;
- goto out;
+ with_link = true;
}
- spin_unlock_irqrestore(&nc->lock, flags);
+ /* If multi_channel is enabled configure all valid
+ * channels whether or not they currently have link
+ * so they will have AENs enabled.
+ */
+ if (with_link || np->multi_channel) {
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link,
+ &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Channel %u added to queue (link %s)\n",
+ nc->id,
+ ncm->data[2] & 0x1 ? "up" : "down");
+ }
+
+ spin_unlock_irqrestore(&nc->lock, cflags);
+
+ if (with_link && !np->multi_channel)
+ break;
}
+ if (with_link && !ndp->multi_package)
+ break;
}
- if (!found) {
+ if (list_empty(&ndp->channel_queue) && found) {
+ netdev_info(ndp->ndev.dev,
+ "NCSI: No channel with link found, configuring channel %u\n",
+ found->id);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&found->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ } else if (!found) {
netdev_warn(ndp->ndev.dev,
- "NCSI: No channel found with link\n");
+ "NCSI: No channel found to configure!\n");
ncsi_report_link(ndp, true);
return -ENODEV;
}
- ncm = &found->modes[NCSI_MODE_LINK];
- netdev_dbg(ndp->ndev.dev,
- "NCSI: Channel %u added to queue (link %s)\n",
- found->id, ncm->data[2] & 0x1 ? "up" : "down");
-
-out:
- spin_lock_irqsave(&ndp->lock, flags);
- list_add_tail_rcu(&found->link, &ndp->channel_queue);
- spin_unlock_irqrestore(&ndp->lock, flags);
-
return ncsi_process_next_channel(ndp);
}
@@ -1050,35 +1268,6 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
return false;
}
-static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
-{
- struct ncsi_package *np;
- struct ncsi_channel *nc;
- unsigned long flags;
-
- /* Move all available channels to processing queue */
- spin_lock_irqsave(&ndp->lock, flags);
- NCSI_FOR_EACH_PACKAGE(ndp, np) {
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE ||
- !list_empty(&nc->link));
- ncsi_stop_channel_monitor(nc);
- list_add_tail_rcu(&nc->link, &ndp->channel_queue);
- }
- }
- spin_unlock_irqrestore(&ndp->lock, flags);
-
- /* We can have no channels in extremely case */
- if (list_empty(&ndp->channel_queue)) {
- netdev_err(ndp->ndev.dev,
- "NCSI: No available channels for HWA\n");
- ncsi_report_link(ndp, false);
- return -ENOENT;
- }
-
- return ncsi_process_next_channel(ndp);
-}
-
static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -1110,70 +1299,28 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_package;
break;
case ncsi_dev_state_probe_package:
- ndp->pending_req_num = 16;
+ ndp->pending_req_num = 1;
- /* Select all possible packages */
nca.type = NCSI_PKT_CMD_SP;
nca.bytes[0] = 1;
+ nca.package = ndp->package_probe_id;
nca.channel = NCSI_RESERVED_CHANNEL;
- for (index = 0; index < 8; index++) {
- nca.package = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
- /* Disable all possible packages */
- nca.type = NCSI_PKT_CMD_DP;
- for (index = 0; index < 8; index++) {
- nca.package = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
nd->state = ncsi_dev_state_probe_channel;
break;
case ncsi_dev_state_probe_channel:
- if (!ndp->active_package)
- ndp->active_package = list_first_or_null_rcu(
- &ndp->packages, struct ncsi_package, node);
- else if (list_is_last(&ndp->active_package->node,
- &ndp->packages))
- ndp->active_package = NULL;
- else
- ndp->active_package = list_next_entry(
- ndp->active_package, node);
-
- /* All available packages and channels are enumerated. The
- * enumeration happens for once when the NCSI interface is
- * started. So we need continue to start the interface after
- * the enumeration.
- *
- * We have to choose an active channel before configuring it.
- * Note that we possibly don't have active channel in extreme
- * situation.
- */
+ ndp->active_package = ncsi_find_package(ndp,
+ ndp->package_probe_id);
if (!ndp->active_package) {
- ndp->flags |= NCSI_DEV_PROBED;
- if (ncsi_check_hwa(ndp))
- ncsi_enable_hwa(ndp);
- else
- ncsi_choose_active_channel(ndp);
- return;
+ /* No response */
+ nd->state = ncsi_dev_state_probe_dp;
+ schedule_work(&ndp->work);
+ break;
}
-
- /* Select the active package */
- ndp->pending_req_num = 1;
- nca.type = NCSI_PKT_CMD_SP;
- nca.bytes[0] = 1;
- nca.package = ndp->active_package->id;
- nca.channel = NCSI_RESERVED_CHANNEL;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
-
nd->state = ncsi_dev_state_probe_cis;
+ schedule_work(&ndp->work);
break;
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1222,22 +1369,35 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
case ncsi_dev_state_probe_dp:
ndp->pending_req_num = 1;
- /* Deselect the active package */
+ /* Deselect the current package */
nca.type = NCSI_PKT_CMD_DP;
- nca.package = ndp->active_package->id;
+ nca.package = ndp->package_probe_id;
nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
if (ret)
goto error;
- /* Scan channels in next package */
- nd->state = ncsi_dev_state_probe_channel;
+ /* Probe next package */
+ ndp->package_probe_id++;
+ if (ndp->package_probe_id >= 8) {
+ /* Probe finished */
+ ndp->flags |= NCSI_DEV_PROBED;
+ break;
+ }
+ nd->state = ncsi_dev_state_probe_package;
+ ndp->active_package = NULL;
break;
default:
netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n",
nd->state);
}
+ if (ndp->flags & NCSI_DEV_PROBED) {
+ /* Check if all packages have HWA support */
+ ncsi_check_hwa(ndp);
+ ncsi_choose_active_channel(ndp);
+ }
+
return;
error:
netdev_err(ndp->ndev.dev,
@@ -1556,6 +1716,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
INIT_LIST_HEAD(&ndp->channel_queue);
INIT_LIST_HEAD(&ndp->vlan_vids);
INIT_WORK(&ndp->work, ncsi_dev_work);
+ ndp->package_whitelist = UINT_MAX;
/* Initialize private NCSI device */
spin_lock_init(&ndp->lock);
@@ -1592,26 +1753,19 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev);
int ncsi_start_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
- int ret;
if (nd->state != ncsi_dev_state_registered &&
nd->state != ncsi_dev_state_functional)
return -ENOTTY;
if (!(ndp->flags & NCSI_DEV_PROBED)) {
+ ndp->package_probe_id = 0;
nd->state = ncsi_dev_state_probe;
schedule_work(&ndp->work);
return 0;
}
- if (ndp->flags & NCSI_DEV_HWA) {
- netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
- ret = ncsi_enable_hwa(ndp);
- } else {
- ret = ncsi_choose_active_channel(ndp);
- }
-
- return ret;
+ return ncsi_reset_dev(nd);
}
EXPORT_SYMBOL_GPL(ncsi_start_dev);
@@ -1624,7 +1778,10 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
int old_state;
unsigned long flags;
- /* Stop the channel monitor and reset channel's state */
+ /* Stop the channel monitor on any active channels. Don't reset the
+ * channel state so we know which were active when ncsi_start_dev()
+ * is next called.
+ */
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
ncsi_stop_channel_monitor(nc);
@@ -1632,7 +1789,6 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
spin_lock_irqsave(&nc->lock, flags);
chained = !list_empty(&nc->link);
old_state = nc->state;
- nc->state = NCSI_CHANNEL_INACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
WARN_ON_ONCE(chained ||
@@ -1645,6 +1801,92 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
}
EXPORT_SYMBOL_GPL(ncsi_stop_dev);
+int ncsi_reset_dev(struct ncsi_dev *nd)
+{ /* Suspends one ACTIVE channel per call; once none is left, clears NCSI_DEV_RESET and picks channels afresh. */
+ struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+ struct ncsi_channel *nc, *active, *tmp;
+ struct ncsi_package *np;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+
+ if (!(ndp->flags & NCSI_DEV_RESET)) {
+ /* Haven't been called yet, check states */
+ switch (nd->state & ncsi_dev_state_major) {
+ case ncsi_dev_state_registered:
+ case ncsi_dev_state_probe:
+ /* Not even probed yet - do nothing */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ case ncsi_dev_state_suspend:
+ case ncsi_dev_state_config:
+ /* Wait for the channel to finish its suspend/config
+ * operation; once it finishes it will check for
+ * NCSI_DEV_RESET and reset the state.
+ */
+ ndp->flags |= NCSI_DEV_RESET;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ } /* no default case: any other major state falls through to the reset below */
+ } else {
+ switch (nd->state) {
+ case ncsi_dev_state_suspend_done:
+ case ncsi_dev_state_config_done:
+ case ncsi_dev_state_functional:
+ /* Ok */
+ break;
+ default:
+ /* Current reset operation happening */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ }
+ }
+
+ if (!list_empty(&ndp->channel_queue)) {
+ /* Clear any channel queue we may have interrupted */
+ list_for_each_entry_safe(nc, tmp, &ndp->channel_queue, link)
+ list_del_init(&nc->link);
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ active = NULL; /* will hold the first ACTIVE channel found, if any */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
+ if (nc->state == NCSI_CHANNEL_ACTIVE) {
+ active = nc;
+ nc->state = NCSI_CHANNEL_INVISIBLE; /* no longer ACTIVE, so a later call will not pick it again */
+ spin_unlock_irqrestore(&nc->lock, flags);
+ ncsi_stop_channel_monitor(nc);
+ break;
+ }
+
+ spin_unlock_irqrestore(&nc->lock, flags);
+ }
+ if (active)
+ break; /* only one channel is handled per call */
+ }
+
+ if (!active) {
+ /* Done */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->flags &= ~NCSI_DEV_RESET;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return ncsi_choose_active_channel(ndp);
+ }
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->flags |= NCSI_DEV_RESET;
+ ndp->active_channel = active;
+ ndp->active_package = active->package;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ nd->state = ncsi_dev_state_suspend; /* the scheduled work then starts from the suspend state */
+ schedule_work(&ndp->work);
+ return 0;
+}
+
void ncsi_unregister_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 33314381b4f5..5d782445d2fc 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -30,6 +30,9 @@ static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
[NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
[NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
[NCSI_ATTR_DATA] = { .type = NLA_BINARY, .len = 2048 },
+ [NCSI_ATTR_MULTI_FLAG] = { .type = NLA_FLAG },
+ [NCSI_ATTR_PACKAGE_MASK] = { .type = NLA_U32 },
+ [NCSI_ATTR_CHANNEL_MASK] = { .type = NLA_U32 },
};
static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
@@ -69,7 +72,7 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
if (nc->state == NCSI_CHANNEL_ACTIVE)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
- if (ndp->force_channel == nc)
+ if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
@@ -114,7 +117,7 @@ static int ncsi_write_package_info(struct sk_buff *skb,
if (!pnest)
return -ENOMEM;
nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
- if (ndp->force_package == np)
+ if ((0x1 << np->id) == ndp->package_whitelist)
nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
if (!cnest) {
@@ -290,49 +293,58 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
package = NULL;
- spin_lock_irqsave(&ndp->lock, flags);
-
NCSI_FOR_EACH_PACKAGE(ndp, np)
if (np->id == package_id)
package = np;
if (!package) {
/* The user has set a package that does not exist */
- spin_unlock_irqrestore(&ndp->lock, flags);
return -ERANGE;
}
channel = NULL;
- if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
- /* Allow any channel */
- channel_id = NCSI_RESERVED_CHANNEL;
- } else {
+ if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
NCSI_FOR_EACH_CHANNEL(package, nc)
- if (nc->id == channel_id)
+ if (nc->id == channel_id) {
channel = nc;
+ break;
+ }
+ if (!channel) {
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Channel %u does not exist!\n",
+ channel_id);
+ return -ERANGE;
+ }
}
- if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
- /* The user has set a channel that does not exist on this
- * package
- */
- spin_unlock_irqrestore(&ndp->lock, flags);
- netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
- channel_id);
- return -ERANGE;
- }
-
- ndp->force_package = package;
- ndp->force_channel = channel;
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->package_whitelist = 0x1 << package->id;
+ ndp->multi_package = false;
spin_unlock_irqrestore(&ndp->lock, flags);
- netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
- package_id, channel_id,
- channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+ spin_lock_irqsave(&package->lock, flags);
+ package->multi_channel = false;
+ if (channel) {
+ package->channel_whitelist = 0x1 << channel->id;
+ package->preferred_channel = channel;
+ } else {
+ /* Allow any channel */
+ package->channel_whitelist = UINT_MAX;
+ package->preferred_channel = NULL;
+ }
+ spin_unlock_irqrestore(&package->lock, flags);
+
+ if (channel)
+ netdev_info(ndp->ndev.dev,
+ "Set package 0x%x, channel 0x%x as preferred\n",
+ package_id, channel_id);
+ else
+ netdev_info(ndp->ndev.dev, "Set package 0x%x as preferred\n",
+ package_id);
- /* Bounce the NCSI channel to set changes */
- ncsi_stop_dev(&ndp->ndev);
- ncsi_start_dev(&ndp->ndev);
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
return 0;
}
@@ -340,6 +352,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
{
struct ncsi_dev_priv *ndp;
+ struct ncsi_package *np;
unsigned long flags;
if (!info || !info->attrs)
@@ -353,16 +366,24 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
if (!ndp)
return -ENODEV;
- /* Clear any override */
+ /* Reset any whitelists and disable multi mode */
spin_lock_irqsave(&ndp->lock, flags);
- ndp->force_package = NULL;
- ndp->force_channel = NULL;
+ ndp->package_whitelist = UINT_MAX;
+ ndp->multi_package = false;
spin_unlock_irqrestore(&ndp->lock, flags);
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ spin_lock_irqsave(&np->lock, flags);
+ np->multi_channel = false;
+ np->channel_whitelist = UINT_MAX;
+ np->preferred_channel = NULL;
+ spin_unlock_irqrestore(&np->lock, flags);
+ }
netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
- /* Bounce the NCSI channel to set changes */
- ncsi_stop_dev(&ndp->ndev);
- ncsi_start_dev(&ndp->ndev);
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
return 0;
}
@@ -563,6 +584,138 @@ int ncsi_send_netlink_err(struct net_device *dev,
return nlmsg_unicast(net->genl_sock, skb, snd_portid);
}
+static int ncsi_set_package_mask_nl(struct sk_buff *msg,
+ struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+ int rc;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_MASK])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+ if (ndp->flags & NCSI_DEV_HWA) {
+ ndp->multi_package = true;
+ rc = 0;
+ } else {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Can't use multiple packages without HWA\n");
+ rc = -EPERM;
+ }
+ } else {
+ ndp->multi_package = false;
+ rc = 0;
+ }
+
+ if (!rc)
+ ndp->package_whitelist =
+ nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_MASK]);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ if (!rc) {
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
+ }
+
+ return rc;
+}
+
+static int ncsi_set_channel_mask_nl(struct sk_buff *msg,
+ struct genl_info *info)
+{
+ struct ncsi_package *np, *package;
+ struct ncsi_channel *nc, *channel;
+ u32 package_id, channel_id;
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_CHANNEL_MASK])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ package = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id) {
+ package = np;
+ break;
+ }
+ if (!package)
+ return -ERANGE;
+
+ spin_lock_irqsave(&package->lock, flags);
+
+ channel = NULL;
+ if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+ NCSI_FOR_EACH_CHANNEL(np, nc)
+ if (nc->id == channel_id) {
+ channel = nc;
+ break;
+ }
+ if (!channel) {
+ spin_unlock_irqrestore(&package->lock, flags);
+ return -ERANGE;
+ }
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Channel %u set as preferred channel\n",
+ channel->id);
+ }
+
+ package->channel_whitelist =
+ nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_MASK]);
+ if (package->channel_whitelist == 0)
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Package %u set to all channels disabled\n",
+ package->id);
+
+ package->preferred_channel = channel;
+
+ if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+ package->multi_channel = true;
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Multi-channel enabled on package %u\n",
+ package_id);
+ } else {
+ package->multi_channel = false;
+ }
+
+ spin_unlock_irqrestore(&package->lock, flags);
+
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
+
+ return 0;
+}
+
static const struct genl_ops ncsi_ops[] = {
{
.cmd = NCSI_CMD_PKG_INFO,
@@ -589,6 +742,18 @@ static const struct genl_ops ncsi_ops[] = {
.doit = ncsi_send_cmd_nl,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = NCSI_CMD_SET_PACKAGE_MASK,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_package_mask_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = NCSI_CMD_SET_CHANNEL_MASK,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_channel_mask_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family ncsi_genl_family __ro_after_init = {
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 4d3f06be38bd..2a6d83a596c9 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -165,6 +165,15 @@ struct ncsi_rsp_oem_pkt {
unsigned char data[]; /* Payload data */
};
+/* Mellanox Response Data */
+struct ncsi_rsp_oem_mlx_pkt {
+ unsigned char cmd_rev; /* Command Revision */
+ unsigned char cmd; /* Command ID */
+ unsigned char param; /* Parameter */
+ unsigned char optional; /* Optional data */
+ unsigned char data[]; /* Data */
+};
+
/* Broadcom Response Data */
struct ncsi_rsp_oem_bcm_pkt {
unsigned char ver; /* Payload Version */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 77e07ba3f493..dc07fcc7938e 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -256,7 +256,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
if (!ncm->enable)
return 0;
- ncm->enable = 1;
+ ncm->enable = 0;
return 0;
}
@@ -611,6 +611,45 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
return 0;
}
+/* Response handler for Mellanox command Get Mac Address */
+static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ const struct net_device_ops *ops = ndev->netdev_ops;
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = 0;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN);
+ ret = ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0)
+ netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
+
+ return ret;
+}
+
+/* Response handler for Mellanox card */
+static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr)
+{
+ struct ncsi_rsp_oem_mlx_pkt *mlx;
+ struct ncsi_rsp_oem_pkt *rsp;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+ mlx = (struct ncsi_rsp_oem_mlx_pkt *)(rsp->data);
+
+ if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA &&
+ mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM)
+ return ncsi_rsp_handler_oem_mlx_gma(nr);
+ return 0;
+}
+
/* Response handler for Broadcom command Get Mac Address */
static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
{
@@ -655,7 +694,7 @@ static struct ncsi_rsp_oem_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_request *nr);
} ncsi_rsp_oem_handlers[] = {
- { NCSI_OEM_MFR_MLX_ID, NULL },
+ { NCSI_OEM_MFR_MLX_ID, ncsi_rsp_handler_oem_mlx },
{ NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }
};
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 83395bf6dc35..432141f04af3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+ .priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
};
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 02ca7df793f5..b6d0f6deea86 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
+ bool dead;
struct rcu_head rcu_head;
};
@@ -106,15 +107,16 @@ nf_conncount_add(struct nf_conncount_list *list,
conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
- spin_lock(&list->list_lock);
+ conn->dead = false;
+ spin_lock_bh(&list->list_lock);
if (list->dead == true) {
kmem_cache_free(conncount_conn_cachep, conn);
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_SKIP;
}
list_add_tail(&conn->node, &list->head);
list->count++;
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
return NF_CONNCOUNT_ADDED;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
@@ -132,19 +134,22 @@ static bool conn_free(struct nf_conncount_list *list,
{
bool free_entry = false;
- spin_lock(&list->list_lock);
+ spin_lock_bh(&list->list_lock);
- if (list->count == 0) {
- spin_unlock(&list->list_lock);
- return free_entry;
+ if (conn->dead) {
+ spin_unlock_bh(&list->list_lock);
+ return free_entry;
}
list->count--;
+ conn->dead = true;
list_del_rcu(&conn->node);
- if (list->count == 0)
+ if (list->count == 0) {
+ list->dead = true;
free_entry = true;
+ }
- spin_unlock(&list->list_lock);
+ spin_unlock_bh(&list->list_lock);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry;
}
@@ -245,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
{
spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head);
- list->count = 1;
+ list->count = 0;
list->dead = false;
}
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -259,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conn *found_ct;
unsigned int collected = 0;
bool free_entry = false;
+ bool ret = false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry);
@@ -288,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
if (collected > CONNCOUNT_GC_MAX_NODES)
return false;
}
- return false;
+
+ spin_lock_bh(&list->list_lock);
+ if (!list->count) {
+ list->dead = true;
+ ret = true;
+ }
+ spin_unlock_bh(&list->list_lock);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
@@ -309,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) {
rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock);
- if (rbconn->list.count == 0 && rbconn->list.dead == false) {
- rbconn->list.dead = true;
- rb_erase(&rbconn->node, root);
- call_rcu(&rbconn->rcu_head, __tree_nodes_free);
- }
+ rb_erase(&rbconn->node, root);
+ call_rcu(&rbconn->rcu_head, __tree_nodes_free);
spin_unlock(&rbconn->list.list_lock);
}
}
@@ -414,6 +425,7 @@ insert_tree(struct net *net,
nf_conncount_list_init(&rbconn->list);
list_add(&conn->node, &rbconn->list.head);
count = 1;
+ rbconn->list.count = count;
rb_link_node(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9b48dc8b4b88..2a5e56c6d8d9 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
-enum grep_conntrack {
- GRE_CT_UNREPLIED,
- GRE_CT_REPLIED,
- GRE_CT_MAX
-};
-
static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_UNREPLIED] = 30*HZ,
[GRE_CT_REPLIED] = 180*HZ,
};
static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
- struct nf_proto_net nf;
- rwlock_t keymap_lock;
- struct list_head keymap_list;
- unsigned int gre_timeouts[GRE_CT_MAX];
-};
static inline struct netns_proto_gre *gre_pernet(struct net *net)
{
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
{
int ret;
+ BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
ret = register_pernet_subsys(&proto_gre_net_ops);
if (ret < 0)
goto out_pernet;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 42487d01a3ed..2e61aab6ed73 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2457,7 +2457,7 @@ err:
static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
struct nft_rule *rule)
{
- struct nft_expr *expr;
+ struct nft_expr *expr, *next;
/*
* Careful: some expressions might not be initialized in case this
@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
*/
expr = nft_expr_first(rule);
while (expr != nft_expr_last(rule) && expr->ops) {
+ next = nft_expr_next(expr);
nf_tables_expr_destroy(ctx, expr);
- expr = nft_expr_next(expr);
+ expr = next;
}
kfree(rule);
}
@@ -2589,17 +2590,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
if (chain->use == UINT_MAX)
return -EOVERFLOW;
- }
-
- if (nla[NFTA_RULE_POSITION]) {
- if (!(nlh->nlmsg_flags & NLM_F_CREATE))
- return -EOPNOTSUPP;
- pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
- if (IS_ERR(old_rule)) {
- NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
- return PTR_ERR(old_rule);
+ if (nla[NFTA_RULE_POSITION]) {
+ pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+ old_rule = __nft_rule_lookup(chain, pos_handle);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+ return PTR_ERR(old_rule);
+ }
}
}
@@ -2669,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
- if (!nft_is_active_next(net, old_rule)) {
- err = -ENOENT;
- goto err2;
- }
- trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
- old_rule);
+ trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err2;
}
- nft_deactivate_next(net, old_rule);
- chain->use--;
-
- if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
- err = -ENOMEM;
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0) {
+ nft_trans_destroy(trans);
goto err2;
}
@@ -6324,7 +6315,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
}
-static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
{
struct nft_rule **g0, **g1;
bool next_genbit;
@@ -6441,11 +6432,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
/* step 2. Make rules_gen_X visible to packet path */
list_for_each_entry(table, &net->nft.tables, list) {
- list_for_each_entry(chain, &table->chains, list) {
- if (!nft_is_active_next(net, chain))
- continue;
- nf_tables_commit_chain_active(net, chain);
- }
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_commit_chain(net, chain);
}
/*
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a518eb162344..109b0d27345a 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -455,7 +455,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
case IPPROTO_TCP:
timeouts = nf_tcp_pernet(net)->timeouts;
break;
- case IPPROTO_UDP:
+ case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(net)->timeouts;
break;
case IPPROTO_DCCP:
@@ -471,11 +472,21 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
timeouts = nf_sctp_pernet(net)->timeouts;
#endif
break;
+ case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ if (l4proto->net_id) {
+ struct netns_proto_gre *net_gre;
+
+ net_gre = net_generic(net, *l4proto->net_id);
+ timeouts = net_gre->gre_timeouts;
+ }
+#endif
+ break;
case 255:
timeouts = &nf_generic_pernet(net)->timeout;
break;
default:
- WARN_ON_ONCE(1);
+ WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
break;
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 43041f087eb3..1ce30efe6854 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1148,8 +1148,9 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
return -EINVAL;
- entry->skb->vlan_tci = ntohs(nla_get_be16(tb[NFQA_VLAN_TCI]));
- entry->skb->vlan_proto = nla_get_be16(tb[NFQA_VLAN_PROTO]);
+ __vlan_hwaccel_put_tag(entry->skb,
+ nla_get_be16(tb[NFQA_VLAN_PROTO]),
+ ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
}
if (nfqa[NFQA_L2HDR]) {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 9d0ede474224..7334e0b80a5e 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -520,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
void *info)
{
struct xt_match *match = expr->ops->data;
+ struct module *me = match->me;
struct xt_mtdtor_param par;
par.net = ctx->net;
@@ -530,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
par.match->destroy(&par);
if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(match->me);
+ module_put(me);
}
static void
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e82d9a966c45..974525eb92df 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
{
int err;
- register_netdevice_notifier(&flow_offload_netdev_notifier);
+ err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+ if (err)
+ goto err;
err = nft_register_expr(&nft_flow_offload_type);
if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
register_expr:
unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
return err;
}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843cadf46..9e05c86ba5c4 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
return 0;
}
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
- struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
- WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
-}
-
static struct pernet_operations xt_rateest_net_ops = {
.init = xt_rateest_net_init,
- .exit = xt_rateest_net_exit,
.id = &xt_rateest_id,
.size = sizeof(struct xt_rateest_net),
};
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3e7d259e5d8d..1ad4017f9b73 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
/* copy match config into hashtable config */
ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
-
- if (ret)
+ if (ret) {
+ vfree(hinfo);
return ret;
+ }
hinfo->cfg.size = size;
if (hinfo->cfg.max == 0)
@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
int ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
if (ret)
return ret;
@@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
return ret;
ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
if (ret)
return ret;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index a4660c48ff01..cd94f925495a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1166,7 +1166,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
if (err) {
net_warn_ratelimited("openvswitch: zone: %u "
- "execeeds conntrack limit\n",
+ "exceeds conntrack limit\n",
info->zone.id);
return err;
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fa393815991e..57e07768c9d1 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -597,7 +597,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
* skb_vlan_pop(), which will later shift the ethertype into
* skb->protocol.
*/
- if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+ if (key->eth.cvlan.tci & htons(VLAN_CFI_MASK))
skb->protocol = key->eth.cvlan.tpid;
else
skb->protocol = key->eth.type;
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 5aaf3babfc3f..acb6077b7478 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -93,7 +93,7 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
return ERR_CAST(dev);
}
- err = dev_change_flags(dev, dev->flags | IFF_UP);
+ err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
if (err < 0) {
rtnl_delete_link(dev);
rtnl_unlock();
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 0e72d95b0e8f..c38a62464b85 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -68,7 +68,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms)
return ERR_CAST(dev);
}
- err = dev_change_flags(dev, dev->flags | IFF_UP);
+ err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
if (err < 0) {
rtnl_delete_link(dev);
rtnl_unlock();
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 7e6301b2ec4d..8f16f11f7ad3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -131,7 +131,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
return ERR_CAST(dev);
}
- err = dev_change_flags(dev, dev->flags | IFF_UP);
+ err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
if (err < 0) {
rtnl_delete_link(dev);
rtnl_unlock();
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aae..a74650e98f42 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
void *ph;
__u32 ts;
- ph = skb_shinfo(skb)->destructor_arg;
+ ph = skb_zcopy_get_nouarg(skb);
packet_dec_pending(&po->tx_ring);
ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
- skb_shinfo(skb)->destructor_arg = ph.raw;
+ skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da8..a2522f9d71e2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,17 +375,36 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* getting ACKs from the server. Returns a number representing the life state
* which can be compared to that returned by a previous call.
*
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called and
+ * the caller should then wait about 2*RTT.
*/
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+ const struct rxrpc_call *call)
{
return call->acks_latest;
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it. This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * This must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_check_life);
+ rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
+/**
* rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
* @sock: The socket the call is on
* @call: The call to query
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9c1b0729aebf..d4b8355737d8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,8 +21,6 @@
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
-#include <linux/rhashtable.h>
-#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
@@ -1522,227 +1520,8 @@ out_module_put:
return skb->len;
}
-struct tcf_action_net {
- struct rhashtable egdev_ht;
-};
-
-static unsigned int tcf_action_net_id;
-
-struct tcf_action_egdev_cb {
- struct list_head list;
- tc_setup_cb_t *cb;
- void *cb_priv;
-};
-
-struct tcf_action_egdev {
- struct rhash_head ht_node;
- const struct net_device *dev;
- unsigned int refcnt;
- struct list_head cb_list;
-};
-
-static const struct rhashtable_params tcf_action_egdev_ht_params = {
- .key_offset = offsetof(struct tcf_action_egdev, dev),
- .head_offset = offsetof(struct tcf_action_egdev, ht_node),
- .key_len = sizeof(const struct net_device *),
-};
-
-static struct tcf_action_egdev *
-tcf_action_egdev_lookup(const struct net_device *dev)
-{
- struct net *net = dev_net(dev);
- struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
- return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
- tcf_action_egdev_ht_params);
-}
-
-static struct tcf_action_egdev *
-tcf_action_egdev_get(const struct net_device *dev)
-{
- struct tcf_action_egdev *egdev;
- struct tcf_action_net *tan;
-
- egdev = tcf_action_egdev_lookup(dev);
- if (egdev)
- goto inc_ref;
-
- egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
- if (!egdev)
- return NULL;
- INIT_LIST_HEAD(&egdev->cb_list);
- egdev->dev = dev;
- tan = net_generic(dev_net(dev), tcf_action_net_id);
- rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
- tcf_action_egdev_ht_params);
-
-inc_ref:
- egdev->refcnt++;
- return egdev;
-}
-
-static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
-{
- struct tcf_action_net *tan;
-
- if (--egdev->refcnt)
- return;
- tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
- rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
- tcf_action_egdev_ht_params);
- kfree(egdev);
-}
-
-static struct tcf_action_egdev_cb *
-tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- struct tcf_action_egdev_cb *egdev_cb;
-
- list_for_each_entry(egdev_cb, &egdev->cb_list, list)
- if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
- return egdev_cb;
- return NULL;
-}
-
-static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
- enum tc_setup_type type,
- void *type_data, bool err_stop)
-{
- struct tcf_action_egdev_cb *egdev_cb;
- int ok_count = 0;
- int err;
-
- list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
- err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
- if (err) {
- if (err_stop)
- return err;
- } else {
- ok_count++;
- }
- }
- return ok_count;
-}
-
-static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- struct tcf_action_egdev_cb *egdev_cb;
-
- egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
- if (WARN_ON(egdev_cb))
- return -EEXIST;
- egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
- if (!egdev_cb)
- return -ENOMEM;
- egdev_cb->cb = cb;
- egdev_cb->cb_priv = cb_priv;
- list_add(&egdev_cb->list, &egdev->cb_list);
- return 0;
-}
-
-static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- struct tcf_action_egdev_cb *egdev_cb;
-
- egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
- if (WARN_ON(!egdev_cb))
- return;
- list_del(&egdev_cb->list);
- kfree(egdev_cb);
-}
-
-static int __tc_setup_cb_egdev_register(const struct net_device *dev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
- int err;
-
- if (!egdev)
- return -ENOMEM;
- err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
- if (err)
- goto err_cb_add;
- return 0;
-
-err_cb_add:
- tcf_action_egdev_put(egdev);
- return err;
-}
-int tc_setup_cb_egdev_register(const struct net_device *dev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- int err;
-
- rtnl_lock();
- err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
- rtnl_unlock();
- return err;
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
-
-static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
-
- if (WARN_ON(!egdev))
- return;
- tcf_action_egdev_cb_del(egdev, cb, cb_priv);
- tcf_action_egdev_put(egdev);
-}
-void tc_setup_cb_egdev_unregister(const struct net_device *dev,
- tc_setup_cb_t *cb, void *cb_priv)
-{
- rtnl_lock();
- __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
- rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
-
-int tc_setup_cb_egdev_call(const struct net_device *dev,
- enum tc_setup_type type, void *type_data,
- bool err_stop)
-{
- struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
-
- if (!egdev)
- return 0;
- return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
-}
-EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
-
-static __net_init int tcf_action_net_init(struct net *net)
-{
- struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
- return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
-}
-
-static void __net_exit tcf_action_net_exit(struct net *net)
-{
- struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
-
- rhashtable_destroy(&tan->egdev_ht);
-}
-
-static struct pernet_operations tcf_action_net_ops = {
- .init = tcf_action_net_init,
- .exit = tcf_action_net_exit,
- .id = &tcf_action_net_id,
- .size = sizeof(struct tcf_action_net),
-};
-
static int __init tc_action_init(void)
{
- int err;
-
- err = register_pernet_subsys(&tcf_action_net_ops);
- if (err)
- return err;
-
rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1dae5f2b358f..c8cf4d10c435 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (is_redirect) {
skb2->tc_redirected = 1;
skb2->tc_from_ingress = skb2->tc_at_ingress;
-
+ if (skb2->tc_from_ingress)
+ skb2->tstamp = 0;
/* let's the caller reinsert the packet, if possible */
if (use_reinsert) {
res->ingress = want_ingress;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index da3dd0f68cc2..2b372a06b432 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -201,7 +201,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_release;
}
} else {
- return err;
+ ret = err;
+ goto out_free;
}
p = to_pedit(*a);
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 052855d47354..ec8ec55e0fe8 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -27,10 +27,7 @@ struct tcf_police_params {
u32 tcfp_ewma_rate;
s64 tcfp_burst;
u32 tcfp_mtu;
- s64 tcfp_toks;
- s64 tcfp_ptoks;
s64 tcfp_mtu_ptoks;
- s64 tcfp_t_c;
struct psched_ratecfg rate;
bool rate_present;
struct psched_ratecfg peak;
@@ -41,6 +38,11 @@ struct tcf_police_params {
struct tcf_police {
struct tc_action common;
struct tcf_police_params __rcu *params;
+
+ spinlock_t tcfp_lock ____cacheline_aligned_in_smp;
+ s64 tcfp_toks;
+ s64 tcfp_ptoks;
+ s64 tcfp_t_c;
};
#define to_police(pc) ((struct tcf_police *)pc)
@@ -83,7 +85,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
int ovr, int bind, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- int ret = 0, err;
+ int ret = 0, tcfp_result = TC_ACT_OK, err, size;
struct nlattr *tb[TCA_POLICE_MAX + 1];
struct tc_police *parm;
struct tcf_police *police;
@@ -91,7 +93,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, police_net_id);
struct tcf_police_params *new;
bool exists = false;
- int size;
if (nla == NULL)
return -EINVAL;
@@ -122,6 +123,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return ret;
}
ret = ACT_P_CREATED;
+ spin_lock_init(&(to_police(*a)->tcfp_lock));
} else if (!ovr) {
tcf_idr_release(*a, bind);
return -EEXIST;
@@ -157,6 +159,16 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
goto failure;
}
+ if (tb[TCA_POLICE_RESULT]) {
+ tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+ if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) {
+ NL_SET_ERR_MSG(extack,
+ "goto chain not allowed on fallback");
+ err = -EINVAL;
+ goto failure;
+ }
+ }
+
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (unlikely(!new)) {
err = -ENOMEM;
@@ -164,6 +176,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
/* No failure allowed after this point */
+ new->tcfp_result = tcfp_result;
new->tcfp_mtu = parm->mtu;
if (!new->tcfp_mtu) {
new->tcfp_mtu = ~0;
@@ -186,28 +199,20 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
}
new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
- new->tcfp_toks = new->tcfp_burst;
- if (new->peak_present) {
+ if (new->peak_present)
new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
new->tcfp_mtu);
- new->tcfp_ptoks = new->tcfp_mtu_ptoks;
- }
if (tb[TCA_POLICE_AVRATE])
new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
- if (tb[TCA_POLICE_RESULT]) {
- new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
- if (TC_ACT_EXT_CMP(new->tcfp_result, TC_ACT_GOTO_CHAIN)) {
- NL_SET_ERR_MSG(extack,
- "goto chain not allowed on fallback");
- err = -EINVAL;
- goto failure;
- }
- }
-
spin_lock_bh(&police->tcf_lock);
- new->tcfp_t_c = ktime_get_ns();
+ spin_lock_bh(&police->tcfp_lock);
+ police->tcfp_t_c = ktime_get_ns();
+ police->tcfp_toks = new->tcfp_burst;
+ if (new->peak_present)
+ police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
police->tcf_action = parm->action;
rcu_swap_protected(police->params,
new,
@@ -257,25 +262,28 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
}
now = ktime_get_ns();
- toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+ spin_lock_bh(&police->tcfp_lock);
+ toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
if (p->peak_present) {
- ptoks = toks + p->tcfp_ptoks;
+ ptoks = toks + police->tcfp_ptoks;
if (ptoks > p->tcfp_mtu_ptoks)
ptoks = p->tcfp_mtu_ptoks;
ptoks -= (s64)psched_l2t_ns(&p->peak,
qdisc_pkt_len(skb));
}
- toks += p->tcfp_toks;
+ toks += police->tcfp_toks;
if (toks > p->tcfp_burst)
toks = p->tcfp_burst;
toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
if ((toks|ptoks) >= 0) {
- p->tcfp_t_c = now;
- p->tcfp_toks = toks;
- p->tcfp_ptoks = ptoks;
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock_bh(&police->tcfp_lock);
ret = p->tcfp_result;
goto inc_drops;
}
+ spin_unlock_bh(&police->tcfp_lock);
}
inc_overlimits:
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 4cca8f274662..c3b90fadaff6 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -210,9 +210,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct tcf_tunnel_key *t;
bool exists = false;
__be16 dst_port = 0;
+ __be64 key_id = 0;
int opts_len = 0;
- __be64 key_id;
- __be16 flags;
+ __be16 flags = 0;
u8 tos, ttl;
int ret = 0;
int err;
@@ -246,15 +246,15 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
case TCA_TUNNEL_KEY_ACT_RELEASE:
break;
case TCA_TUNNEL_KEY_ACT_SET:
- if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
- NL_SET_ERR_MSG(extack, "Missing tunnel key id");
- ret = -EINVAL;
- goto err_out;
- }
+ if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+ __be32 key32;
- key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+ key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
+ key_id = key32_to_tunnel_id(key32);
+ flags = TUNNEL_KEY;
+ }
- flags = TUNNEL_KEY | TUNNEL_CSUM;
+ flags |= TUNNEL_CSUM;
if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
flags &= ~TUNNEL_CSUM;
@@ -508,10 +508,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
struct ip_tunnel_key *key = &info->key;
__be32 key_id = tunnel_id_to_key32(key->tun_id);
- if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+ if (((key->tun_flags & TUNNEL_KEY) &&
+ nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
tunnel_key_dump_addresses(skb,
&params->tcft_enc_metadata->u.tun_info) ||
- nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst) ||
+ (key->tp_dst &&
+ nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
+ key->tp_dst)) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
!(key->tun_flags & TUNNEL_CSUM)) ||
tunnel_key_opts_dump(skb, info))
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ba677d54a7af..93fdaf707313 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -63,7 +63,7 @@ static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a,
/* extract existing tag (and guarantee no hw-accel tag) */
if (skb_vlan_tag_present(skb)) {
tci = skb_vlan_tag_get(skb);
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
} else {
/* in-payload vlan tag, pop it */
err = __skb_vlan_pop(skb, &tci);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f427a1e00e7e..8ce2a0507970 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -25,6 +25,7 @@
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/idr.h>
+#include <linux/rhashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -365,6 +366,245 @@ static void tcf_chain_flush(struct tcf_chain *chain)
}
}
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
+{
+ const struct Qdisc_class_ops *cops;
+ struct Qdisc *qdisc;
+
+ if (!dev_ingress_queue(dev))
+ return NULL;
+
+ qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+ if (!qdisc)
+ return NULL;
+
+ cops = qdisc->ops->cl_ops;
+ if (!cops)
+ return NULL;
+
+ if (!cops->tcf_block)
+ return NULL;
+
+ return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
+}
+
+static struct rhashtable indr_setup_block_ht;
+
+struct tc_indr_block_dev {
+ struct rhash_head ht_node;
+ struct net_device *dev;
+ unsigned int refcnt;
+ struct list_head cb_list;
+ struct tcf_block *block;
+};
+
+struct tc_indr_block_cb {
+ struct list_head list;
+ void *cb_priv;
+ tc_indr_block_bind_cb_t *cb;
+ void *cb_ident;
+};
+
+static const struct rhashtable_params tc_indr_setup_block_ht_params = {
+ .key_offset = offsetof(struct tc_indr_block_dev, dev),
+ .head_offset = offsetof(struct tc_indr_block_dev, ht_node),
+ .key_len = sizeof(struct net_device *),
+};
+
+static struct tc_indr_block_dev *
+tc_indr_block_dev_lookup(struct net_device *dev)
+{
+ return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+ tc_indr_setup_block_ht_params);
+}
+
+static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
+{
+ struct tc_indr_block_dev *indr_dev;
+
+ indr_dev = tc_indr_block_dev_lookup(dev);
+ if (indr_dev)
+ goto inc_ref;
+
+ indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+ if (!indr_dev)
+ return NULL;
+
+ INIT_LIST_HEAD(&indr_dev->cb_list);
+ indr_dev->dev = dev;
+ indr_dev->block = tc_dev_ingress_block(dev);
+ if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ tc_indr_setup_block_ht_params)) {
+ kfree(indr_dev);
+ return NULL;
+ }
+
+inc_ref:
+ indr_dev->refcnt++;
+ return indr_dev;
+}
+
+static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
+{
+ if (--indr_dev->refcnt)
+ return;
+
+ rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+ tc_indr_setup_block_ht_params);
+ kfree(indr_dev);
+}
+
+static struct tc_indr_block_cb *
+tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ if (indr_block_cb->cb == cb &&
+ indr_block_cb->cb_ident == cb_ident)
+ return indr_block_cb;
+ return NULL;
+}
+
+static struct tc_indr_block_cb *
+tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+
+ indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (indr_block_cb)
+ return ERR_PTR(-EEXIST);
+
+ indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+ if (!indr_block_cb)
+ return ERR_PTR(-ENOMEM);
+
+ indr_block_cb->cb_priv = cb_priv;
+ indr_block_cb->cb = cb;
+ indr_block_cb->cb_ident = cb_ident;
+ list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+ return indr_block_cb;
+}
+
+static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
+{
+ list_del(&indr_block_cb->list);
+ kfree(indr_block_cb);
+}
+
+static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
+ struct tc_indr_block_cb *indr_block_cb,
+ enum tc_block_command command)
+{
+ struct tc_block_offload bo = {
+ .command = command,
+ .binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+ .block = indr_dev->block,
+ };
+
+ if (!indr_dev->block)
+ return;
+
+ indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ &bo);
+}
+
+int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+ struct tc_indr_block_dev *indr_dev;
+ int err;
+
+ indr_dev = tc_indr_block_dev_get(dev);
+ if (!indr_dev)
+ return -ENOMEM;
+
+ indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+ err = PTR_ERR_OR_ZERO(indr_block_cb);
+ if (err)
+ goto err_dev_put;
+
+ tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
+ return 0;
+
+err_dev_put:
+ tc_indr_block_dev_put(indr_dev);
+ return err;
+}
+EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
+
+int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+ rtnl_unlock();
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+
+void __tc_indr_block_cb_unregister(struct net_device *dev,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+ struct tc_indr_block_dev *indr_dev;
+
+ indr_dev = tc_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+ if (!indr_block_cb)
+ return;
+
+ /* Send unbind message if required to free any block cbs. */
+ tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
+ tc_indr_block_cb_del(indr_block_cb);
+ tc_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+
+void tc_indr_block_cb_unregister(struct net_device *dev,
+ tc_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+ rtnl_lock();
+ __tc_indr_block_cb_unregister(dev, cb, cb_ident);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
+
+static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+ struct tcf_block_ext_info *ei,
+ enum tc_block_command command,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_indr_block_cb *indr_block_cb;
+ struct tc_indr_block_dev *indr_dev;
+ struct tc_block_offload bo = {
+ .command = command,
+ .binder_type = ei->binder_type,
+ .block = block,
+ .extack = extack,
+ };
+
+ indr_dev = tc_indr_block_dev_lookup(dev);
+ if (!indr_dev)
+ return;
+
+ indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;
+
+ list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+ indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+ &bo);
+}
+
static bool tcf_block_offload_in_use(struct tcf_block *block)
{
return block->offloadcnt;
@@ -406,12 +646,17 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
if (err == -EOPNOTSUPP)
goto no_offload_dev_inc;
- return err;
+ if (err)
+ return err;
+
+ tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
+ return 0;
no_offload_dev_inc:
if (tcf_block_offload_in_use(block))
return -EOPNOTSUPP;
block->nooffloaddevcnt++;
+ tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
return 0;
}
@@ -421,6 +666,8 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev;
int err;
+ tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);
+
if (!dev->netdev_ops->ndo_setup_tc)
goto no_offload_dev_dec;
err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
@@ -1023,29 +1270,6 @@ void tcf_block_cb_unregister(struct tcf_block *block,
}
EXPORT_SYMBOL(tcf_block_cb_unregister);
-static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
- void *type_data, bool err_stop)
-{
- struct tcf_block_cb *block_cb;
- int ok_count = 0;
- int err;
-
- /* Make sure all netdevs sharing this block are offload-capable. */
- if (block->nooffloaddevcnt && err_stop)
- return -EOPNOTSUPP;
-
- list_for_each_entry(block_cb, &block->cb_list, list) {
- err = block_cb->cb(type, type_data, block_cb->cb_priv);
- if (err) {
- if (err_stop)
- return err;
- } else {
- ok_count++;
- }
- }
- return ok_count;
-}
-
/* Main classifier routine: scans classifier chain attached
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
@@ -2268,54 +2492,26 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
- enum tc_setup_type type,
- void *type_data, bool err_stop)
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
+ struct tcf_block_cb *block_cb;
int ok_count = 0;
-#ifdef CONFIG_NET_CLS_ACT
- const struct tc_action *a;
- struct net_device *dev;
- int i, ret;
+ int err;
- if (!tcf_exts_has_actions(exts))
- return 0;
+ /* Make sure all netdevs sharing this block are offload-capable. */
+ if (block->nooffloaddevcnt && err_stop)
+ return -EOPNOTSUPP;
- for (i = 0; i < exts->nr_actions; i++) {
- a = exts->actions[i];
- if (!a->ops->get_dev)
- continue;
- dev = a->ops->get_dev(a);
- if (!dev)
- continue;
- ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
- a->ops->put_dev(dev);
- if (ret < 0)
- return ret;
- ok_count += ret;
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
}
-#endif
- return ok_count;
-}
-
-int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
- enum tc_setup_type type, void *type_data, bool err_stop)
-{
- int ok_count;
- int ret;
-
- ret = tcf_block_cb_call(block, type, type_data, err_stop);
- if (ret < 0)
- return ret;
- ok_count = ret;
-
- if (!exts || ok_count)
- return ok_count;
- ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
- if (ret < 0)
- return ret;
- ok_count += ret;
-
return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);
@@ -2355,6 +2551,11 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
+ err = rhashtable_init(&indr_setup_block_ht,
+ &tc_indr_setup_block_ht_params);
+ if (err)
+ goto err_rhash_setup_block_ht;
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
@@ -2366,6 +2567,8 @@ static int __init tc_filter_init(void)
return 0;
+err_rhash_setup_block_ht:
+ unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys:
destroy_workqueue(tc_filter_wq);
return err;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index fa6fe2fe0f32..a95cb240a606 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -169,7 +169,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
if (prog) {
if (err < 0) {
cls_bpf_offload_cmd(tp, oldprog, prog, extack);
@@ -234,7 +234,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false);
}
static int cls_bpf_init(struct tcf_proto *tp)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9aada2d0ef06..1eb2e2c31dd5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -55,6 +55,8 @@ struct fl_flow_key {
struct flow_dissector_key_ip ip;
struct flow_dissector_key_ip enc_ip;
struct flow_dissector_key_enc_opts enc_opts;
+ struct flow_dissector_key_ports tp_min;
+ struct flow_dissector_key_ports tp_max;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -65,6 +67,7 @@ struct fl_flow_mask_range {
struct fl_flow_mask {
struct fl_flow_key key;
struct fl_flow_mask_range range;
+ u32 flags;
struct rhash_head ht_node;
struct rhashtable ht;
struct rhashtable_params filter_ht_params;
@@ -179,13 +182,89 @@ static void fl_clear_masked_range(struct fl_flow_key *key,
memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
}
-static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
- struct fl_flow_key *mkey)
+static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mkey)
+{
+ __be16 min_mask, max_mask, min_val, max_val;
+
+ min_mask = htons(filter->mask->key.tp_min.dst);
+ max_mask = htons(filter->mask->key.tp_max.dst);
+ min_val = htons(filter->key.tp_min.dst);
+ max_val = htons(filter->key.tp_max.dst);
+
+ if (min_mask && max_mask) {
+ if (htons(key->tp.dst) < min_val ||
+ htons(key->tp.dst) > max_val)
+ return false;
+
+ /* skb does not have min and max values */
+ mkey->tp_min.dst = filter->mkey.tp_min.dst;
+ mkey->tp_max.dst = filter->mkey.tp_max.dst;
+ }
+ return true;
+}
+
+static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mkey)
+{
+ __be16 min_mask, max_mask, min_val, max_val;
+
+ min_mask = htons(filter->mask->key.tp_min.src);
+ max_mask = htons(filter->mask->key.tp_max.src);
+ min_val = htons(filter->key.tp_min.src);
+ max_val = htons(filter->key.tp_max.src);
+
+ if (min_mask && max_mask) {
+ if (htons(key->tp.src) < min_val ||
+ htons(key->tp.src) > max_val)
+ return false;
+
+ /* skb does not have min and max values */
+ mkey->tp_min.src = filter->mkey.tp_min.src;
+ mkey->tp_max.src = filter->mkey.tp_max.src;
+ }
+ return true;
+}
+
+static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
+ struct fl_flow_key *mkey)
{
return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
mask->filter_ht_params);
}
+static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
+ struct fl_flow_key *mkey,
+ struct fl_flow_key *key)
+{
+ struct cls_fl_filter *filter, *f;
+
+ list_for_each_entry_rcu(filter, &mask->filters, list) {
+ if (!fl_range_port_dst_cmp(filter, key, mkey))
+ continue;
+
+ if (!fl_range_port_src_cmp(filter, key, mkey))
+ continue;
+
+ f = __fl_lookup(mask, mkey);
+ if (f)
+ return f;
+ }
+ return NULL;
+}
+
+static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
+ struct fl_flow_key *mkey,
+ struct fl_flow_key *key)
+{
+ if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
+ return fl_lookup_range(mask, mkey, key);
+
+ return __fl_lookup(mask, mkey);
+}
+
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -208,7 +287,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
fl_set_masked_key(&skb_mkey, &skb_key, mask);
- f = fl_lookup(mask, &skb_mkey);
+ f = fl_lookup(mask, &skb_mkey, &skb_key);
if (f && !tc_skip_sw(f->flags)) {
*res = f->res;
return tcf_exts_exec(skb, &f->exts, res);
@@ -289,8 +368,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
- tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
tcf_block_offload_dec(block, &f->flags);
}
@@ -312,8 +390,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
- err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
if (err < 0) {
fl_hw_destroy_filter(tp, f, NULL);
return err;
@@ -339,8 +416,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
- tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -514,6 +590,31 @@ static void fl_set_key_val(struct nlattr **tb,
memcpy(mask, nla_data(tb[mask_type]), len);
}
+static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
+ struct fl_flow_key *mask)
+{
+ fl_set_key_val(tb, &key->tp_min.dst,
+ TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
+ fl_set_key_val(tb, &key->tp_max.dst,
+ TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
+ fl_set_key_val(tb, &key->tp_min.src,
+ TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
+ fl_set_key_val(tb, &key->tp_max.src,
+ TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
+ TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
+
+ if ((mask->tp_min.dst && mask->tp_max.dst &&
+ htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
+ (mask->tp_min.src && mask->tp_max.src &&
+ htons(key->tp_max.src) <= htons(key->tp_min.src)))
+ return -EINVAL;
+
+ return 0;
+}
+
static int fl_set_key_mpls(struct nlattr **tb,
struct flow_dissector_key_mpls *key_val,
struct flow_dissector_key_mpls *key_mask)
@@ -709,11 +810,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct netlink_ext_ack *extack)
{
const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
- int option_len, key_depth, msk_depth = 0;
+ int err, option_len, key_depth, msk_depth = 0;
+
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+ err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
+ TCA_FLOWER_KEY_ENC_OPTS_MAX,
+ enc_opts_policy, extack);
+ if (err)
+ return err;
+
nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
}
@@ -909,6 +1022,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
sizeof(key->arp.tha));
}
+ if (key->basic.ip_proto == IPPROTO_TCP ||
+ key->basic.ip_proto == IPPROTO_UDP ||
+ key->basic.ip_proto == IPPROTO_SCTP) {
+ ret = fl_set_key_port_range(tb, key, mask);
+ if (ret)
+ return ret;
+ }
+
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
@@ -1026,8 +1147,9 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
- FL_KEY_SET_IF_MASKED(mask, keys, cnt,
- FLOW_DISSECTOR_KEY_PORTS, tp);
+ if (FL_KEY_IS_MASKED(mask, tp) ||
+ FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_IP, ip);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
@@ -1074,6 +1196,10 @@ static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
fl_mask_copy(newmask, mask);
+ if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
+ (newmask->key.tp_min.src && newmask->key.tp_max.src))
+ newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;
+
err = fl_init_mask_hashtable(newmask);
if (err)
goto errout_free;
@@ -1226,18 +1352,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout_idr;
- if (!tc_skip_sw(fnew->flags)) {
- if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
- err = -EEXIST;
- goto errout_mask;
- }
-
- err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
- fnew->mask->filter_ht_params);
- if (err)
- goto errout_mask;
+ if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
+ err = -EEXIST;
+ goto errout_mask;
}
+ err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
+ fnew->mask->filter_ht_params);
+ if (err)
+ goto errout_mask;
+
if (!tc_skip_hw(fnew->flags)) {
err = fl_hw_replace_filter(tp, fnew, extack);
if (err)
@@ -1291,9 +1415,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
- if (!tc_skip_sw(f->flags))
- rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
- f->mask->filter_ht_params);
+ rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+ f->mask->filter_ht_params);
__fl_delete(tp, f, extack);
*last = list_empty(&head->masks);
return 0;
@@ -1376,8 +1499,7 @@ static void fl_hw_create_tmplt(struct tcf_chain *chain,
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1390,8 +1512,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
cls_flower.cookie = (unsigned long) tmplt;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSFLOWER,
- &cls_flower, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
}
static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
@@ -1464,6 +1585,26 @@ static int fl_dump_key_val(struct sk_buff *skb,
return 0;
}
+static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
+ struct fl_flow_key *mask)
+{
+ if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
+ &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_min.dst)) ||
+ fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
+ &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_max.dst)) ||
+ fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
+ &mask->tp_min.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_min.src)) ||
+ fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
+ &mask->tp_max.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp_max.src)))
+ return -1;
+
+ return 0;
+}
+
static int fl_dump_key_mpls(struct sk_buff *skb,
struct flow_dissector_key_mpls *mpls_key,
struct flow_dissector_key_mpls *mpls_mask)
@@ -1800,6 +1941,12 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->arp.tha))))
goto nla_put_failure;
+ if ((key->basic.ip_proto == IPPROTO_TCP ||
+ key->basic.ip_proto == IPPROTO_UDP ||
+ key->basic.ip_proto == IPPROTO_SCTP) &&
+ fl_dump_key_port_range(skb, key, mask))
+ goto nla_put_failure;
+
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 856fa79d4ffd..0e408ee9dcec 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,7 +71,7 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
tcf_block_offload_dec(block, &head->flags);
}
@@ -90,8 +90,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
- &cls_mall, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
if (err < 0) {
mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 4b28fd44576d..dcea21004604 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -491,7 +491,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
@@ -509,7 +509,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
u32_clear_hw_hnode(tp, h, NULL);
return err;
@@ -533,7 +533,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
- tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+ tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
tcf_block_offload_dec(block, &n->flags);
}
@@ -558,11 +558,12 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.knode.mask = 0;
#endif
cls_u32.knode.sel = &n->sel;
+ cls_u32.knode.res = &n->res;
cls_u32.knode.exts = &n->exts;
if (n->ht_down)
cls_u32.knode.link_handle = ht->handle;
- err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
u32_remove_hw_knode(tp, n, NULL);
return err;
@@ -1206,6 +1207,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
cls_u32.knode.mask = 0;
#endif
cls_u32.knode.sel = &n->sel;
+ cls_u32.knode.res = &n->res;
cls_u32.knode.exts = &n->exts;
if (n->ht_down)
cls_u32.knode.link_handle = ht->handle;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f55bc50cd0a9..187a57e7d601 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -335,7 +335,6 @@ out:
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
- struct Qdisc *leaf;
const struct Qdisc_class_ops *cops = p->ops->cl_ops;
if (cops == NULL)
@@ -344,8 +343,7 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
if (cl == 0)
return NULL;
- leaf = cops->leaf(p, cl);
- return leaf;
+ return cops->leaf(p, cl);
}
/* Find queueing discipline by name */
@@ -860,6 +858,21 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);
+static void qdisc_offload_graft_root(struct net_device *dev,
+ struct Qdisc *new, struct Qdisc *old,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_root_qopt_offload graft_offload = {
+ .command = TC_ROOT_GRAFT,
+ .handle = new ? new->handle : 0,
+ .ingress = (new && new->flags & TCQ_F_INGRESS) ||
+ (old && old->flags & TCQ_F_INGRESS),
+ };
+
+ qdisc_offload_graft_helper(dev, NULL, new, old,
+ TC_SETUP_ROOT_QDISC, &graft_offload, extack);
+}
+
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
u32 portid, u32 seq, u16 flags, int event)
{
@@ -1026,6 +1039,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
+ qdisc_offload_graft_root(dev, new, old, extack);
+
if (new && new->ops->attach)
goto skip;
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index 1538d6fa8165..1150f22983df 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -30,7 +30,7 @@ struct etf_sched_data {
int queue;
s32 delta; /* in ns */
ktime_t last; /* The txtime of the last skb sent to the netdevice. */
- struct rb_root head;
+ struct rb_root_cached head;
struct qdisc_watchdog watchdog;
ktime_t (*get_time)(void);
};
@@ -104,7 +104,7 @@ static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
struct etf_sched_data *q = qdisc_priv(sch);
struct rb_node *p;
- p = rb_first(&q->head);
+ p = rb_first_cached(&q->head);
if (!p)
return NULL;
@@ -117,8 +117,10 @@ static void reset_watchdog(struct Qdisc *sch)
struct sk_buff *skb = etf_peek_timesortedlist(sch);
ktime_t next;
- if (!skb)
+ if (!skb) {
+ qdisc_watchdog_cancel(&q->watchdog);
return;
+ }
next = ktime_sub_ns(skb->tstamp, q->delta);
qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
@@ -154,8 +156,9 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct etf_sched_data *q = qdisc_priv(sch);
- struct rb_node **p = &q->head.rb_node, *parent = NULL;
+ struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
ktime_t txtime = nskb->tstamp;
+ bool leftmost = true;
if (!is_packet_valid(sch, nskb)) {
report_sock_error(nskb, EINVAL,
@@ -168,13 +171,15 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
parent = *p;
skb = rb_to_skb(parent);
- if (ktime_after(txtime, skb->tstamp))
+ if (ktime_after(txtime, skb->tstamp)) {
p = &parent->rb_right;
- else
+ leftmost = false;
+ } else {
p = &parent->rb_left;
+ }
}
rb_link_node(&nskb->rbnode, parent, p);
- rb_insert_color(&nskb->rbnode, &q->head);
+ rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
qdisc_qstats_backlog_inc(sch, nskb);
sch->q.qlen++;
@@ -185,12 +190,42 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
return NET_XMIT_SUCCESS;
}
-static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
- bool drop)
+static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
+ ktime_t now)
+{
+ struct etf_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *to_free = NULL;
+ struct sk_buff *tmp = NULL;
+
+ skb_rbtree_walk_from_safe(skb, tmp) {
+ if (ktime_after(skb->tstamp, now))
+ break;
+
+ rb_erase_cached(&skb->rbnode, &q->head);
+
+ /* The rbnode field in the skb re-uses these fields, now that
+ * we are done with the rbnode, reset them.
+ */
+ skb->next = NULL;
+ skb->prev = NULL;
+ skb->dev = qdisc_dev(sch);
+
+ report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
+
+ qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_drop(skb, sch, &to_free);
+ qdisc_qstats_overlimit(sch);
+ sch->q.qlen--;
+ }
+
+ kfree_skb_list(to_free);
+}
+
+static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
struct etf_sched_data *q = qdisc_priv(sch);
- rb_erase(&skb->rbnode, &q->head);
+ rb_erase_cached(&skb->rbnode, &q->head);
/* The rbnode field in the skb re-uses these fields, now that
* we are done with the rbnode, reset them.
@@ -201,19 +236,9 @@ static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
qdisc_qstats_backlog_dec(sch, skb);
- if (drop) {
- struct sk_buff *to_free = NULL;
+ qdisc_bstats_update(sch, skb);
- report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
-
- qdisc_drop(skb, sch, &to_free);
- kfree_skb_list(to_free);
- qdisc_qstats_overlimit(sch);
- } else {
- qdisc_bstats_update(sch, skb);
-
- q->last = skb->tstamp;
- }
+ q->last = skb->tstamp;
sch->q.qlen--;
}
@@ -232,7 +257,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
/* Drop if packet has expired while in queue. */
if (ktime_before(skb->tstamp, now)) {
- timesortedlist_erase(sch, skb, true);
+ timesortedlist_drop(sch, skb, now);
skb = NULL;
goto out;
}
@@ -241,7 +266,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
* txtime from deadline to (now + delta).
*/
if (q->deadline_mode) {
- timesortedlist_erase(sch, skb, false);
+ timesortedlist_remove(sch, skb);
skb->tstamp = now;
goto out;
}
@@ -250,7 +275,7 @@ static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
/* Dequeue only if now is within the [txtime - delta, txtime] range. */
if (ktime_after(now, next))
- timesortedlist_erase(sch, skb, false);
+ timesortedlist_remove(sch, skb);
else
skb = NULL;
@@ -386,14 +411,14 @@ static int etf_init(struct Qdisc *sch, struct nlattr *opt,
static void timesortedlist_clear(struct Qdisc *sch)
{
struct etf_sched_data *q = qdisc_priv(sch);
- struct rb_node *p = rb_first(&q->head);
+ struct rb_node *p = rb_first_cached(&q->head);
while (p) {
struct sk_buff *skb = rb_to_skb(p);
p = rb_next(p);
- rb_erase(&skb->rbnode, &q->head);
+ rb_erase_cached(&skb->rbnode, &q->head);
rtnl_kfree_skbs(skb, skb);
sch->q.qlen--;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4b1af706896c..1a662f2bb7bb 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -94,6 +94,7 @@ struct fq_sched_data {
u32 flow_refill_delay;
u32 flow_plimit; /* max packets per flow */
unsigned long flow_max_rate; /* optional max rate per flow */
+ u64 ce_threshold;
u32 orphan_mask; /* mask for orphaned skb */
u32 low_rate_threshold;
struct rb_root *fq_root;
@@ -107,6 +108,7 @@ struct fq_sched_data {
u64 stat_gc_flows;
u64 stat_internal_packets;
u64 stat_throttled;
+ u64 stat_ce_mark;
u64 stat_flows_plimit;
u64 stat_pkts_too_long;
u64 stat_allocation_errors;
@@ -412,16 +414,21 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now)
static struct sk_buff *fq_dequeue(struct Qdisc *sch)
{
struct fq_sched_data *q = qdisc_priv(sch);
- u64 now = ktime_get_ns();
struct fq_flow_head *head;
struct sk_buff *skb;
struct fq_flow *f;
unsigned long rate;
u32 plen;
+ u64 now;
+
+ if (!sch->q.qlen)
+ return NULL;
skb = fq_dequeue_head(sch, &q->internal);
if (skb)
goto out;
+
+ now = ktime_get_ns();
fq_check_throttled(q, now);
begin:
head = &q->new_flows;
@@ -454,6 +461,11 @@ begin:
fq_flow_set_throttled(q, f);
goto begin;
}
+ if (time_next_packet &&
+ (s64)(now - time_next_packet - q->ce_threshold) > 0) {
+ INET_ECN_set_ce(skb);
+ q->stat_ce_mark++;
+ }
}
skb = fq_dequeue_head(sch, f);
@@ -469,22 +481,29 @@ begin:
goto begin;
}
prefetch(&skb->end);
- f->credit -= qdisc_pkt_len(skb);
+ plen = qdisc_pkt_len(skb);
+ f->credit -= plen;
- if (ktime_to_ns(skb->tstamp) || !q->rate_enable)
+ if (!q->rate_enable)
goto out;
rate = q->flow_max_rate;
- if (skb->sk)
- rate = min(skb->sk->sk_pacing_rate, rate);
-
- if (rate <= q->low_rate_threshold) {
- f->credit = 0;
- plen = qdisc_pkt_len(skb);
- } else {
- plen = max(qdisc_pkt_len(skb), q->quantum);
- if (f->credit > 0)
- goto out;
+
+ /* If EDT time was provided for this skb, we need to
+ * update f->time_next_packet only if this qdisc enforces
+ * a flow max rate.
+ */
+ if (!skb->tstamp) {
+ if (skb->sk)
+ rate = min(skb->sk->sk_pacing_rate, rate);
+
+ if (rate <= q->low_rate_threshold) {
+ f->credit = 0;
+ } else {
+ plen = max(plen, q->quantum);
+ if (f->credit > 0)
+ goto out;
+ }
}
if (rate != ~0UL) {
u64 len = (u64)plen * NSEC_PER_SEC;
@@ -650,6 +669,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
[TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 },
[TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 },
[TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 },
+ [TCA_FQ_CE_THRESHOLD] = { .type = NLA_U32 },
};
static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -729,6 +749,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_ORPHAN_MASK])
q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]);
+ if (tb[TCA_FQ_CE_THRESHOLD])
+ q->ce_threshold = (u64)NSEC_PER_USEC *
+ nla_get_u32(tb[TCA_FQ_CE_THRESHOLD]);
+
if (!err) {
sch_tree_unlock(sch);
err = fq_resize(sch, fq_log);
@@ -779,6 +803,10 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
q->fq_trees_log = ilog2(1024);
q->orphan_mask = 1024 - 1;
q->low_rate_threshold = 550000 / 8;
+
+ /* Default ce_threshold of 4294 seconds */
+ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U;
+
qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC);
if (opt)
@@ -792,6 +820,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct fq_sched_data *q = qdisc_priv(sch);
+ u64 ce_threshold = q->ce_threshold;
struct nlattr *opts;
opts = nla_nest_start(skb, TCA_OPTIONS);
@@ -800,6 +829,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
+ do_div(ce_threshold, NSEC_PER_USEC);
+
if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
nla_put_u32(skb, TCA_FQ_QUANTUM, q->quantum) ||
@@ -812,6 +843,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
q->low_rate_threshold) ||
+ nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
goto nla_put_failure;
@@ -841,6 +873,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.throttled_flows = q->throttled_flows;
st.unthrottle_latency_ns = min_t(unsigned long,
q->unthrottle_latency_ns, ~0U);
+ st.ce_mark = q->stat_ce_mark;
sch_tree_unlock(sch);
return gnet_stats_copy_app(d, &st, sizeof(st));
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 4a042abf844c..234afbf9115b 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -23,19 +23,23 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/red.h>
#define GRED_DEF_PRIO (MAX_DPs / 2)
#define GRED_VQ_MASK (MAX_DPs - 1)
+#define GRED_VQ_RED_FLAGS (TC_RED_ECN | TC_RED_HARDDROP)
+
struct gred_sched_data;
struct gred_sched;
struct gred_sched_data {
u32 limit; /* HARD maximal queue length */
u32 DP; /* the drop parameters */
- u32 bytesin; /* bytes seen on virtualQ so far*/
+ u32 red_flags; /* virtualQ version of red_flags */
+ u64 bytesin; /* bytes seen on virtualQ so far*/
u32 packetsin; /* packets seen on virtualQ so far*/
u32 backlog; /* bytes on the virtualQ */
u8 prio; /* the prio of this vq */
@@ -139,14 +143,27 @@ static inline void gred_store_wred_set(struct gred_sched *table,
table->wred_set.qidlestart = q->vars.qidlestart;
}
-static inline int gred_use_ecn(struct gred_sched *t)
+static int gred_use_ecn(struct gred_sched_data *q)
+{
+ return q->red_flags & TC_RED_ECN;
+}
+
+static int gred_use_harddrop(struct gred_sched_data *q)
{
- return t->red_flags & TC_RED_ECN;
+ return q->red_flags & TC_RED_HARDDROP;
}
-static inline int gred_use_harddrop(struct gred_sched *t)
+static bool gred_per_vq_red_flags_used(struct gred_sched *table)
{
- return t->red_flags & TC_RED_HARDDROP;
+ unsigned int i;
+
+ /* Local per-vq flags couldn't have been set unless global are 0 */
+ if (table->red_flags)
+ return false;
+ for (i = 0; i < MAX_DPs; i++)
+ if (table->tab[i] && table->tab[i]->red_flags)
+ return true;
+ return false;
}
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
@@ -212,7 +229,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
case RED_PROB_MARK:
qdisc_qstats_overlimit(sch);
- if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+ if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
q->stats.prob_drop++;
goto congestion_drop;
}
@@ -222,7 +239,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
case RED_HARD_MARK:
qdisc_qstats_overlimit(sch);
- if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+ if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
!INET_ECN_set_ce(skb)) {
q->stats.forced_drop++;
goto congestion_drop;
@@ -295,15 +312,103 @@ static void gred_reset(struct Qdisc *sch)
}
}
+static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_gred_qopt_offload opt = {
+ .command = command,
+ .handle = sch->handle,
+ .parent = sch->parent,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ if (command == TC_GRED_REPLACE) {
+ unsigned int i;
+
+ opt.set.grio_on = gred_rio_mode(table);
+ opt.set.wred_on = gred_wred_mode(table);
+ opt.set.dp_cnt = table->DPs;
+ opt.set.dp_def = table->def;
+
+ for (i = 0; i < table->DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+
+ if (!q)
+ continue;
+ opt.set.tab[i].present = true;
+ opt.set.tab[i].limit = q->limit;
+ opt.set.tab[i].prio = q->prio;
+ opt.set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
+ opt.set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
+ opt.set.tab[i].is_ecn = gred_use_ecn(q);
+ opt.set.tab[i].is_harddrop = gred_use_harddrop(q);
+ opt.set.tab[i].probability = q->parms.max_P;
+ opt.set.tab[i].backlog = &q->backlog;
+ }
+ opt.set.qstats = &sch->qstats;
+ }
+
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, &opt);
+}
+
+static int gred_offload_dump_stats(struct Qdisc *sch)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct tc_gred_qopt_offload *hw_stats;
+ unsigned int i;
+ int ret;
+
+ hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
+ if (!hw_stats)
+ return -ENOMEM;
+
+ hw_stats->command = TC_GRED_STATS;
+ hw_stats->handle = sch->handle;
+ hw_stats->parent = sch->parent;
+
+ for (i = 0; i < MAX_DPs; i++)
+ if (table->tab[i])
+ hw_stats->stats.xstats[i] = &table->tab[i]->stats;
+
+ ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
+ /* Even if driver returns failure adjust the stats - in case offload
+ * ended but driver still wants to adjust the values.
+ */
+ for (i = 0; i < MAX_DPs; i++) {
+ if (!table->tab[i])
+ continue;
+ table->tab[i]->packetsin += hw_stats->stats.bstats[i].packets;
+ table->tab[i]->bytesin += hw_stats->stats.bstats[i].bytes;
+ table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;
+
+ _bstats_update(&sch->bstats,
+ hw_stats->stats.bstats[i].bytes,
+ hw_stats->stats.bstats[i].packets);
+ sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
+ sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
+ sch->qstats.drops += hw_stats->stats.qstats[i].drops;
+ sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
+ sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
+ }
+
+ kfree(hw_stats);
+ return ret;
+}
+
static inline void gred_destroy_vq(struct gred_sched_data *q)
{
kfree(q);
}
-static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
+static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
+ struct netlink_ext_ack *extack)
{
struct gred_sched *table = qdisc_priv(sch);
struct tc_gred_sopt *sopt;
+ bool red_flags_changed;
int i;
if (!dps)
@@ -311,13 +416,28 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
sopt = nla_data(dps);
- if (sopt->DPs > MAX_DPs || sopt->DPs == 0 ||
- sopt->def_DP >= sopt->DPs)
+ if (sopt->DPs > MAX_DPs) {
+ NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
+ return -EINVAL;
+ }
+ if (sopt->DPs == 0) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "number of virtual queues can't be 0");
+ return -EINVAL;
+ }
+ if (sopt->def_DP >= sopt->DPs) {
+ NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
return -EINVAL;
+ }
+ if (sopt->flags && gred_per_vq_red_flags_used(table)) {
+ NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
+ return -EINVAL;
+ }
sch_tree_lock(sch);
table->DPs = sopt->DPs;
table->def = sopt->def_DP;
+ red_flags_changed = table->red_flags != sopt->flags;
table->red_flags = sopt->flags;
/*
@@ -337,6 +457,12 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
gred_disable_wred_mode(table);
}
+ if (red_flags_changed)
+ for (i = 0; i < table->DPs; i++)
+ if (table->tab[i])
+ table->tab[i]->red_flags =
+ table->red_flags & GRED_VQ_RED_FLAGS;
+
for (i = table->DPs; i < MAX_DPs; i++) {
if (table->tab[i]) {
pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
@@ -346,25 +472,30 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
}
}
+ gred_offload(sch, TC_GRED_REPLACE);
return 0;
}
static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct tc_gred_qopt *ctl, int prio,
u8 *stab, u32 max_P,
- struct gred_sched_data **prealloc)
+ struct gred_sched_data **prealloc,
+ struct netlink_ext_ack *extack)
{
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) {
+ NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
return -EINVAL;
+ }
if (!q) {
table->tab[dp] = q = *prealloc;
*prealloc = NULL;
if (!q)
return -ENOMEM;
+ q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
}
q->DP = dp;
@@ -384,14 +515,127 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
return 0;
}
+static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
+ [TCA_GRED_VQ_DP] = { .type = NLA_U32 },
+ [TCA_GRED_VQ_FLAGS] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
+ [TCA_GRED_VQ_ENTRY] = { .type = NLA_NESTED },
+};
+
static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
[TCA_GRED_MAX_P] = { .type = NLA_U32 },
[TCA_GRED_LIMIT] = { .type = NLA_U32 },
+ [TCA_GRED_VQ_LIST] = { .type = NLA_NESTED },
};
+static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
+{
+ struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+ u32 dp;
+
+ nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy, NULL);
+
+ dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+
+ if (tb[TCA_GRED_VQ_FLAGS])
+ table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+}
+
+static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
+{
+ const struct nlattr *attr;
+ int rem;
+
+ nla_for_each_nested(attr, vqs, rem) {
+ switch (nla_type(attr)) {
+ case TCA_GRED_VQ_ENTRY:
+ gred_vq_apply(table, attr);
+ break;
+ }
+ }
+}
+
+static int gred_vq_validate(struct gred_sched *table, u32 cdp,
+ const struct nlattr *entry,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
+ int err;
+ u32 dp;
+
+ err = nla_parse_nested(tb, TCA_GRED_VQ_MAX, entry, gred_vq_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_GRED_VQ_DP]) {
+ NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
+ return -EINVAL;
+ }
+ dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
+ if (dp >= table->DPs) {
+ NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
+ return -EINVAL;
+ }
+ if (dp != cdp && !table->tab[dp]) {
+ NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_GRED_VQ_FLAGS]) {
+ u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
+
+ if (table->red_flags && table->red_flags != red_flags) {
+ NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
+ return -EINVAL;
+ }
+ if (red_flags & ~GRED_VQ_RED_FLAGS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "invalid RED flags specified");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
+ struct nlattr *vqs, struct netlink_ext_ack *extack)
+{
+ const struct nlattr *attr;
+ int rem, err;
+
+ err = nla_validate_nested(vqs, TCA_GRED_VQ_ENTRY_MAX,
+ gred_vqe_policy, extack);
+ if (err < 0)
+ return err;
+
+ nla_for_each_nested(attr, vqs, rem) {
+ switch (nla_type(attr)) {
+ case TCA_GRED_VQ_ENTRY:
+ err = gred_vq_validate(table, cdp, attr, extack);
+ if (err)
+ return err;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0) {
+ NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int gred_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -406,29 +650,39 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
if (opt == NULL)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack);
if (err < 0)
return err;
if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
if (tb[TCA_GRED_LIMIT] != NULL)
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}
if (tb[TCA_GRED_PARMS] == NULL ||
tb[TCA_GRED_STAB] == NULL ||
- tb[TCA_GRED_LIMIT] != NULL)
+ tb[TCA_GRED_LIMIT] != NULL) {
+ NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time");
return -EINVAL;
+ }
max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
- err = -EINVAL;
ctl = nla_data(tb[TCA_GRED_PARMS]);
stab = nla_data(tb[TCA_GRED_STAB]);
- if (ctl->DP >= table->DPs)
- goto errout;
+ if (ctl->DP >= table->DPs) {
+ NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_GRED_VQ_LIST]) {
+ err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
+ extack);
+ if (err)
+ return err;
+ }
if (gred_rio_mode(table)) {
if (ctl->prio == 0) {
@@ -448,9 +702,13 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
sch_tree_lock(sch);
- err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
+ err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
+ extack);
if (err < 0)
- goto errout_locked;
+ goto err_unlock_free;
+
+ if (tb[TCA_GRED_VQ_LIST])
+ gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);
if (gred_rio_mode(table)) {
gred_disable_wred_mode(table);
@@ -458,12 +716,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt,
gred_enable_wred_mode(table);
}
- err = 0;
+ sch_tree_unlock(sch);
+ kfree(prealloc);
+
+ gred_offload(sch, TC_GRED_REPLACE);
+ return 0;
-errout_locked:
+err_unlock_free:
sch_tree_unlock(sch);
kfree(prealloc);
-errout:
return err;
}
@@ -476,12 +737,15 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt)
return -EINVAL;
- err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, NULL);
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy, extack);
if (err < 0)
return err;
- if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
+ if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "virtual queue configuration can't be specified at initialization time");
return -EINVAL;
+ }
if (tb[TCA_GRED_LIMIT])
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
@@ -489,13 +753,13 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt,
sch->limit = qdisc_dev(sch)->tx_queue_len
* psched_mtu(qdisc_dev(sch));
- return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}
static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct gred_sched *table = qdisc_priv(sch);
- struct nlattr *parms, *opts = NULL;
+ struct nlattr *parms, *vqs, *opts = NULL;
int i;
u32 max_p[MAX_DPs];
struct tc_gred_sopt sopt = {
@@ -505,6 +769,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
.flags = table->red_flags,
};
+ if (gred_offload_dump_stats(sch))
+ goto nla_put_failure;
+
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
@@ -522,6 +789,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
goto nla_put_failure;
+ /* Old style all-in-one dump of VQs */
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
@@ -572,6 +840,58 @@ append_opt:
nla_nest_end(skb, parms);
+ /* Dump the VQs again, in more structured way */
+ vqs = nla_nest_start(skb, TCA_GRED_VQ_LIST);
+ if (!vqs)
+ goto nla_put_failure;
+
+ for (i = 0; i < MAX_DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+ struct nlattr *vq;
+
+ if (!q)
+ continue;
+
+ vq = nla_nest_start(skb, TCA_GRED_VQ_ENTRY);
+ if (!vq)
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
+ goto nla_put_failure;
+
+ if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
+ goto nla_put_failure;
+
+ /* Stats */
+ if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
+ TCA_GRED_VQ_PAD))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
+ gred_backlog(table, q, sch)))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
+ q->stats.prob_drop))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
+ q->stats.prob_mark))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
+ q->stats.forced_drop))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
+ q->stats.forced_mark))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, vq);
+ }
+ nla_nest_end(skb, vqs);
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -588,6 +908,7 @@ static void gred_destroy(struct Qdisc *sch)
if (table->tab[i])
gred_destroy_vq(table->tab[i]);
}
+ gred_offload(sch, TC_GRED_DESTROY);
}
static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 1db5c1bf6ddd..203659bc3906 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -193,6 +193,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+ struct tc_mq_qopt_offload graft_offload;
struct net_device *dev = qdisc_dev(sch);
if (dev->flags & IFF_UP)
@@ -203,6 +204,14 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
if (dev->flags & IFF_UP)
dev_activate(dev);
+
+ graft_offload.handle = sch->handle;
+ graft_offload.graft_params.queue = cl - 1;
+ graft_offload.graft_params.child_handle = new ? new->handle : 0;
+ graft_offload.command = TC_MQ_GRAFT;
+
+ qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, *old,
+ TC_SETUP_QDISC_MQ, &graft_offload, extack);
return 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 57b3ad9394ad..75046ec72144 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,6 +77,10 @@ struct netem_sched_data {
/* internal t(ime)fifo qdisc uses t_root and sch->limit */
struct rb_root t_root;
+ /* a linear queue; reduces rbtree rebalancing when jitter is low */
+ struct sk_buff *t_head;
+ struct sk_buff *t_tail;
+
/* optional qdisc for classful handling (NULL at netem init) */
struct Qdisc *qdisc;
@@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
rb_erase(&skb->rbnode, &q->t_root);
rtnl_kfree_skbs(skb, skb);
}
+
+ rtnl_kfree_skbs(q->t_head, q->t_tail);
+ q->t_head = NULL;
+ q->t_tail = NULL;
}
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
u64 tnext = netem_skb_cb(nskb)->time_to_send;
- struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
- while (*p) {
- struct sk_buff *skb;
-
- parent = *p;
- skb = rb_to_skb(parent);
- if (tnext >= netem_skb_cb(skb)->time_to_send)
- p = &parent->rb_right;
+ if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+ if (q->t_tail)
+ q->t_tail->next = nskb;
else
- p = &parent->rb_left;
+ q->t_head = nskb;
+ q->t_tail = nskb;
+ } else {
+ struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+ while (*p) {
+ struct sk_buff *skb;
+
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (tnext >= netem_skb_cb(skb)->time_to_send)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&nskb->rbnode, parent, p);
+ rb_insert_color(&nskb->rbnode, &q->t_root);
}
- rb_link_node(&nskb->rbnode, parent, p);
- rb_insert_color(&nskb->rbnode, &q->t_root);
sch->q.qlen++;
}
@@ -431,6 +448,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
int count = 1;
int rc = NET_XMIT_SUCCESS;
+ /* Do not fool qdisc_drop_all() */
+ skb->prev = NULL;
+
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
++count;
@@ -530,9 +550,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
t_skb = skb_rb_last(&q->t_root);
t_last = netem_skb_cb(t_skb);
if (!last ||
- t_last->time_to_send > last->time_to_send) {
+ t_last->time_to_send > last->time_to_send)
+ last = t_last;
+ }
+ if (q->t_tail) {
+ struct netem_skb_cb *t_last =
+ netem_skb_cb(q->t_tail);
+
+ if (!last ||
+ t_last->time_to_send > last->time_to_send)
last = t_last;
- }
}
if (last) {
@@ -611,11 +638,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
q->slot.bytes_left = q->slot_config.max_bytes;
}
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+ struct sk_buff *skb = skb_rb_first(&q->t_root);
+ u64 t1, t2;
+
+ if (!skb)
+ return q->t_head;
+ if (!q->t_head)
+ return skb;
+
+ t1 = netem_skb_cb(skb)->time_to_send;
+ t2 = netem_skb_cb(q->t_head)->time_to_send;
+ if (t1 < t2)
+ return skb;
+ return q->t_head;
+}
+
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+ if (skb == q->t_head) {
+ q->t_head = skb->next;
+ if (!q->t_head)
+ q->t_tail = NULL;
+ } else {
+ rb_erase(&skb->rbnode, &q->t_root);
+ }
+}
+
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- struct rb_node *p;
tfifo_dequeue:
skb = __qdisc_dequeue_head(&sch->q);
@@ -625,20 +679,18 @@ deliver:
qdisc_bstats_update(sch, skb);
return skb;
}
- p = rb_first(&q->t_root);
- if (p) {
+ skb = netem_peek(q);
+ if (skb) {
u64 time_to_send;
u64 now = ktime_get_ns();
- skb = rb_to_skb(p);
-
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
if (q->slot.slot_next && q->slot.slot_next < time_to_send)
get_slot_next(q, now);
- if (time_to_send <= now && q->slot.slot_next <= now) {
- rb_erase(p, &q->t_root);
+ if (time_to_send <= now && q->slot.slot_next <= now) {
+ netem_erase_head(q, skb);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
@@ -648,15 +700,6 @@ deliver:
*/
skb->dev = qdisc_dev(sch);
-#ifdef CONFIG_NET_CLS_ACT
- /*
- * If it's at ingress let's pretend the delay is
- * from the network (tstamp will be updated).
- */
- if (skb->tc_redirected && skb->tc_from_ingress)
- skb->tstamp = 0;
-#endif
-
if (q->slot.slot_next) {
q->slot.packets_left--;
q->slot.bytes_left -= qdisc_pkt_len(skb);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7682f7a618a1..9df9942340ea 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -166,7 +166,9 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.set.min = q->parms.qth_min >> q->parms.Wlog;
opt.set.max = q->parms.qth_max >> q->parms.Wlog;
opt.set.probability = q->parms.max_P;
+ opt.set.limit = q->limit;
opt.set.is_ecn = red_use_ecn(q);
+ opt.set.is_harddrop = red_use_harddrop(q);
opt.set.qstats = &sch->qstats;
} else {
opt.command = TC_RED_DESTROY;
@@ -366,6 +368,21 @@ static int red_dump_class(struct Qdisc *sch, unsigned long cl,
return 0;
}
+static void red_graft_offload(struct Qdisc *sch,
+ struct Qdisc *new, struct Qdisc *old,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_red_qopt_offload graft_offload = {
+ .handle = sch->handle,
+ .parent = sch->parent,
+ .child_handle = new->handle,
+ .command = TC_RED_GRAFT,
+ };
+
+ qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
+ TC_SETUP_QDISC_RED, &graft_offload, extack);
+}
+
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct Qdisc **old, struct netlink_ext_ack *extack)
{
@@ -375,6 +392,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
*old = qdisc_replace(sch, new, &q->qdisc);
+
+ red_graft_offload(sch, new, *old, extack);
return 0;
}
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 6a28b96e779e..201c888604e4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -118,9 +118,6 @@ static struct sctp_association *sctp_association_init(
asoc->flowlabel = sp->flowlabel;
asoc->dscp = sp->dscp;
- /* Initialize default path MTU. */
- asoc->pathmtu = sp->pathmtu;
-
/* Set association default SACK delay */
asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
asoc->sackfreq = sp->sackfreq;
@@ -135,6 +132,8 @@ static struct sctp_association *sctp_association_init(
*/
asoc->max_burst = sp->max_burst;
+ asoc->subscribe = sp->subscribe;
+
/* initialize association timers */
asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = asoc->rto_initial;
asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = asoc->rto_initial;
@@ -252,6 +251,10 @@ static struct sctp_association *sctp_association_init(
0, gfp))
goto fail_init;
+ /* Initialize default path MTU. */
+ asoc->pathmtu = sp->pathmtu;
+ sctp_assoc_update_frag_point(asoc);
+
/* Assume that peer would support both address types unless we are
* told otherwise.
*/
@@ -434,7 +437,7 @@ static void sctp_association_destroy(struct sctp_association *asoc)
WARN_ON(atomic_read(&asoc->rmem_alloc));
- kfree(asoc);
+ kfree_rcu(asoc, rcu);
SCTP_DBG_OBJCNT_DEC(assoc);
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 7df3704982f5..ebf28adba789 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -337,6 +337,34 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
return match;
}
+int sctp_bind_addrs_check(struct sctp_sock *sp,
+ struct sctp_sock *sp2, int cnt2)
+{
+ struct sctp_bind_addr *bp2 = &sp2->ep->base.bind_addr;
+ struct sctp_bind_addr *bp = &sp->ep->base.bind_addr;
+ struct sctp_sockaddr_entry *laddr, *laddr2;
+ bool exist = false;
+ int cnt = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ list_for_each_entry_rcu(laddr2, &bp2->address_list, list) {
+ if (sp->pf->af->cmp_addr(&laddr->a, &laddr2->a) &&
+ laddr->valid && laddr2->valid) {
+ exist = true;
+ goto next;
+ }
+ }
+ cnt = 0;
+ break;
+next:
+ cnt++;
+ }
+ rcu_read_unlock();
+
+ return (cnt == cnt2) ? 0 : (exist ? -EEXIST : 1);
+}
+
/* Does the address 'addr' conflict with any addresses in
* the bp.
*/
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index ce8087846f05..64bef313d436 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -86,11 +86,10 @@ void sctp_datamsg_free(struct sctp_datamsg *msg)
/* Final destructruction of datamsg memory. */
static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
{
+ struct sctp_association *asoc = NULL;
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
- struct sctp_sock *sp;
struct sctp_ulpevent *ev;
- struct sctp_association *asoc = NULL;
int error = 0, notify;
/* If we failed, we may need to notify. */
@@ -108,9 +107,8 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
else
error = asoc->outqueue.error;
- sp = sctp_sk(asoc->base.sk);
- notify = sctp_ulpevent_type_enabled(SCTP_SEND_FAILED,
- &sp->subscribe);
+ notify = sctp_ulpevent_type_enabled(asoc->subscribe,
+ SCTP_SEND_FAILED);
}
/* Generate a SEND FAILED event only if enabled. */
@@ -191,6 +189,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
* the packet
*/
max_data = asoc->frag_point;
+ if (unlikely(!max_data)) {
+ max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk),
+ sctp_datachk_len(&asoc->stream));
+ pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%Zu)",
+ __func__, asoc, max_data);
+ }
/* If the the peer requested that we authenticate DATA chunks
* we need to account for bundling of the AUTH chunks along with
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 7ab08a5b36dc..d7a649d240e5 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -57,6 +57,7 @@
#include <net/sctp/checksum.h>
#include <net/net_namespace.h>
#include <linux/rhashtable.h>
+#include <net/sock_reuseport.h>
/* Forward declarations for internal helpers. */
static int sctp_rcv_ootb(struct sk_buff *);
@@ -65,8 +66,10 @@ static struct sctp_association *__sctp_rcv_lookup(struct net *net,
const union sctp_addr *paddr,
const union sctp_addr *laddr,
struct sctp_transport **transportp);
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
- const union sctp_addr *laddr);
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
+ struct net *net, struct sk_buff *skb,
+ const union sctp_addr *laddr,
+ const union sctp_addr *daddr);
static struct sctp_association *__sctp_lookup_association(
struct net *net,
const union sctp_addr *local,
@@ -171,7 +174,7 @@ int sctp_rcv(struct sk_buff *skb)
asoc = __sctp_rcv_lookup(net, skb, &src, &dest, &transport);
if (!asoc)
- ep = __sctp_rcv_lookup_endpoint(net, &dest);
+ ep = __sctp_rcv_lookup_endpoint(net, skb, &dest, &src);
/* Retrieve the common input handling substructure. */
rcvr = asoc ? &asoc->base : &ep->base;
@@ -721,43 +724,87 @@ discard:
}
/* Insert endpoint into the hash table. */
-static void __sctp_hash_endpoint(struct sctp_endpoint *ep)
+static int __sctp_hash_endpoint(struct sctp_endpoint *ep)
{
- struct net *net = sock_net(ep->base.sk);
- struct sctp_ep_common *epb;
+ struct sock *sk = ep->base.sk;
+ struct net *net = sock_net(sk);
struct sctp_hashbucket *head;
+ struct sctp_ep_common *epb;
epb = &ep->base;
-
epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
+ if (sk->sk_reuseport) {
+ bool any = sctp_is_ep_boundall(sk);
+ struct sctp_ep_common *epb2;
+ struct list_head *list;
+ int cnt = 0, err = 1;
+
+ list_for_each(list, &ep->base.bind_addr.address_list)
+ cnt++;
+
+ sctp_for_each_hentry(epb2, &head->chain) {
+ struct sock *sk2 = epb2->sk;
+
+ if (!net_eq(sock_net(sk2), net) || sk2 == sk ||
+ !uid_eq(sock_i_uid(sk2), sock_i_uid(sk)) ||
+ !sk2->sk_reuseport)
+ continue;
+
+ err = sctp_bind_addrs_check(sctp_sk(sk2),
+ sctp_sk(sk), cnt);
+ if (!err) {
+ err = reuseport_add_sock(sk, sk2, any);
+ if (err)
+ return err;
+ break;
+ } else if (err < 0) {
+ return err;
+ }
+ }
+
+ if (err) {
+ err = reuseport_alloc(sk, any);
+ if (err)
+ return err;
+ }
+ }
+
write_lock(&head->lock);
hlist_add_head(&epb->node, &head->chain);
write_unlock(&head->lock);
+ return 0;
}
/* Add an endpoint to the hash. Local BH-safe. */
-void sctp_hash_endpoint(struct sctp_endpoint *ep)
+int sctp_hash_endpoint(struct sctp_endpoint *ep)
{
+ int err;
+
local_bh_disable();
- __sctp_hash_endpoint(ep);
+ err = __sctp_hash_endpoint(ep);
local_bh_enable();
+
+ return err;
}
/* Remove endpoint from the hash table. */
static void __sctp_unhash_endpoint(struct sctp_endpoint *ep)
{
- struct net *net = sock_net(ep->base.sk);
+ struct sock *sk = ep->base.sk;
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
epb = &ep->base;
- epb->hashent = sctp_ep_hashfn(net, epb->bind_addr.port);
+ epb->hashent = sctp_ep_hashfn(sock_net(sk), epb->bind_addr.port);
head = &sctp_ep_hashtable[epb->hashent];
+ if (rcu_access_pointer(sk->sk_reuseport_cb))
+ reuseport_detach_sock(sk);
+
write_lock(&head->lock);
hlist_del_init(&epb->node);
write_unlock(&head->lock);
@@ -771,16 +818,35 @@ void sctp_unhash_endpoint(struct sctp_endpoint *ep)
local_bh_enable();
}
+static inline __u32 sctp_hashfn(const struct net *net, __be16 lport,
+ const union sctp_addr *paddr, __u32 seed)
+{
+ __u32 addr;
+
+ if (paddr->sa.sa_family == AF_INET6)
+ addr = jhash(&paddr->v6.sin6_addr, 16, seed);
+ else
+ addr = (__force __u32)paddr->v4.sin_addr.s_addr;
+
+ return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
+ (__force __u32)lport, net_hash_mix(net), seed);
+}
+
/* Look up an endpoint. */
-static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
- const union sctp_addr *laddr)
+static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(
+ struct net *net, struct sk_buff *skb,
+ const union sctp_addr *laddr,
+ const union sctp_addr *paddr)
{
struct sctp_hashbucket *head;
struct sctp_ep_common *epb;
struct sctp_endpoint *ep;
+ struct sock *sk;
+ __be16 lport;
int hash;
- hash = sctp_ep_hashfn(net, ntohs(laddr->v4.sin_port));
+ lport = laddr->v4.sin_port;
+ hash = sctp_ep_hashfn(net, ntohs(lport));
head = &sctp_ep_hashtable[hash];
read_lock(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
@@ -792,6 +858,15 @@ static struct sctp_endpoint *__sctp_rcv_lookup_endpoint(struct net *net,
ep = sctp_sk(net->sctp.ctl_sock)->ep;
hit:
+ sk = ep->base.sk;
+ if (sk->sk_reuseport) {
+ __u32 phash = sctp_hashfn(net, lport, paddr, 0);
+
+ sk = reuseport_select_sock(sk, phash, skb,
+ sizeof(struct sctphdr));
+ if (sk)
+ ep = sctp_sk(sk)->ep;
+ }
sctp_endpoint_hold(ep);
read_unlock(&head->lock);
return ep;
@@ -830,35 +905,17 @@ out:
static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
{
const struct sctp_transport *t = data;
- const union sctp_addr *paddr = &t->ipaddr;
- const struct net *net = sock_net(t->asoc->base.sk);
- __be16 lport = htons(t->asoc->base.bind_addr.port);
- __u32 addr;
- if (paddr->sa.sa_family == AF_INET6)
- addr = jhash(&paddr->v6.sin6_addr, 16, seed);
- else
- addr = (__force __u32)paddr->v4.sin_addr.s_addr;
-
- return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
- (__force __u32)lport, net_hash_mix(net), seed);
+ return sctp_hashfn(sock_net(t->asoc->base.sk),
+ htons(t->asoc->base.bind_addr.port),
+ &t->ipaddr, seed);
}
static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed)
{
const struct sctp_hash_cmp_arg *x = data;
- const union sctp_addr *paddr = x->paddr;
- const struct net *net = x->net;
- __be16 lport = x->lport;
- __u32 addr;
-
- if (paddr->sa.sa_family == AF_INET6)
- addr = jhash(&paddr->v6.sin6_addr, 16, seed);
- else
- addr = (__force __u32)paddr->v4.sin_addr.s_addr;
- return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 |
- (__force __u32)lport, net_hash_mix(net), seed);
+ return sctp_hashfn(x->net, x->lport, x->paddr, seed);
}
static const struct rhashtable_params sctp_hash_params = {
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 67939ad99c01..025f48e14a91 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
sctp_transport_route(tp, NULL, sp);
if (asoc->param_flags & SPP_PMTUD_ENABLE)
sctp_assoc_sync_pmtu(asoc);
+ } else if (!sctp_transport_pmtu_check(tp)) {
+ if (asoc->param_flags & SPP_PMTUD_ENABLE)
+ sctp_assoc_sync_pmtu(asoc);
}
if (asoc->pmtu_pending) {
@@ -396,25 +399,6 @@ finish:
return retval;
}
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
- sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- skb_orphan(skb);
- skb->sk = sk;
- skb->destructor = sctp_packet_release_owner;
-
- /*
- * The data chunks have already been accounted for in sctp_sendmsg(),
- * therefore only reserve a single byte to keep socket around until
- * the packet has been transmitted.
- */
- refcount_inc(&sk->sk_wmem_alloc);
-}
-
static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
{
if (SCTP_OUTPUT_CB(head)->last == head)
@@ -426,6 +410,7 @@ static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
head->truesize += skb->truesize;
head->data_len += skb->len;
head->len += skb->len;
+ refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
__skb_header_release(skb);
}
@@ -601,7 +586,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
if (!head)
goto out;
skb_reserve(head, packet->overhead + MAX_HEADER);
- sctp_packet_set_owner_w(head, sk);
+ skb_set_owner_w(head, sk);
/* set sctp header */
sh = skb_push(head, sizeof(struct sctphdr));
diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c
index c0817f7a8964..a8c4c33377bc 100644
--- a/net/sctp/primitive.c
+++ b/net/sctp/primitive.c
@@ -53,7 +53,7 @@
int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \
void *arg) { \
int error = 0; \
- enum sctp_event event_type; union sctp_subtype subtype; \
+ enum sctp_event_type event_type; union sctp_subtype subtype; \
enum sctp_state state; \
struct sctp_endpoint *ep; \
\
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4a4fd1971255..f4ac6c592e13 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
asoc->c.sinit_max_instreams, gfp))
goto clean_up;
+ /* Update frag_point when stream_interleave may get changed. */
+ sctp_assoc_update_frag_point(asoc);
+
if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
goto clean_up;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 85d393090238..1d143bc3f73d 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -52,7 +52,7 @@
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
-static int sctp_cmd_interpreter(enum sctp_event event_type,
+static int sctp_cmd_interpreter(enum sctp_event_type event_type,
union sctp_subtype subtype,
enum sctp_state state,
struct sctp_endpoint *ep,
@@ -61,7 +61,7 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
enum sctp_disposition status,
struct sctp_cmd_seq *commands,
gfp_t gfp);
-static int sctp_side_effects(enum sctp_event event_type,
+static int sctp_side_effects(enum sctp_event_type event_type,
union sctp_subtype subtype,
enum sctp_state state,
struct sctp_endpoint *ep,
@@ -623,7 +623,7 @@ static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands,
/* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */
static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands,
struct sctp_association *asoc,
- enum sctp_event event_type,
+ enum sctp_event_type event_type,
union sctp_subtype subtype,
struct sctp_chunk *chunk,
unsigned int error)
@@ -1162,7 +1162,7 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc)
* If you want to understand all of lksctp, this is a
* good place to start.
*/
-int sctp_do_sm(struct net *net, enum sctp_event event_type,
+int sctp_do_sm(struct net *net, enum sctp_event_type event_type,
union sctp_subtype subtype, enum sctp_state state,
struct sctp_endpoint *ep, struct sctp_association *asoc,
void *event_arg, gfp_t gfp)
@@ -1199,7 +1199,7 @@ int sctp_do_sm(struct net *net, enum sctp_event event_type,
/*****************************************************************
* This the master state function side effect processing function.
*****************************************************************/
-static int sctp_side_effects(enum sctp_event event_type,
+static int sctp_side_effects(enum sctp_event_type event_type,
union sctp_subtype subtype,
enum sctp_state state,
struct sctp_endpoint *ep,
@@ -1285,7 +1285,7 @@ bail:
********************************************************************/
/* This is the side-effect interpreter. */
-static int sctp_cmd_interpreter(enum sctp_event event_type,
+static int sctp_cmd_interpreter(enum sctp_event_type event_type,
union sctp_subtype subtype,
enum sctp_state state,
struct sctp_endpoint *ep,
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 691d9dc620e3..d239b94aa48c 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -79,7 +79,7 @@ static const struct sctp_sm_table_entry bug = {
const struct sctp_sm_table_entry *sctp_sm_lookup_event(
struct net *net,
- enum sctp_event event_type,
+ enum sctp_event_type event_type,
enum sctp_state state,
union sctp_subtype event_subtype)
{
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 739f3e50120d..f93c3cf9e567 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2230,7 +2230,7 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (sp->recvrcvinfo)
sctp_ulpevent_read_rcvinfo(event, msg);
/* Check if we allow SCTP_SNDRCVINFO. */
- if (sp->subscribe.sctp_data_io_event)
+ if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_DATA_IO_EVENT))
sctp_ulpevent_read_sndrcvinfo(event, msg);
err = copied;
@@ -2304,22 +2304,33 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk,
static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
unsigned int optlen)
{
+ struct sctp_event_subscribe subscribe;
+ __u8 *sn_type = (__u8 *)&subscribe;
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_association *asoc;
- struct sctp_ulpevent *event;
+ int i;
if (optlen > sizeof(struct sctp_event_subscribe))
return -EINVAL;
- if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
+
+ if (copy_from_user(&subscribe, optval, optlen))
return -EFAULT;
+ for (i = 0; i < optlen; i++)
+ sctp_ulpevent_type_set(&sp->subscribe, SCTP_SN_TYPE_BASE + i,
+ sn_type[i]);
+
+ list_for_each_entry(asoc, &sp->ep->asocs, asocs)
+ asoc->subscribe = sctp_sk(sk)->subscribe;
+
/* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT,
* if there is no data to be sent or retransmit, the stack will
* immediately send up this notification.
*/
- if (sctp_ulpevent_type_enabled(SCTP_SENDER_DRY_EVENT,
- &sctp_sk(sk)->subscribe)) {
- asoc = sctp_id2assoc(sk, 0);
+ if (sctp_ulpevent_type_enabled(sp->subscribe, SCTP_SENDER_DRY_EVENT)) {
+ struct sctp_ulpevent *event;
+ asoc = sctp_id2assoc(sk, 0);
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
event = sctp_ulpevent_make_sender_dry_event(asoc,
GFP_USER | __GFP_NOWARN);
@@ -3324,8 +3335,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
__u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
sizeof(struct sctp_data_chunk);
- min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
- datasize);
+ min_len = sctp_min_frag_point(sp, datasize);
max_len = SCTP_MAX_CHUNK_LEN - datasize;
if (val < min_len || val > max_len)
@@ -3940,32 +3950,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
unsigned int optlen)
{
struct sctp_assoc_value params;
- struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (asoc) {
- asoc->prsctp_enable = !!params.assoc_value;
- } else if (!params.assoc_id) {
- struct sctp_sock *sp = sctp_sk(sk);
+ return -EINVAL;
- sp->ep->prsctp_enable = !!params.assoc_value;
- } else {
- goto out;
- }
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
- retval = 0;
+ sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
-out:
- return retval;
+ return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
@@ -4277,6 +4271,57 @@ static int sctp_setsockopt_reuse_port(struct sock *sk, char __user *optval,
return 0;
}
+static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_ulpevent *event;
+ struct sctp_event param;
+ int retval = 0;
+
+ if (optlen < sizeof(param)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ optlen = sizeof(param);
+ if (copy_from_user(&param, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (param.se_type < SCTP_SN_TYPE_BASE ||
+ param.se_type > SCTP_SN_TYPE_MAX) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, param.se_assoc_id);
+ if (!asoc) {
+ sctp_ulpevent_type_set(&sctp_sk(sk)->subscribe,
+ param.se_type, param.se_on);
+ goto out;
+ }
+
+ sctp_ulpevent_type_set(&asoc->subscribe, param.se_type, param.se_on);
+
+ if (param.se_type == SCTP_SENDER_DRY_EVENT && param.se_on) {
+ if (sctp_outq_is_empty(&asoc->outqueue)) {
+ event = sctp_ulpevent_make_sender_dry_event(asoc,
+ GFP_USER | __GFP_NOWARN);
+ if (!event) {
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ asoc->stream.si->enqueue_event(&asoc->ulpq, event);
+ }
+ }
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4474,6 +4519,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_REUSE_PORT:
retval = sctp_setsockopt_reuse_port(sk, optval, optlen);
break;
+ case SCTP_EVENT:
+ retval = sctp_setsockopt_event(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -4722,7 +4770,7 @@ static int sctp_init_sock(struct sock *sk)
/* Initialize default event subscriptions. By default, all the
* options are off.
*/
- memset(&sp->subscribe, 0, sizeof(struct sctp_event_subscribe));
+ sp->subscribe = 0;
/* Default Peer Address Parameters. These defaults can
* be modified via SCTP_PEER_ADDR_PARAMS
@@ -5267,14 +5315,24 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len,
static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
int __user *optlen)
{
+ struct sctp_event_subscribe subscribe;
+ __u8 *sn_type = (__u8 *)&subscribe;
+ int i;
+
if (len == 0)
return -EINVAL;
if (len > sizeof(struct sctp_event_subscribe))
len = sizeof(struct sctp_event_subscribe);
if (put_user(len, optlen))
return -EFAULT;
- if (copy_to_user(optval, &sctp_sk(sk)->subscribe, len))
+
+ for (i = 0; i < len; i++)
+ sn_type[i] = sctp_ulpevent_type_enabled(sctp_sk(sk)->subscribe,
+ SCTP_SN_TYPE_BASE + i);
+
+ if (copy_to_user(optval, &subscribe, len))
return -EFAULT;
+
return 0;
}
@@ -7409,6 +7467,37 @@ static int sctp_getsockopt_reuse_port(struct sock *sk, int len,
return 0;
}
+static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_event param;
+ __u16 subscribe;
+
+ if (len < sizeof(param))
+ return -EINVAL;
+
+ len = sizeof(param);
+ if (copy_from_user(&param, optval, len))
+ return -EFAULT;
+
+ if (param.se_type < SCTP_SN_TYPE_BASE ||
+ param.se_type > SCTP_SN_TYPE_MAX)
+ return -EINVAL;
+
+ asoc = sctp_id2assoc(sk, param.se_assoc_id);
+ subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe;
+ param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type);
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, &param, len))
+ return -EFAULT;
+
+ return 0;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -7607,6 +7696,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
case SCTP_REUSE_PORT:
retval = sctp_getsockopt_reuse_port(sk, len, optval, optlen);
break;
+ case SCTP_EVENT:
+ retval = sctp_getsockopt_event(sk, len, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -7644,8 +7736,10 @@ static struct sctp_bind_bucket *sctp_bucket_create(
static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
{
- bool reuse = (sk->sk_reuse || sctp_sk(sk)->reuse);
+ struct sctp_sock *sp = sctp_sk(sk);
+ bool reuse = (sk->sk_reuse || sp->reuse);
struct sctp_bind_hashbucket *head; /* hash list */
+ kuid_t uid = sock_i_uid(sk);
struct sctp_bind_bucket *pp;
unsigned short snum;
int ret;
@@ -7721,7 +7815,10 @@ pp_found:
pr_debug("%s: found a possible match\n", __func__);
- if (pp->fastreuse && reuse && sk->sk_state != SCTP_SS_LISTENING)
+ if ((pp->fastreuse && reuse &&
+ sk->sk_state != SCTP_SS_LISTENING) ||
+ (pp->fastreuseport && sk->sk_reuseport &&
+ uid_eq(pp->fastuid, uid)))
goto success;
/* Run through the list of sockets bound to the port
@@ -7735,16 +7832,18 @@ pp_found:
* in an endpoint.
*/
sk_for_each_bound(sk2, &pp->owner) {
- struct sctp_endpoint *ep2;
- ep2 = sctp_sk(sk2)->ep;
+ struct sctp_sock *sp2 = sctp_sk(sk2);
+ struct sctp_endpoint *ep2 = sp2->ep;
if (sk == sk2 ||
- (reuse && (sk2->sk_reuse || sctp_sk(sk2)->reuse) &&
- sk2->sk_state != SCTP_SS_LISTENING))
+ (reuse && (sk2->sk_reuse || sp2->reuse) &&
+ sk2->sk_state != SCTP_SS_LISTENING) ||
+ (sk->sk_reuseport && sk2->sk_reuseport &&
+ uid_eq(uid, sock_i_uid(sk2))))
continue;
- if (sctp_bind_addr_conflict(&ep2->base.bind_addr, addr,
- sctp_sk(sk2), sctp_sk(sk))) {
+ if (sctp_bind_addr_conflict(&ep2->base.bind_addr,
+ addr, sp2, sp)) {
ret = (long)sk2;
goto fail_unlock;
}
@@ -7767,19 +7866,32 @@ pp_not_found:
pp->fastreuse = 1;
else
pp->fastreuse = 0;
- } else if (pp->fastreuse &&
- (!reuse || sk->sk_state == SCTP_SS_LISTENING))
- pp->fastreuse = 0;
+
+ if (sk->sk_reuseport) {
+ pp->fastreuseport = 1;
+ pp->fastuid = uid;
+ } else {
+ pp->fastreuseport = 0;
+ }
+ } else {
+ if (pp->fastreuse &&
+ (!reuse || sk->sk_state == SCTP_SS_LISTENING))
+ pp->fastreuse = 0;
+
+ if (pp->fastreuseport &&
+ (!sk->sk_reuseport || !uid_eq(pp->fastuid, uid)))
+ pp->fastreuseport = 0;
+ }
/* We are set, so fill up all the data in the hash table
* entry, tie the socket list information with the rest of the
* sockets FIXME: Blurry, NPI (ipg).
*/
success:
- if (!sctp_sk(sk)->bind_hash) {
+ if (!sp->bind_hash) {
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &pp->owner);
- sctp_sk(sk)->bind_hash = pp;
+ sp->bind_hash = pp;
}
ret = 0;
@@ -7852,8 +7964,7 @@ static int sctp_listen_start(struct sock *sk, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sctp_hash_endpoint(ep);
- return 0;
+ return sctp_hash_endpoint(ep);
}
/*
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ffb940d3b57c..3892e7630f3a 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -535,7 +535,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
goto out;
}
- stream->incnt = incnt;
stream->outcnt = outcnt;
asoc->strreset_outstanding = !!out + !!in;
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 0a78cdf86463..a6bf21579466 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -140,7 +140,7 @@ static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq,
struct sctp_ulpevent *event)
{
struct sctp_ulpevent *cevent;
- struct sk_buff *pos;
+ struct sk_buff *pos, *loc;
pos = skb_peek_tail(&ulpq->reasm);
if (!pos) {
@@ -166,23 +166,30 @@ static void sctp_intl_store_reasm(struct sctp_ulpq *ulpq,
return;
}
+ loc = NULL;
skb_queue_walk(&ulpq->reasm, pos) {
cevent = sctp_skb2event(pos);
if (event->stream < cevent->stream ||
(event->stream == cevent->stream &&
- MID_lt(event->mid, cevent->mid)))
+ MID_lt(event->mid, cevent->mid))) {
+ loc = pos;
break;
-
+ }
if (event->stream == cevent->stream &&
event->mid == cevent->mid &&
!(cevent->msg_flags & SCTP_DATA_FIRST_FRAG) &&
(event->msg_flags & SCTP_DATA_FIRST_FRAG ||
- event->fsn < cevent->fsn))
+ event->fsn < cevent->fsn)) {
+ loc = pos;
break;
+ }
}
- __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event));
+ if (!loc)
+ __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event));
+ else
+ __skb_queue_before(&ulpq->reasm, loc, sctp_event2skb(event));
}
static struct sctp_ulpevent *sctp_intl_retrieve_partial(
@@ -383,7 +390,7 @@ static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq,
struct sctp_ulpevent *event)
{
struct sctp_ulpevent *cevent;
- struct sk_buff *pos;
+ struct sk_buff *pos, *loc;
pos = skb_peek_tail(&ulpq->lobby);
if (!pos) {
@@ -403,18 +410,25 @@ static void sctp_intl_store_ordered(struct sctp_ulpq *ulpq,
return;
}
+ loc = NULL;
skb_queue_walk(&ulpq->lobby, pos) {
cevent = (struct sctp_ulpevent *)pos->cb;
- if (cevent->stream > event->stream)
+ if (cevent->stream > event->stream) {
+ loc = pos;
break;
-
+ }
if (cevent->stream == event->stream &&
- MID_lt(event->mid, cevent->mid))
+ MID_lt(event->mid, cevent->mid)) {
+ loc = pos;
break;
+ }
}
- __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event));
+ if (!loc)
+ __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event));
+ else
+ __skb_queue_before(&ulpq->lobby, loc, sctp_event2skb(event));
}
static void sctp_intl_retrieve_ordered(struct sctp_ulpq *ulpq,
@@ -489,7 +503,7 @@ static int sctp_enqueue_event(struct sctp_ulpq *ulpq,
sk_incoming_cpu_update(sk);
}
- if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+ if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe))
goto out_free;
if (skb_list)
@@ -980,17 +994,19 @@ static void sctp_intl_stream_abort_pd(struct sctp_ulpq *ulpq, __u16 sid,
struct sock *sk = ulpq->asoc->base.sk;
struct sctp_ulpevent *ev = NULL;
- if (!sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
- &sctp_sk(sk)->subscribe))
+ if (!sctp_ulpevent_type_enabled(ulpq->asoc->subscribe,
+ SCTP_PARTIAL_DELIVERY_EVENT))
return;
ev = sctp_ulpevent_make_pdapi(ulpq->asoc, SCTP_PARTIAL_DELIVERY_ABORTED,
sid, mid, flags, gfp);
if (ev) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
- if (!sctp_sk(sk)->data_ready_signalled) {
- sctp_sk(sk)->data_ready_signalled = 1;
+ if (!sp->data_ready_signalled) {
+ sp->data_ready_signalled = 1;
sk->sk_data_ready(sk);
}
}
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 331cc734e3db..5dde92101743 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -219,7 +219,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
sk_incoming_cpu_update(sk);
}
/* Check if the user wishes to receive this event. */
- if (!sctp_ulpevent_is_enabled(event, &sp->subscribe))
+ if (!sctp_ulpevent_is_enabled(event, ulpq->asoc->subscribe))
goto out_free;
/* If we are in partial delivery mode, post to the lobby until
@@ -1129,16 +1129,16 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp)
{
struct sctp_ulpevent *ev = NULL;
- struct sock *sk;
struct sctp_sock *sp;
+ struct sock *sk;
if (!ulpq->pd_mode)
return;
sk = ulpq->asoc->base.sk;
sp = sctp_sk(sk);
- if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT,
- &sctp_sk(sk)->subscribe))
+ if (sctp_ulpevent_type_enabled(ulpq->asoc->subscribe,
+ SCTP_PARTIAL_DELIVERY_EVENT))
ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
SCTP_PARTIAL_DELIVERY_ABORTED,
0, 0, 0, gfp);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119f1c70..63f08b4e51d6 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -127,6 +127,8 @@ static int smc_release(struct socket *sock)
smc = smc_sk(sk);
/* cleanup for a dangling non-blocking connect */
+ if (smc->connect_info && sk->sk_state == SMC_INIT)
+ tcp_abort(smc->clcsock->sk, ECONNABORTED);
flush_work(&smc->connect_work);
kfree(smc->connect_info);
smc->connect_info = NULL;
@@ -299,14 +301,17 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
-/* register a new rmb, optionally send confirm_rkey msg to register with peer */
+/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
bool conf_rkey)
{
- /* register memory region for new rmb */
- if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
- rmb_desc->regerr = 1;
- return -EFAULT;
+ if (!rmb_desc->wr_reg) {
+ /* register memory region for new rmb */
+ if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+ rmb_desc->regerr = 1;
+ return -EFAULT;
+ }
+ rmb_desc->wr_reg = 1;
}
if (!conf_rkey)
return 0;
@@ -335,8 +340,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE);
- return rc;
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
if (link->llc_confirm_rc)
@@ -363,8 +368,8 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE);
- return rc;
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
}
/* send add link reject message, only one link supported for now */
@@ -533,7 +538,8 @@ static int smc_connect_clc(struct smc_sock *smc, int smc_type,
if (rc)
return rc;
/* receive SMC Accept CLC message */
- return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT);
+ return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
+ CLC_WAIT_TIME);
}
/* setup for RDMA connection of client */
@@ -547,7 +553,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
mutex_lock(&smc_create_lgr_pending);
local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
- ibport, &aclc->lcl, NULL, 0);
+ ibport, ntoh24(aclc->qpn), &aclc->lcl,
+ NULL, 0);
if (local_contact < 0) {
if (local_contact == -ENOMEM)
reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -580,8 +587,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
local_contact);
} else {
- if (!smc->conn.rmb_desc->reused &&
- smc_reg_rmb(link, smc->conn.rmb_desc, true))
+ if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
local_contact);
}
@@ -618,7 +624,7 @@ static int smc_connect_ism(struct smc_sock *smc,
int rc = 0;
mutex_lock(&smc_create_lgr_pending);
- local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+ local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
NULL, ismdev, aclc->gid);
if (local_contact < 0)
return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
@@ -965,8 +971,8 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE);
- return rc;
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
}
if (link->llc_confirm_resp_rc)
@@ -986,8 +992,8 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
struct smc_clc_msg_decline dclc;
rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
- SMC_CLC_DECLINE);
- return rc;
+ SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
+ return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
}
smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
@@ -1083,7 +1089,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
int *local_contact)
{
/* allocate connection / link group */
- *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+ *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
&pclc->lcl, NULL, 0);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
@@ -1107,7 +1113,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
struct smc_clc_msg_smcd *pclc_smcd;
pclc_smcd = smc_get_clc_msg_smcd(pclc);
- *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+ *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
ismdev, pclc_smcd->gid);
if (*local_contact < 0) {
if (*local_contact == -ENOMEM)
@@ -1142,10 +1148,8 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
if (local_contact != SMC_FIRST_CONTACT) {
- if (!new_smc->conn.rmb_desc->reused) {
- if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
- return SMC_CLC_DECL_ERR_REGRMB;
- }
+ if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+ return SMC_CLC_DECL_ERR_REGRMB;
}
smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1181,7 +1185,6 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
return 0;
decline:
- mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, reason_code, local_contact);
return reason_code;
}
@@ -1222,7 +1225,7 @@ static void smc_listen_work(struct work_struct *work)
*/
pclc = (struct smc_clc_msg_proposal *)&buf;
reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
- SMC_CLC_PROPOSAL);
+ SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
if (reason_code) {
smc_listen_decline(new_smc, reason_code, 0);
return;
@@ -1272,7 +1275,7 @@ static void smc_listen_work(struct work_struct *work)
/* receive SMC Confirm CLC message */
reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
- SMC_CLC_CONFIRM);
+ SMC_CLC_CONFIRM, CLC_WAIT_TIME);
if (reason_code) {
mutex_unlock(&smc_create_lgr_pending);
smc_listen_decline(new_smc, reason_code, local_contact);
@@ -1281,8 +1284,10 @@ static void smc_listen_work(struct work_struct *work)
/* finish worker */
if (!ism_supported) {
- if (smc_listen_rdma_finish(new_smc, &cclc, local_contact))
+ if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) {
+ mutex_unlock(&smc_create_lgr_pending);
return;
+ }
}
smc_conn_save_peer_info(new_smc, &cclc);
mutex_unlock(&smc_create_lgr_pending);
@@ -1354,7 +1359,6 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
- INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
sock_hold(sk); /* sock_hold in tcp_listen_worker */
if (!schedule_work(&smc->tcp_listen_work))
sock_put(sk);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf03fe0b..db83332ac1c8 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -81,7 +81,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
BUILD_BUG_ON_MSG(
- sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+ offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
BUILD_BUG_ON_MSG(
sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -177,23 +177,24 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
int smcd_cdc_msg_send(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ union smc_host_cursor curs;
struct smcd_cdc_msg cdc;
int rc, diff;
memset(&cdc, 0, sizeof(cdc));
cdc.common.type = SMC_CDC_MSG_TYPE;
- cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
- cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
- cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
- cdc.cons_count = conn->local_tx_ctrl.cons.count;
- cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
- cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+ cdc.prod.wrap = curs.wrap;
+ cdc.prod.count = curs.count;
+ curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+ cdc.cons.wrap = curs.wrap;
+ cdc.cons.count = curs.count;
+ cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+ cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
if (rc)
return rc;
- smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
- conn);
+ smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
/* Calculate transmitted data and increment free send buffer space */
diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
&conn->tx_curs_sent);
@@ -331,13 +332,16 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
static void smcd_cdc_rx_tsklet(unsigned long data)
{
struct smc_connection *conn = (struct smc_connection *)data;
+ struct smcd_cdc_msg *data_cdc;
struct smcd_cdc_msg cdc;
struct smc_sock *smc;
if (!conn)
return;
- memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+ data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+ smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+ smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
smc = container_of(conn, struct smc_sock, conn);
smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df4473a7c..b5bfe38c7f9b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@ struct smc_cdc_msg {
struct smc_cdc_producer_flags prod_flags;
struct smc_cdc_conn_state_flags conn_state_flags;
u8 reserved[18];
-} __packed; /* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+ struct {
+ u16 wrap;
+ u32 count;
+ struct smc_cdc_producer_flags prod_flags;
+ struct smc_cdc_conn_state_flags conn_state_flags;
+ } __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+ atomic64_t acurs; /* for atomic processing */
+#else
+ u64 acurs; /* for atomic processing */
+#endif
+} __aligned(8);
/* CDC message for SMC-D */
struct smcd_cdc_msg {
struct smc_wr_rx_hdr common; /* Type = 0xFE */
u8 res1[7];
- u16 prod_wrap;
- u32 prod_count;
- u8 res2[2];
- u16 cons_wrap;
- u32 cons_count;
- struct smc_cdc_producer_flags prod_flags;
- struct smc_cdc_conn_state_flags conn_state_flags;
+ union smcd_cdc_cursor prod;
+ union smcd_cdc_cursor cons;
u8 res3[8];
-} __packed;
+} __aligned(8);
static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
{
@@ -135,6 +145,21 @@ static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
#endif
}
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+ union smcd_cdc_cursor *src,
+ struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+ unsigned long flags;
+
+ spin_lock_irqsave(&conn->acurs_lock, flags);
+ tgt->acurs = src->acurs;
+ spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+ atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
/* calculate cursor difference between old and new, where old <= new */
static inline int smc_curs_diff(unsigned int size,
union smc_host_cursor *old,
@@ -222,12 +247,17 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smcd_cdc_msg *peer)
{
- local->prod.wrap = peer->prod_wrap;
- local->prod.count = peer->prod_count;
- local->cons.wrap = peer->cons_wrap;
- local->cons.count = peer->cons_count;
- local->prod_flags = peer->prod_flags;
- local->conn_state_flags = peer->conn_state_flags;
+ union smc_host_cursor temp;
+
+ temp.wrap = peer->prod.wrap;
+ temp.count = peer->prod.count;
+ atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+
+ temp.wrap = peer->cons.wrap;
+ temp.count = peer->cons.count;
+ atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+ local->prod_flags = peer->cons.prod_flags;
+ local->conn_state_flags = peer->cons.conn_state_flags;
}
static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 89c3a8c7859a..776e9dfc915d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -265,7 +265,7 @@ out:
* clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
*/
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
- u8 expected_type)
+ u8 expected_type, unsigned long timeout)
{
long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
struct sock *clc_sk = smc->clcsock->sk;
@@ -285,7 +285,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
* sizeof(struct smc_clc_msg_hdr)
*/
krflags = MSG_PEEK | MSG_WAITALL;
- smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
+ clc_sk->sk_rcvtimeo = timeout;
iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
sizeof(struct smc_clc_msg_hdr));
len = sock_recvmsg(smc->clcsock, &msg, krflags);
@@ -297,7 +297,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
if (clc_sk->sk_err) {
reason_code = -clc_sk->sk_err;
- smc->sk.sk_err = clc_sk->sk_err;
+ if (clc_sk->sk_err == EAGAIN &&
+ expected_type == SMC_CLC_DECLINE)
+ clc_sk->sk_err = 0; /* reset for fallback usage */
+ else
+ smc->sk.sk_err = clc_sk->sk_err;
goto out;
}
if (!len) { /* peer has performed orderly shutdown */
@@ -306,7 +310,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
goto out;
}
if (len < 0) {
- smc->sk.sk_err = -len;
+ if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE)
+ smc->sk.sk_err = -len;
reason_code = len;
goto out;
}
@@ -346,7 +351,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
out:
- smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
+ clc_sk->sk_rcvtimeo = rcvtimeo;
return reason_code;
}
@@ -374,10 +379,8 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
sizeof(struct smc_clc_msg_decline));
if (len < sizeof(struct smc_clc_msg_decline))
- smc->sk.sk_err = EPROTO;
- if (len < 0)
- smc->sk.sk_err = -len;
- return sock_error(&smc->sk);
+ len = -EPROTO;
+ return len > 0 ? 0 : len;
}
/* send CLC PROPOSAL message across internal TCP socket */
@@ -536,7 +539,6 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
struct smc_link *link;
struct msghdr msg;
struct kvec vec;
- int rc = 0;
int len;
memset(&aclc, 0, sizeof(aclc));
@@ -589,13 +591,8 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
vec.iov_len = ntohs(aclc.hdr.length);
len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
ntohs(aclc.hdr.length));
- if (len < ntohs(aclc.hdr.length)) {
- if (len >= 0)
- new_smc->sk.sk_err = EPROTO;
- else
- new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
- rc = sock_error(&new_smc->sk);
- }
+ if (len < ntohs(aclc.hdr.length))
+ len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
- return rc;
+ return len > 0 ? 0 : len;
}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 18da89b681c2..24658e8c0de4 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -27,6 +27,7 @@
#define SMC_TYPE_D 1 /* SMC-D only */
#define SMC_TYPE_B 3 /* SMC-R and SMC-D */
#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
+#define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */
#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
#define SMC_CLC_DECL_TIMEOUT_CL 0x02010000 /* timeout w4 QP confirm link */
#define SMC_CLC_DECL_TIMEOUT_AL 0x02020000 /* timeout w4 QP add link */
@@ -182,7 +183,7 @@ struct smcd_dev;
int smc_clc_prfx_match(struct socket *clcsock,
struct smc_clc_msg_proposal_prefix *prop);
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
- u8 expected_type);
+ u8 expected_type, unsigned long timeout);
int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 18daebcef181..35c1cdc93e1c 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -149,6 +149,8 @@ static int smc_link_send_delete(struct smc_link *lnk)
return -ENOTCONN;
}
+static void smc_lgr_free(struct smc_link_group *lgr);
+
static void smc_lgr_free_work(struct work_struct *work)
{
struct smc_link_group *lgr = container_of(to_delayed_work(work),
@@ -171,8 +173,11 @@ free:
spin_unlock_bh(&smc_lgr_list.lock);
if (!lgr->is_smcd && !lgr->terminating) {
+ struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
/* try to send del link msg, on error free lgr immediately */
- if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) {
+ if (lnk->state == SMC_LNK_ACTIVE &&
+ !smc_link_send_delete(lnk)) {
/* reschedule in case we never receive a response */
smc_lgr_schedule_free_work(lgr);
return;
@@ -184,6 +189,8 @@ free:
if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
smc_llc_link_inactive(lnk);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -293,8 +300,13 @@ static void smc_buf_unuse(struct smc_connection *conn,
conn->sndbuf_desc->used = 0;
if (conn->rmb_desc) {
if (!conn->rmb_desc->regerr) {
- conn->rmb_desc->reused = 1;
conn->rmb_desc->used = 0;
+ if (!lgr->is_smcd) {
+ /* unregister rmb with peer */
+ smc_llc_do_delete_rkey(
+ &lgr->lnk[SMC_SINGLE_LINK],
+ conn->rmb_desc);
+ }
} else {
/* buf registration failed, reuse not possible */
write_lock_bh(&lgr->rmbs_lock);
@@ -408,7 +420,7 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
}
/* remove a link group */
-void smc_lgr_free(struct smc_link_group *lgr)
+static void smc_lgr_free(struct smc_link_group *lgr)
{
smc_lgr_free_bufs(lgr);
if (lgr->is_smcd)
@@ -485,7 +497,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
}
/* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -495,7 +507,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
if (lgr->is_smcd && lgr->smcd == dev &&
(!peer_gid || lgr->peer_gid == peer_gid) &&
- !list_empty(&lgr->list)) {
+ (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
__smc_lgr_terminate(lgr);
list_move(&lgr->list, &lgr_free_list);
}
@@ -506,6 +518,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
list_del_init(&lgr->list);
cancel_delayed_work_sync(&lgr->free_work);
+ if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr);
}
}
@@ -559,7 +573,7 @@ out:
static bool smcr_lgr_match(struct smc_link_group *lgr,
struct smc_clc_msg_local *lcl,
- enum smc_lgr_role role)
+ enum smc_lgr_role role, u32 clcqpn)
{
return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
SMC_SYSTEMID_LEN) &&
@@ -567,7 +581,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
SMC_GID_SIZE) &&
!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
sizeof(lcl->mac)) &&
- lgr->role == role;
+ lgr->role == role &&
+ (lgr->role == SMC_SERV ||
+ lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -578,7 +594,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid)
{
@@ -603,7 +619,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
list_for_each_entry(lgr, &smc_lgr_list.list, list) {
write_lock_bh(&lgr->conns_lock);
if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
- smcr_lgr_match(lgr, lcl, role)) &&
+ smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
!lgr->sync_err &&
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
@@ -1024,6 +1040,8 @@ void smc_core_exit(void)
smc_llc_link_inactive(lnk);
}
cancel_delayed_work_sync(&lgr->free_work);
+ if (lgr->is_smcd)
+ smc_ism_signal_shutdown(lgr);
smc_lgr_free(lgr); /* free link group */
}
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c156674733c9..b00287989a3d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -109,6 +109,9 @@ struct smc_link {
int llc_testlink_time; /* testlink interval */
struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */
int llc_confirm_rkey_rc; /* rc from cnf rkey msg */
+ struct completion llc_delete_rkey; /* wait 4 rx of del rkey */
+ int llc_delete_rkey_rc; /* rc from del rkey msg */
+ struct mutex llc_delete_rkey_mutex; /* serialize usage */
};
/* For now we just allow one parallel link per link group. The SMC protocol
@@ -127,7 +130,7 @@ struct smc_buf_desc {
struct page *pages;
int len; /* length of buffer */
u32 used; /* currently used / unused */
- u8 reused : 1; /* new created / reused */
+ u8 wr_reg : 1; /* mem region registered */
u8 regerr : 1; /* err during registration */
union {
struct { /* SMC-R */
@@ -243,11 +246,11 @@ struct smc_sock;
struct smc_clc_msg_accept_confirm;
struct smc_clc_msg_local;
-void smc_lgr_free(struct smc_link_group *lgr);
void smc_lgr_forget(struct smc_link_group *lgr);
void smc_lgr_terminate(struct smc_link_group *lgr);
void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+ unsigned short vlan);
int smc_buf_create(struct smc_sock *smc, bool is_smcd);
int smc_uncompress_bufsize(u8 compressed);
int smc_rmb_rtoken_handling(struct smc_connection *conn,
@@ -262,7 +265,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
void smc_conn_free(struct smc_connection *conn);
int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
- struct smc_ib_device *smcibdev, u8 ibport,
+ struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
u64 peer_gid);
void smcd_conn_free(struct smc_connection *conn);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21ce7252..2fff79db1a59 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@ struct smc_ism_event_work {
#define ISM_EVENT_REQUEST 0x0001
#define ISM_EVENT_RESPONSE 0x0002
#define ISM_EVENT_REQUEST_IR 0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN 0x80
#define ISM_EVENT_CODE_TESTLINK 0x83
+union smcd_sw_event_info {
+ u64 info;
+ struct {
+ u8 uid[SMC_LGR_ID_SIZE];
+ unsigned short vlan_id;
+ u16 code;
+ };
+};
+
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
- union {
- u64 info;
- struct {
- u32 uid;
- unsigned short vlanid;
- u16 code;
- };
- } ev_info;
+ union smcd_sw_event_info ev_info;
+ ev_info.info = wrk->event.info;
switch (wrk->event.code) {
+ case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
- ev_info.info = wrk->event.info;
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc;
+ union smcd_sw_event_info ev_info;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+ return rc;
+}
+
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -223,7 +244,7 @@ static void smc_ism_event_work(struct work_struct *work)
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+ smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -289,7 +310,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
spin_unlock(&smcd_dev_list.lock);
flush_workqueue(smcd->event_wq);
destroy_workqueue(smcd->event_wq);
- smc_smcd_terminate(smcd, 0);
+ smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
device_del(&smcd->dev);
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b860b79..4da946cbfa29 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
#endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 9c916c709ca7..a6d3623d06f4 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -238,6 +238,29 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
return rc;
}
+/* send LLC delete rkey request */
+static int smc_llc_send_delete_rkey(struct smc_link *link,
+ struct smc_buf_desc *rmb_desc)
+{
+ struct smc_llc_msg_delete_rkey *rkeyllc;
+ struct smc_wr_tx_pend_priv *pend;
+ struct smc_wr_buf *wr_buf;
+ int rc;
+
+ rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+ if (rc)
+ return rc;
+ rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
+ memset(rkeyllc, 0, sizeof(*rkeyllc));
+ rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
+ rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
+ rkeyllc->num_rkeys = 1;
+ rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+ /* send llc message */
+ rc = smc_wr_tx_send(link, pend);
+ return rc;
+}
+
/* prepare an add link message */
static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
struct smc_link *link, u8 mac[], u8 gid[],
@@ -509,7 +532,9 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
int i, max;
if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
- /* unused as long as we don't send this type of msg */
+ link->llc_delete_rkey_rc = llc->hd.flags &
+ SMC_LLC_FLAG_RKEY_NEG;
+ complete(&link->llc_delete_rkey);
} else {
max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
for (i = 0; i < max; i++) {
@@ -610,6 +635,8 @@ int smc_llc_link_init(struct smc_link *link)
init_completion(&link->llc_add);
init_completion(&link->llc_add_resp);
init_completion(&link->llc_confirm_rkey);
+ init_completion(&link->llc_delete_rkey);
+ mutex_init(&link->llc_delete_rkey_mutex);
init_completion(&link->llc_testlink_resp);
INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
return 0;
@@ -650,8 +677,11 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
{
int rc;
+ /* protected by mutex smc_create_lgr_pending */
reinit_completion(&link->llc_confirm_rkey);
- smc_llc_send_confirm_rkey(link, rmb_desc);
+ rc = smc_llc_send_confirm_rkey(link, rmb_desc);
+ if (rc)
+ return rc;
/* receive CONFIRM RKEY response from server over RoCE fabric */
rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
SMC_LLC_WAIT_TIME);
@@ -660,6 +690,29 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
return 0;
}
+/* unregister an rtoken at the remote peer */
+int smc_llc_do_delete_rkey(struct smc_link *link,
+ struct smc_buf_desc *rmb_desc)
+{
+ int rc;
+
+ mutex_lock(&link->llc_delete_rkey_mutex);
+ reinit_completion(&link->llc_delete_rkey);
+ rc = smc_llc_send_delete_rkey(link, rmb_desc);
+ if (rc)
+ goto out;
+ /* receive DELETE RKEY response from server over RoCE fabric */
+ rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
+ SMC_LLC_WAIT_TIME);
+ if (rc <= 0 || link->llc_delete_rkey_rc)
+ rc = -EFAULT;
+ else
+ rc = 0;
+out:
+ mutex_unlock(&link->llc_delete_rkey_mutex);
+ return rc;
+}
+
/***************************** init, exit, misc ******************************/
static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 9e2ff088e301..461c0c3ef76e 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -49,6 +49,8 @@ void smc_llc_link_inactive(struct smc_link *link);
void smc_llc_link_clear(struct smc_link *link);
int smc_llc_do_confirm_rkey(struct smc_link *link,
struct smc_buf_desc *rmb_desc);
+int smc_llc_do_delete_rkey(struct smc_link *link,
+ struct smc_buf_desc *rmb_desc);
int smc_llc_init(void) __init;
#endif /* SMC_LLC_H */
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d279855..c2694750a6a8 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -215,12 +215,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
if (pend->idx < link->wr_tx_cnt) {
+ u32 idx = pend->idx;
+
/* clear the full struct smc_wr_tx_pend including .priv */
memset(&link->wr_tx_pends[pend->idx], 0,
sizeof(link->wr_tx_pends[pend->idx]));
memset(&link->wr_tx_bufs[pend->idx], 0,
sizeof(link->wr_tx_bufs[pend->idx]));
- test_and_clear_bit(pend->idx, link->wr_tx_mask);
+ test_and_clear_bit(idx, link->wr_tx_mask);
return 1;
}
diff --git a/net/socket.c b/net/socket.c
index 593826e11a53..334fcc617ef2 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -853,7 +853,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
- return -EINVAL;
+ return generic_file_splice_read(file, ppos, pipe, len, flags);
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d8831b988b1e..ab4a3be1542a 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
- bool ret;
-
- get_rpccred(cred);
- ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
- put_rpccred(cred);
-
- return ret;
+ return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
}
static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 30f970cdc7f6..ba765473d1f0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
return &gss_auth->rpc_auth;
}
+static struct gss_cred *
+gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+ struct gss_cred *new;
+
+ /* Make a copy of the cred so that we can reference count it */
+ new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+ if (new) {
+ struct auth_cred acred = {
+ .uid = gss_cred->gc_base.cr_uid,
+ };
+ struct gss_cl_ctx *ctx =
+ rcu_dereference_protected(gss_cred->gc_ctx, 1);
+
+ rpcauth_init_cred(&new->gc_base, &acred,
+ &gss_auth->rpc_auth,
+ &gss_nullops);
+ new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ new->gc_service = gss_cred->gc_service;
+ new->gc_principal = gss_cred->gc_principal;
+ kref_get(&gss_auth->kref);
+ rcu_assign_pointer(new->gc_ctx, ctx);
+ gss_get_ctx(ctx);
+ }
+ return new;
+}
+
/*
- * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call
* to the server with the GSS control procedure field set to
* RPC_GSS_PROC_DESTROY. This should normally cause the server to release
* all RPCSEC_GSS state associated with that context.
*/
-static int
-gss_destroying_context(struct rpc_cred *cred)
+static void
+gss_send_destroy_context(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
+ struct gss_cred *new;
struct rpc_task *task;
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
- return 0;
+ new = gss_dup_cred(gss_auth, gss_cred);
+ if (new) {
+ ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- ctx->gc_proc = RPC_GSS_PROC_DESTROY;
- cred->cr_ops = &gss_nullops;
+ task = rpc_call_null(gss_auth->client, &new->gc_base,
+ RPC_TASK_ASYNC|RPC_TASK_SOFT);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
- /* Take a reference to ensure the cred will be destroyed either
- * by the RPC call or by the put_rpccred() below */
- get_rpccred(cred);
-
- task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
- if (!IS_ERR(task))
- rpc_put_task(task);
-
- put_rpccred(cred);
- return 1;
+ put_rpccred(&new->gc_base);
+ }
}
/* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
@@ -1330,8 +1353,8 @@ static void
gss_destroy_cred(struct rpc_cred *cred)
{
- if (gss_destroying_context(cred))
- return;
+ if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+ gss_send_destroy_context(cred);
gss_destroy_nullcred(cred);
}
@@ -1768,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp)
for (i=0; i < rqstp->rq_enc_pages_num; i++)
__free_page(rqstp->rq_enc_pages[i]);
kfree(rqstp->rq_enc_pages);
+ rqstp->rq_release_snd_buf = NULL;
}
static int
@@ -1776,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
int first, last, i;
+ if (rqstp->rq_release_snd_buf)
+ rqstp->rq_release_snd_buf(rqstp);
+
if (snd_buf->page_len == 0) {
rqstp->rq_enc_pages_num = 0;
return 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ae3b8145da35..c6782aa47525 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task)
struct rpc_clnt *clnt = task->tk_client;
int status = task->tk_status;
+ /* Check if the task was already transmitted */
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ xprt_end_transmit(task);
+ task->tk_action = call_transmit_status;
+ return;
+ }
+
dprint_status(task);
trace_rpc_connect_status(task);
@@ -2302,6 +2309,7 @@ out_retry:
task->tk_status = 0;
/* Note: rpc_verify_header() may have freed the RPC slot */
if (task->tk_rqstp == req) {
+ xdr_free_bvec(&req->rq_rcv_buf);
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
if (task->tk_client->cl_discrtry)
xprt_conditional_disconnect(req->rq_xprt,
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 9062967575c4..7e55cfc69697 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -175,7 +175,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
return -1;
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
+ netdev_rx_csum_fault(skb->dev, skb);
return 0;
no_checksum:
if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2bbb8d38d2bf..f302c6eb8779 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode);
static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
size_t nbytes)
{
- static __be32 *p;
+ __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
@@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(xdr->iov);
return;
}
- if (fraglen) {
+ if (fraglen)
xdr->end = head->iov_base + head->iov_len;
- xdr->page_ptr--;
- }
/* (otherwise assume xdr->end is already set) */
+ xdr->page_ptr--;
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 86bea4520c4d..ce927002862a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task)
return;
if (xprt_test_and_set_connecting(xprt))
return;
- xprt->stat.connect_start = jiffies;
- xprt->ops->connect(xprt, task);
+ /* Race breaker */
+ if (!xprt_connected(xprt)) {
+ xprt->stat.connect_start = jiffies;
+ xprt->ops->connect(xprt, task);
+ } else {
+ xprt_clear_connecting(xprt);
+ task->tk_status = 0;
+ rpc_wake_up_queued_task(&xprt->pending, task);
+ }
}
xprt_release_write(xprt, task);
}
@@ -1623,6 +1630,8 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.buflen = 0;
req->rq_rcv_buf.len = 0;
req->rq_rcv_buf.buflen = 0;
+ req->rq_snd_buf.bvec = NULL;
+ req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
xprt_reset_majortimeo(req);
dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae77c71c1f64..8a5e823e0b33 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
{
size_t i,n;
- if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
return want;
- if (want > buf->page_len)
- want = buf->page_len;
n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < n; i++) {
if (buf->pages[i])
continue;
buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
if (!buf->pages[i]) {
- buf->page_len = (i * PAGE_SIZE) - buf->page_base;
- return buf->page_len;
+ i *= PAGE_SIZE;
+ return i > buf->page_base ? i - buf->page_base : 0;
}
}
return want;
@@ -378,8 +376,8 @@ static ssize_t
xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
size_t count)
{
- struct kvec kvec = { 0 };
- return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+ iov_iter_discard(&msg->msg_iter, READ, count);
+ return sock_recvmsg(sock, msg, flags);
}
static ssize_t
@@ -398,16 +396,17 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
seek -= buf->head[0].iov_len;
offset += buf->head[0].iov_len;
}
- if (seek < buf->page_len) {
- want = xs_alloc_sparse_pages(buf,
- min_t(size_t, count - offset, buf->page_len),
- GFP_NOWAIT);
+
+ want = xs_alloc_sparse_pages(buf,
+ min_t(size_t, count - offset, buf->page_len),
+ GFP_NOWAIT);
+ if (seek < want) {
ret = xs_read_bvec(sock, msg, flags, buf->bvec,
xdr_buf_pagecount(buf),
want + buf->page_base,
@@ -418,12 +417,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
- seek -= buf->page_len;
- offset += buf->page_len;
+ seek -= want;
+ offset += want;
}
+
if (seek < buf->tail[0].iov_len) {
want = min_t(size_t, count - offset, buf->tail[0].iov_len);
ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
@@ -433,17 +433,13 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
} else
offset += buf->tail[0].iov_len;
ret = -EMSGSIZE;
- msg->msg_flags |= MSG_TRUNC;
out:
*read = offset - seek_init;
return ret;
-eagain:
- ret = -EAGAIN;
- goto out;
sock_err:
offset += seek;
goto out;
@@ -486,19 +482,20 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
- return transport->recv.copied;
+ return read;
}
switch (ret) {
+ default:
+ break;
+ case -EFAULT:
case -EMSGSIZE:
- return transport->recv.copied;
+ msg->msg_flags |= MSG_TRUNC;
+ return read;
case 0:
return -ESHUTDOWN;
- default:
- if (ret < 0)
- return ret;
}
- return -EAGAIN;
+ return ret < 0 ? ret : read;
}
static size_t
@@ -537,7 +534,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
ret = xs_read_stream_request(transport, msg, flags, req);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_bc_request(req, ret);
+ xprt_complete_bc_request(req, transport->recv.copied);
return ret;
}
@@ -570,7 +567,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
spin_lock(&xprt->queue_lock);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_rqst(req->rq_task, ret);
+ xprt_complete_rqst(req->rq_task, transport->recv.copied);
xprt_unpin_rqst(req);
out:
spin_unlock(&xprt->queue_lock);
@@ -591,10 +588,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
if (ret <= 0)
goto out_err;
transport->recv.offset = ret;
- if (ret != want) {
- ret = -EAGAIN;
- goto out_err;
- }
+ if (transport->recv.offset != want)
+ return transport->recv.offset;
transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
RPC_FRAGMENT_SIZE_MASK;
transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -602,6 +597,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
}
switch (be32_to_cpu(transport->recv.calldir)) {
+ default:
+ msg.msg_flags |= MSG_TRUNC;
+ break;
case RPC_CALL:
ret = xs_read_stream_call(transport, &msg, flags);
break;
@@ -616,6 +614,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
goto out_err;
read += ret;
if (transport->recv.offset < transport->recv.len) {
+ if (!(msg.msg_flags & MSG_TRUNC))
+ return read;
+ msg.msg_flags = 0;
ret = xs_read_discard(transport->sock, &msg, flags,
transport->recv.len - transport->recv.offset);
if (ret <= 0)
@@ -623,7 +624,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.offset += ret;
read += ret;
if (transport->recv.offset != transport->recv.len)
- return -EAGAIN;
+ return read;
}
if (xs_read_stream_request_done(transport)) {
trace_xs_stream_read_request(transport);
@@ -633,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.len = 0;
return read;
out_err:
- switch (ret) {
- case 0:
- case -ESHUTDOWN:
- xprt_force_disconnect(&transport->xprt);
- return -ESHUTDOWN;
- }
- return ret;
+ return ret != 0 ? ret : -ESHUTDOWN;
}
static void xs_stream_data_receive(struct sock_xprt *transport)
@@ -648,12 +643,12 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
ssize_t ret = 0;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
if (transport->sock == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
ret = xs_read_stream(transport, MSG_DONTWAIT);
- if (ret <= 0)
+ if (ret < 0)
break;
read += ret;
cond_resched();
@@ -1345,10 +1340,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
int err;
mutex_lock(&transport->recv_mutex);
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
sk = transport->inet;
if (sk == NULL)
goto out;
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
if (skb == NULL)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 74b9d916a58b..5df9d1138ac9 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -353,34 +353,35 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj)
return 0;
}
-static int __switchdev_port_obj_add(struct net_device *dev,
- const struct switchdev_obj *obj,
- struct switchdev_trans *trans)
+static int switchdev_port_obj_notify(enum switchdev_notifier_type nt,
+ struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans,
+ struct netlink_ext_ack *extack)
{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- if (ops && ops->switchdev_port_obj_add)
- return ops->switchdev_port_obj_add(dev, obj, trans);
+ int rc;
+ int err;
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to add object on
- * each port.
- */
+ struct switchdev_notifier_port_obj_info obj_info = {
+ .obj = obj,
+ .trans = trans,
+ .handled = false,
+ };
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_obj_add(lower_dev, obj, trans);
- if (err)
- break;
+ rc = call_switchdev_blocking_notifiers(nt, dev, &obj_info.info, extack);
+ err = notifier_to_errno(rc);
+ if (err) {
+ WARN_ON(!obj_info.handled);
+ return err;
}
-
- return err;
+ if (!obj_info.handled)
+ return -EOPNOTSUPP;
+ return 0;
}
static int switchdev_port_obj_add_now(struct net_device *dev,
- const struct switchdev_obj *obj)
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
struct switchdev_trans trans;
int err;
@@ -397,7 +398,8 @@ static int switchdev_port_obj_add_now(struct net_device *dev,
*/
trans.ph_prepare = true;
- err = __switchdev_port_obj_add(dev, obj, &trans);
+ err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+ dev, obj, &trans, extack);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
@@ -416,7 +418,8 @@ static int switchdev_port_obj_add_now(struct net_device *dev,
*/
trans.ph_prepare = false;
- err = __switchdev_port_obj_add(dev, obj, &trans);
+ err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
+ dev, obj, &trans, extack);
WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
switchdev_trans_items_warn_destroy(dev, &trans);
@@ -429,7 +432,7 @@ static void switchdev_port_obj_add_deferred(struct net_device *dev,
const struct switchdev_obj *obj = data;
int err;
- err = switchdev_port_obj_add_now(dev, obj);
+ err = switchdev_port_obj_add_now(dev, obj, NULL);
if (err && err != -EOPNOTSUPP)
netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
err, obj->id);
@@ -459,38 +462,21 @@ static int switchdev_port_obj_add_defer(struct net_device *dev,
* in case SWITCHDEV_F_DEFER flag is not set.
*/
int switchdev_port_obj_add(struct net_device *dev,
- const struct switchdev_obj *obj)
+ const struct switchdev_obj *obj,
+ struct netlink_ext_ack *extack)
{
if (obj->flags & SWITCHDEV_F_DEFER)
return switchdev_port_obj_add_defer(dev, obj);
ASSERT_RTNL();
- return switchdev_port_obj_add_now(dev, obj);
+ return switchdev_port_obj_add_now(dev, obj, extack);
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
static int switchdev_port_obj_del_now(struct net_device *dev,
const struct switchdev_obj *obj)
{
- const struct switchdev_ops *ops = dev->switchdev_ops;
- struct net_device *lower_dev;
- struct list_head *iter;
- int err = -EOPNOTSUPP;
-
- if (ops && ops->switchdev_port_obj_del)
- return ops->switchdev_port_obj_del(dev, obj);
-
- /* Switch device port(s) may be stacked under
- * bond/team/vlan dev, so recurse down to delete object on
- * each port.
- */
-
- netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_del_now(lower_dev, obj);
- if (err)
- break;
- }
-
- return err;
+ return switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_DEL,
+ dev, obj, NULL, NULL);
}
static void switchdev_port_obj_del_deferred(struct net_device *dev,
@@ -535,6 +521,7 @@ int switchdev_port_obj_del(struct net_device *dev,
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
+static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
/**
* register_switchdev_notifier - Register notifier
@@ -572,10 +559,38 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
struct switchdev_notifier_info *info)
{
info->dev = dev;
+ info->extack = NULL;
return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
}
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
+int register_switchdev_blocking_notifier(struct notifier_block *nb)
+{
+ struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+
+ return blocking_notifier_chain_register(chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_switchdev_blocking_notifier);
+
+int unregister_switchdev_blocking_notifier(struct notifier_block *nb)
+{
+ struct blocking_notifier_head *chain = &switchdev_blocking_notif_chain;
+
+ return blocking_notifier_chain_unregister(chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_switchdev_blocking_notifier);
+
+int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
+ struct switchdev_notifier_info *info,
+ struct netlink_ext_ack *extack)
+{
+ info->dev = dev;
+ info->extack = extack;
+ return blocking_notifier_call_chain(&switchdev_blocking_notif_chain,
+ val, info);
+}
+EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
+
bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
@@ -595,3 +610,109 @@ bool switchdev_port_same_parent_id(struct net_device *a,
return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}
EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
+
+static int __switchdev_handle_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans,
+ struct netlink_ext_ack *extack))
+{
+ struct netlink_ext_ack *extack;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ extack = switchdev_notifier_info_to_extack(&port_obj_info->info);
+
+ if (check_cb(dev)) {
+ /* This flag is only checked if the return value is success. */
+ port_obj_info->handled = true;
+ return add_cb(dev, port_obj_info->obj, port_obj_info->trans,
+ extack);
+ }
+
+ /* Switch ports might be stacked under e.g. a LAG. Ignore the
+ * unsupported devices, another driver might be able to handle them. But
+ * propagate to the callers any hard errors.
+ *
+ * If the driver does its own bookkeeping of stacked ports, it's not
+ * necessary to go through this helper.
+ */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_handle_port_obj_add(lower_dev, port_obj_info,
+ check_cb, add_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return err;
+}
+
+int switchdev_handle_port_obj_add(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ int (*add_cb)(struct net_device *dev,
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans,
+ struct netlink_ext_ack *extack))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_add(dev, port_obj_info, check_cb,
+ add_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_add);
+
+static int __switchdev_handle_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct switchdev_obj *obj))
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (check_cb(dev)) {
+ /* This flag is only checked if the return value is success. */
+ port_obj_info->handled = true;
+ return del_cb(dev, port_obj_info->obj);
+ }
+
+ /* Switch ports might be stacked under e.g. a LAG. Ignore the
+ * unsupported devices, another driver might be able to handle them. But
+ * propagate to the callers any hard errors.
+ *
+ * If the driver does its own bookkeeping of stacked ports, it's not
+ * necessary to go through this helper.
+ */
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_handle_port_obj_del(lower_dev, port_obj_info,
+ check_cb, del_cb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+ }
+
+ return err;
+}
+
+int switchdev_handle_port_obj_del(struct net_device *dev,
+ struct switchdev_notifier_port_obj_info *port_obj_info,
+ bool (*check_cb)(const struct net_device *dev),
+ int (*del_cb)(struct net_device *dev,
+ const struct switchdev_obj *obj))
+{
+ int err;
+
+ err = __switchdev_handle_port_obj_del(dev, port_obj_info, check_cb,
+ del_cb);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 2830709957bd..c138d68e8a69 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -166,7 +166,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
/* Apply trial address if we just left trial period */
if (!trial && !self) {
- tipc_net_finalize(net, tn->trial_addr);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
@@ -300,14 +301,12 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Trial period over ? */
- if (!time_before(jiffies, tn->addr_trial_end)) {
- /* Did we just leave it ? */
- if (!tipc_own_addr(net))
- tipc_net_finalize(net, tn->trial_addr);
-
- msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
- msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
+ /* Did we just leave trial period ? */
+ if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
+ mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
+ spin_unlock_bh(&d->lock);
+ tipc_sched_net_finalize(net, tn->trial_addr);
+ return;
}
/* Adjust timeout interval according to discovery phase */
@@ -319,6 +318,8 @@ static void tipc_disc_timeout(struct timer_list *t)
d->timer_intv = TIPC_DISC_SLOW;
else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
d->timer_intv = TIPC_DISC_FAST;
+ msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
}
mod_timer(&d->timer, jiffies + d->timer_intv);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 201c3b5bc96b..9e265eb89726 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -105,7 +105,7 @@ struct tipc_stats {
* @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
* @snt_nxt: next sequence number to use for outbound messages
- * @last_retransmitted: sequence number of most recently retransmitted message
+ * @prev_from: starting sequence number of the previous retransmission request
* @stale_cnt: counter for number of identical retransmit attempts
* @stale_limit: time when repeated identical retransmits must force link reset
* @ackers: # of peers that needs to ack each packet before it can be released
@@ -163,7 +163,7 @@ struct tipc_link {
u16 limit;
} backlog[5];
u16 snd_nxt;
- u16 last_retransm;
+ u16 prev_from;
u16 window;
u16 stale_cnt;
unsigned long stale_limit;
@@ -186,9 +186,6 @@ struct tipc_link {
u16 acked;
struct tipc_link *bc_rcvlink;
struct tipc_link *bc_sndlink;
- unsigned long prev_retr;
- u16 prev_from;
- u16 prev_to;
u8 nack_state;
bool bc_peer_is_up;
@@ -210,7 +207,7 @@ enum {
BC_NACK_SND_SUPPRESS,
};
-#define TIPC_BC_RETR_LIMIT 10 /* [ms] */
+#define TIPC_BC_RETR_LIM msecs_to_jiffies(10) /* 10 ms, in jiffies */
/*
* Interval between NACKs when packets arrive out of order
@@ -1036,10 +1033,12 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
if (!skb)
return 0;
+ if (less(to, from))
+ return 0;
/* Detect repeated retransmit failures on same packet */
- if (r->last_retransm != buf_seqno(skb)) {
- r->last_retransm = buf_seqno(skb);
+ if (r->prev_from != from) {
+ r->prev_from = from;
r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
r->stale_cnt = 0;
} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
@@ -1055,6 +1054,11 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
continue;
if (more(msg_seqno(hdr), to))
break;
+ if (link_is_bc_sndlink(l)) {
+ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+ continue;
+ TIPC_SKB_CB(skb)->nxt_retr = jiffies + TIPC_BC_RETR_LIM;
+ }
_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
if (!_skb)
return 0;
@@ -1594,14 +1598,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
- /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
- if (msg_peer_stopping(hdr))
+ /* If peer is going down we want full re-establish cycle */
+ if (msg_peer_stopping(hdr)) {
rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
- else if ((mtyp == RESET_MSG) || !link_is_up(l))
+ break;
+ }
+ /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+ if (mtyp == RESET_MSG || !link_is_up(l))
rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
/* ACTIVATE_MSG takes up link if it was already locally reset */
- if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
+ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
l->peer_session = msg_session(hdr);
@@ -1734,42 +1741,6 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
l->rcv_nxt = peers_snd_nxt;
}
-/* link_bc_retr eval()- check if the indicated range can be retransmitted now
- * - Adjust permitted range if there is overlap with previous retransmission
- */
-static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
-{
- unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
-
- if (less(*to, *from))
- return false;
-
- /* New retransmission request */
- if ((elapsed > TIPC_BC_RETR_LIMIT) ||
- less(*to, l->prev_from) || more(*from, l->prev_to)) {
- l->prev_from = *from;
- l->prev_to = *to;
- l->prev_retr = jiffies;
- return true;
- }
-
- /* Inside range of previous retransmit */
- if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
- return false;
-
- /* Fully or partially outside previous range => exclude overlap */
- if (less(*from, l->prev_from)) {
- *to = l->prev_from - 1;
- l->prev_from = *from;
- }
- if (more(*to, l->prev_to)) {
- *from = l->prev_to + 1;
- l->prev_to = *to;
- }
- l->prev_retr = jiffies;
- return true;
-}
-
/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
*/
int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
@@ -1800,8 +1771,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
if (more(peers_snd_nxt, l->rcv_nxt + l->window))
return rc;
- if (link_bc_retr_eval(snd_l, &from, &to))
- rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
+ rc = tipc_link_retrans(snd_l, l, from, to, xmitq);
l->snd_nxt = peers_snd_nxt;
if (link_bc_rcv_gap(l))
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index a2879e6ec5b6..a0924956bb61 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -105,6 +105,7 @@ struct tipc_skb_cb {
u32 bytes_read;
u32 orig_member;
struct sk_buff *tail;
+ unsigned long nxt_retr;
bool validated;
u16 chain_imp;
u16 ackers;
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 62199cf5a56c..f076edb74338 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,6 +104,14 @@
* - A local spin_lock protecting the queue of subscriber events.
*/
+struct tipc_net_work {
+ struct work_struct work;
+ struct net *net;
+ u32 addr;
+};
+
+static void tipc_net_finalize(struct net *net, u32 addr);
+
int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
{
if (tipc_own_id(net)) {
@@ -119,17 +127,38 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
return 0;
}
-void tipc_net_finalize(struct net *net, u32 addr)
+static void tipc_net_finalize(struct net *net, u32 addr)
{
struct tipc_net *tn = tipc_net(net);
- if (!cmpxchg(&tn->node_addr, 0, addr)) {
- tipc_set_node_addr(net, addr);
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
- }
+ if (cmpxchg(&tn->node_addr, 0, addr))
+ return;
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+}
+
+static void tipc_net_finalize_work(struct work_struct *work)
+{
+ struct tipc_net_work *fwork;
+
+ fwork = container_of(work, struct tipc_net_work, work);
+ tipc_net_finalize(fwork->net, fwork->addr);
+ kfree(fwork);
+}
+
+void tipc_sched_net_finalize(struct net *net, u32 addr)
+{
+ struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC);
+
+ if (!fwork)
+ return;
+ INIT_WORK(&fwork->work, tipc_net_finalize_work);
+ fwork->net = net;
+ fwork->addr = addr;
+ schedule_work(&fwork->work);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 09ad02b50bb1..b7f2e364eb99 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -42,7 +42,7 @@
extern const struct nla_policy tipc_nl_net_policy[];
int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
-void tipc_net_finalize(struct net *net, u32 addr);
+void tipc_sched_net_finalize(struct net *net, u32 addr);
void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2afc4f8c37a7..32556f480a60 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -584,12 +584,15 @@ static void tipc_node_clear_links(struct tipc_node *node)
/* tipc_node_cleanup - delete nodes that does not
* have active links for NODE_CLEANUP_AFTER time
*/
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
{
struct tipc_net *tn = tipc_net(peer->net);
bool deleted = false;
- spin_lock_bh(&tn->node_list_lock);
+ /* If lock held by tipc_node_stop() the node will be deleted anyway */
+ if (!spin_trylock_bh(&tn->node_list_lock))
+ return false;
+
tipc_node_write_lock(peer);
if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
@@ -621,6 +624,12 @@ static void tipc_node_timeout(struct timer_list *t)
__skb_queue_head_init(&xmitq);
+ /* Start with a large keepalive interval (10 seconds); it will then
+ * be recalculated from the lowest link tolerance
+ */
+ tipc_node_read_lock(n);
+ n->keepalive_intv = 10000;
+ tipc_node_read_unlock(n);
for (bearer_id = 0; remains && (bearer_id < MAX_BEARERS); bearer_id++) {
tipc_node_read_lock(n);
le = &n->links[bearer_id];
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 636e6131769d..b57b1be7252b 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1555,16 +1555,17 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
/**
* tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message buffer
* @tsk: TIPC port associated with message
*
* Note: Ancillary data is not captured if not requested by receiver.
*
* Returns 0 if successful, otherwise errno
*/
-static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
struct tipc_sock *tsk)
{
+ struct tipc_msg *msg;
u32 anc_data[3];
u32 err;
u32 dest_type;
@@ -1573,6 +1574,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (likely(m->msg_controllen == 0))
return 0;
+ msg = buf_msg(skb);
/* Optionally capture errored message object(s) */
err = msg ? msg_errcode(msg) : 0;
@@ -1583,6 +1585,9 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
if (res)
return res;
if (anc_data[1]) {
+ if (skb_linearize(skb))
+ return -ENOMEM;
+ msg = buf_msg(skb);
res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
msg_data(msg));
if (res)
@@ -1744,9 +1749,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, including error code and rejected data */
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (unlikely(rc))
goto exit;
+ hdr = buf_msg(skb);
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
@@ -1856,9 +1862,10 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
tipc_sk_set_orig_addr(m, skb);
- rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+ rc = tipc_sk_anc_data_recv(m, skb, tsk);
if (rc)
break;
+ hdr = buf_msg(skb);
}
/* Copy data if msg ok, otherwise return error/partial data */
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 7b1af8b59cd2..d4ecc66464e6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -687,6 +687,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sock *sk_redir;
struct tls_rec *rec;
int err = 0, send;
+ u32 delta = 0;
bool enospc;
psock = sk_psock_get(sk);
@@ -694,8 +695,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
return tls_push_record(sk, flags, record_type);
more_data:
enospc = sk_msg_full(msg);
- if (psock->eval == __SK_NONE)
+ if (psock->eval == __SK_NONE) {
+ delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+ if (delta < msg->sg.size)
+ delta -= msg->sg.size;
+ else
+ delta = 0;
+ }
if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
!enospc && !full_record) {
err = -ENOSPC;
@@ -743,7 +750,7 @@ more_data:
msg->apply_bytes -= send;
if (msg->sg.size == 0)
tls_free_open_rec(sk);
- *copied -= send;
+ *copied -= (send + delta);
err = -EACCES;
}
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 12b3edf70a7b..1615e503f8e3 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -272,11 +272,11 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
p1 = (u8*)(ht_capa);
p2 = (u8*)(ht_capa_mask);
- for (i = 0; i<sizeof(*ht_capa); i++)
+ for (i = 0; i < sizeof(*ht_capa); i++)
p1[i] &= p2[i];
}
-/* Do a logical ht_capa &= ht_capa_mask. */
+/* Do a logical vht_capa &= vht_capa_mask. */
void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
const struct ieee80211_vht_cap *vht_capa_mask)
{
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 744b5851bbf9..8d763725498c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7870,6 +7870,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
}
memset(&params, 0, sizeof(params));
+ params.beacon_csa.ftm_responder = -1;
if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
!info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d536b07582f8..f741d8376a46 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void)
* All devices must be idle as otherwise if you are actively
* scanning some new beacon hints could be learned and would
* count as new regulatory hints.
+ * Also if there is any other active beaconing interface we
+ * need not issue a disconnect hint and reset any info such
+ * as chan dfs state, etc.
*/
list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
wdev_lock(wdev);
- if (wdev->conn || wdev->current_bss)
+ if (wdev->conn || wdev->current_bss ||
+ cfg80211_beaconing_iface_active(wdev))
is_all_idle = false;
wdev_unlock(wdev);
}
@@ -1171,6 +1175,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
+ cfg80211_oper_and_vht_capa(&connect->vht_capa_mask,
+ rdev->wiphy.vht_capa_mod_mask);
if (connkeys && connkeys->def >= 0) {
int idx;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index ef14d80ca03e..d473bd135da8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1421,6 +1421,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
ies[pos + ext],
ext == 2))
pos = skip_ie(ies, ielen, pos);
+ else
+ break;
}
} else {
pos = skip_ie(ies, ielen, pos);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b7997..5121729b8b63 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb,
}
len = *skb->data;
- needed = 1 + (len >> 4) + (len & 0x0f);
+ needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
if (!pskb_may_pull(skb, needed)) {
/* packet is too short to hold the addresses it claims
@@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr,
sk_for_each(s, &x25_list)
if ((!strcmp(addr->x25_addr,
x25_sk(s)->source_addr.x25_addr) ||
- !strcmp(addr->x25_addr,
+ !strcmp(x25_sk(s)->source_addr.x25_addr,
null_x25_address.x25_addr)) &&
s->sk_state == TCP_LISTEN) {
/*
@@ -688,11 +688,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- len = strlen(addr->sx25_addr.x25_addr);
- for (i = 0; i < len; i++) {
- if (!isdigit(addr->sx25_addr.x25_addr[i])) {
- rc = -EINVAL;
- goto out;
+ /* check for the null_x25_address */
+ if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) {
+
+ len = strlen(addr->sx25_addr.x25_addr);
+ for (i = 0; i < len; i++) {
+ if (!isdigit(addr->sx25_addr.x25_addr[i])) {
+ rc = -EINVAL;
+ goto out;
+ }
}
}
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 3c12cae32001..afb26221d8a8 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -142,6 +142,15 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
sk->sk_state_change(sk);
break;
}
+ case X25_CALL_REQUEST:
+ /* call collision */
+ x25->causediag.cause = 0x01;
+ x25->causediag.diagnostic = 0x48;
+
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25_disconnect(sk, EISCONN, 0x01, 0x48);
+ break;
+
case X25_CLEAR_REQUEST:
if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
goto out_clear;