summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-14 09:24:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-14 09:24:32 -0700
commit8096acd7442e613fad0354fc8dfdb2003cceea0b (patch)
treead8b748475fa87fe7c3b6f9cd00da8d7b8d078bd /net
parentd1d488d813703618f0dd93f0e4c4a05928114aa8 (diff)
parentbcb9928a155444dbd212473e60241ca0a7f641e1 (diff)
Merge tag 'net-5.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Jakub Kicinski. "Including fixes from bpf and netfilter. Current release - regressions: - sock: fix parameter order in sock_setsockopt() Current release - new code bugs: - netfilter: nft_last: - fix incorrect arithmetic when restoring last used - honor NFTA_LAST_SET on restoration Previous releases - regressions: - udp: properly flush normal packet at GRO time - sfc: ensure correct number of XDP queues; don't allow enabling the feature if there isn't sufficient resources to Tx from any CPU - dsa: sja1105: fix address learning getting disabled on the CPU port - mptcp: addresses a rmem accounting issue that could keep packets in subflow receive buffers longer than necessary, delaying MPTCP-level ACKs - ip_tunnel: fix mtu calculation for ETHER tunnel devices - do not reuse skbs allocated from skbuff_fclone_cache in the napi skb cache, we'd try to return them to the wrong slab cache - tcp: consistently disable header prediction for mptcp Previous releases - always broken: - bpf: fix subprog poke descriptor tracking use-after-free - ipv6: - allocate enough headroom in ip6_finish_output2() in case iptables TEE is used - tcp: drop silly ICMPv6 packet too big messages to avoid expensive and pointless lookups (which may serve as a DDOS vector) - make sure fwmark is copied in SYNACK packets - fix 'disable_policy' for forwarded packets (align with IPv4) - netfilter: conntrack: - do not renew entry stuck in tcp SYN_SENT state - do not mark RST in the reply direction coming after SYN packet for an out-of-sync entry - mptcp: cleanly handle error conditions with MP_JOIN and syncookies - mptcp: fix double free when rejecting a join due to port mismatch - validate lwtstate->data before returning from skb_tunnel_info() - tcp: call sk_wmem_schedule before sk_mem_charge in zerocopy path - mt76: mt7921: continue to probe driver when fw already downloaded - bonding: fix multiple issues with offloading IPsec to (thru?) bond - stmmac: ptp: fix issues around Qbv support and setting time back - bcmgenet: always clear wake-up based on energy detection Misc: - sctp: move 198 addresses from unusable to private scope - ptp: support virtual clocks and timestamping - openvswitch: optimize operation for key comparison" * tag 'net-5.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (158 commits) net: dsa: properly check for the bridge_leave methods in dsa_switch_bridge_leave() sfc: add logs explaining XDP_TX/REDIRECT is not available sfc: ensure correct number of XDP queues sfc: fix lack of XDP TX queues - error XDP TX failed (-22) net: fddi: fix UAF in fza_probe net: dsa: sja1105: fix address learning getting disabled on the CPU port net: ocelot: fix switchdev objects synced for wrong netdev with LAG offload net: Use nlmsg_unicast() instead of netlink_unicast() octeontx2-pf: Fix uninitialized boolean variable pps ipv6: allocate enough headroom in ip6_finish_output2() net: hdlc: rename 'mod_init' & 'mod_exit' functions to be module-specific net: bridge: multicast: fix MRD advertisement router port marking race net: bridge: multicast: fix PIM hello router port marking race net: phy: marvell10g: fix differentiation of 88X3310 from 88X3340 dsa: fix for_each_child.cocci warnings virtio_net: check virtqueue_add_sgs() return value mptcp: properly account bulk freed memory selftests: mptcp: fix case multiple subflows limited by server mptcp: avoid processing packet if a subflow reset mptcp: fix syncookie process if mptcp can not_accept new subflow ...
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c14
-rw-r--r--net/802/mrp.c14
-rw-r--r--net/bridge/br_if.c17
-rw-r--r--net/bridge/br_multicast.c6
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/core/sock.c71
-rw-r--r--net/dsa/switch.c8
-rw-r--r--net/ethtool/Makefile2
-rw-r--r--net/ethtool/common.c14
-rw-r--r--net/ethtool/netlink.c10
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/phc_vclocks.c94
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/inet_diag.c5
-rw-r--r--net/ipv4/ip_tunnel.c18
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/raw_diag.c7
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_input.c21
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c1
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv4/udp_diag.c6
-rw-r--r--net/ipv4/udp_offload.c6
-rw-r--r--net/ipv6/ip6_output.c32
-rw-r--r--net/ipv6/tcp_ipv6.c21
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/iucv/iucv.c22
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h1
-rw-r--r--net/mptcp/mptcp_diag.c6
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/mptcp/protocol.c12
-rw-r--r--net/mptcp/protocol.h10
-rw-r--r--net/mptcp/sockopt.c68
-rw-r--r--net/mptcp/subflow.c11
-rw-r--r--net/mptcp/syncookies.c16
-rw-r--r--net/ncsi/Kconfig6
-rw-r--r--net/ncsi/internal.h5
-rw-r--r--net/ncsi/ncsi-manage.c51
-rw-r--r--net/ncsi/ncsi-rsp.c11
-rw-r--r--net/netfilter/nf_conntrack_core.c11
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c7
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c13
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c69
-rw-r--r--net/netfilter/nf_conntrack_standalone.c10
-rw-r--r--net/netfilter/nf_tables_api.c3
-rw-r--r--net/netfilter/nft_last.c12
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/sched/act_ct.c14
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sctp/diag.c6
-rw-r--r--net/sctp/protocol.c3
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/transport.c11
-rw-r--r--net/socket.c19
-rw-r--r--net/unix/diag.c6
61 files changed, 671 insertions, 174 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 400bd857e5f5..f6012f8e59f0 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -203,6 +203,19 @@ static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr
kfree(attr);
}
+static void garp_attr_destroy_all(struct garp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct garp_attr *attr;
+
+ for (node = rb_first(&app->gid);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct garp_attr, node);
+ garp_attr_destroy(app, attr);
+ }
+}
+
static int garp_pdu_init(struct garp_applicant *app)
{
struct sk_buff *skb;
@@ -609,6 +622,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+ garp_attr_destroy_all(app);
garp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index bea6e43d45a0..35e04cc5390c 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -292,6 +292,19 @@ static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
kfree(attr);
}
+static void mrp_attr_destroy_all(struct mrp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct mrp_attr *attr;
+
+ for (node = rb_first(&app->mad);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct mrp_attr, node);
+ mrp_attr_destroy(app, attr);
+ }
+}
+
static int mrp_pdu_init(struct mrp_applicant *app)
{
struct sk_buff *skb;
@@ -895,6 +908,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
+ mrp_attr_destroy_all(app);
mrp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f7d2f472ae24..6e4a32354a13 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -562,7 +562,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
struct net_bridge_port *p;
int err = 0;
unsigned br_hr, dev_hr;
- bool changed_addr;
+ bool changed_addr, fdb_synced = false;
/* Don't allow bridging non-ethernet like devices. */
if ((dev->flags & IFF_LOOPBACK) ||
@@ -652,6 +652,19 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
list_add_rcu(&p->list, &br->port_list);
nbp_update_port_count(br);
+ if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) {
+ /* When updating the port count we also update all ports'
+ * promiscuous mode.
+ * A port leaving promiscuous mode normally gets the bridge's
+ * fdb synced to the unicast filter (if supported), however,
+ * `br_port_clear_promisc` does not distinguish between
+ * non-promiscuous ports and *new* ports, so we need to
+ * sync explicitly here.
+ */
+ fdb_synced = br_fdb_sync_static(br, p) == 0;
+ if (!fdb_synced)
+ netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n");
+ }
netdev_update_features(br->dev);
@@ -701,6 +714,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return 0;
err7:
+ if (fdb_synced)
+ br_fdb_unsync_static(br, p);
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 53c3a9d80d9c..d0434dc8c03b 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -3264,7 +3264,9 @@ static void br_multicast_pim(struct net_bridge *br,
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
+ spin_lock(&br->multicast_lock);
br_ip4_multicast_mark_router(br, port);
+ spin_unlock(&br->multicast_lock);
}
static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
@@ -3275,7 +3277,9 @@ static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
return -ENOMSG;
+ spin_lock(&br->multicast_lock);
br_ip4_multicast_mark_router(br, port);
+ spin_unlock(&br->multicast_lock);
return 0;
}
@@ -3343,7 +3347,9 @@ static void br_ip6_multicast_mrd_rcv(struct net_bridge *br,
if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
return;
+ spin_lock(&br->multicast_lock);
br_ip6_multicast_mark_router(br, port);
+ spin_unlock(&br->multicast_lock);
}
static int br_multicast_ipv6_rcv(struct net_bridge *br,
diff --git a/net/core/dev.c b/net/core/dev.c
index c253c2aafe97..64b21f0a2048 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6008,6 +6008,19 @@ static void gro_list_prepare(const struct list_head *head,
diffs = memcmp(skb_mac_header(p),
skb_mac_header(skb),
maclen);
+
+ diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
+#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (!diffs) {
+ struct tc_skb_ext *skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ struct tc_skb_ext *p_ext = skb_ext_find(p, TC_SKB_EXT);
+
+ diffs |= (!!p_ext) ^ (!!skb_ext);
+ if (!diffs && unlikely(skb_ext))
+ diffs |= p_ext->chain ^ skb_ext->chain;
+ }
+#endif
+
NAPI_GRO_CB(p)->same_flow = !diffs;
}
}
@@ -6221,6 +6234,8 @@ static gro_result_t napi_skb_finish(struct napi_struct *napi,
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
napi_skb_free_stolen_head(skb);
+ else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+ __kfree_skb(skb);
else
__kfree_skb_defer(skb);
break;
@@ -6270,6 +6285,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
skb_ext_reset(skb);
+ nf_reset_ct(skb);
napi->skb = skb;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 12aabcda6db2..f63de967ac25 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -943,6 +943,7 @@ void __kfree_skb_defer(struct sk_buff *skb)
void napi_skb_free_stolen_head(struct sk_buff *skb)
{
+ nf_reset_ct(skb);
skb_dst_drop(skb);
skb_ext_put(skb);
napi_skb_cache_put(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index ba1c0f75cd45..a3eea6e0b30a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,6 +139,8 @@
#include <net/tcp.h>
#include <net/busy_poll.h>
+#include <linux/ethtool.h>
+
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
@@ -810,8 +812,47 @@ void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
}
}
-int sock_set_timestamping(struct sock *sk, int optname, int val)
+static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
+{
+ struct net *net = sock_net(sk);
+ struct net_device *dev = NULL;
+ bool match = false;
+ int *vclock_index;
+ int i, num;
+
+ if (sk->sk_bound_dev_if)
+ dev = dev_get_by_index(net, sk->sk_bound_dev_if);
+
+ if (!dev) {
+ pr_err("%s: sock not bind to device\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ num = ethtool_get_phc_vclocks(dev, &vclock_index);
+ for (i = 0; i < num; i++) {
+ if (*(vclock_index + i) == phc_index) {
+ match = true;
+ break;
+ }
+ }
+
+ if (num > 0)
+ kfree(vclock_index);
+
+ if (!match)
+ return -EINVAL;
+
+ sk->sk_bind_phc = phc_index;
+
+ return 0;
+}
+
+int sock_set_timestamping(struct sock *sk, int optname,
+ struct so_timestamping timestamping)
{
+ int val = timestamping.flags;
+ int ret;
+
if (val & ~SOF_TIMESTAMPING_MASK)
return -EINVAL;
@@ -832,6 +873,12 @@ int sock_set_timestamping(struct sock *sk, int optname, int val)
!(val & SOF_TIMESTAMPING_OPT_TSONLY))
return -EINVAL;
+ if (val & SOF_TIMESTAMPING_BIND_PHC) {
+ ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
+ if (ret)
+ return ret;
+ }
+
sk->sk_tsflags = val;
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
@@ -907,6 +954,7 @@ EXPORT_SYMBOL(sock_set_mark);
int sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct so_timestamping timestamping;
struct sock_txtime sk_txtime;
struct sock *sk = sock->sk;
int val;
@@ -1068,12 +1116,22 @@ set_sndbuf:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
case SO_TIMESTAMPNS_NEW:
- sock_set_timestamp(sk, valbool, optname);
+ sock_set_timestamp(sk, optname, valbool);
break;
case SO_TIMESTAMPING_NEW:
case SO_TIMESTAMPING_OLD:
- ret = sock_set_timestamping(sk, optname, val);
+ if (optlen == sizeof(timestamping)) {
+ if (copy_from_sockptr(&timestamping, optval,
+ sizeof(timestamping))) {
+ ret = -EFAULT;
+ break;
+ }
+ } else {
+ memset(&timestamping, 0, sizeof(timestamping));
+ timestamping.flags = val;
+ }
+ ret = sock_set_timestamping(sk, optname, timestamping);
break;
case SO_RCVLOWAT:
@@ -1201,7 +1259,7 @@ set_sndbuf:
if (val < 0)
ret = -EINVAL;
else
- sk->sk_ll_usec = val;
+ WRITE_ONCE(sk->sk_ll_usec, val);
}
break;
case SO_PREFER_BUSY_POLL:
@@ -1348,6 +1406,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct __kernel_old_timeval tm;
struct __kernel_sock_timeval stm;
struct sock_txtime txtime;
+ struct so_timestamping timestamping;
} v;
int lv = sizeof(int);
@@ -1451,7 +1510,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_TIMESTAMPING_OLD:
- v.val = sk->sk_tsflags;
+ lv = sizeof(v.timestamping);
+ v.timestamping.flags = sk->sk_tsflags;
+ v.timestamping.bind_phc = sk->sk_bind_phc;
break;
case SO_RCVTIMEO_OLD:
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index af71b8638098..5ece05dfd8f2 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -113,11 +113,11 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
int err, port;
if (dst->index == info->tree_index && ds->index == info->sw_index &&
- ds->ops->port_bridge_join)
+ ds->ops->port_bridge_leave)
ds->ops->port_bridge_leave(ds, info->port, info->br);
if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
- ds->ops->crosschip_bridge_join)
+ ds->ops->crosschip_bridge_leave)
ds->ops->crosschip_bridge_leave(ds, info->tree_index,
info->sw_index, info->port,
info->br);
@@ -427,7 +427,7 @@ static int dsa_switch_lag_join(struct dsa_switch *ds,
info->port, info->lag,
info->info);
- return 0;
+ return -EOPNOTSUPP;
}
static int dsa_switch_lag_leave(struct dsa_switch *ds,
@@ -440,7 +440,7 @@ static int dsa_switch_lag_leave(struct dsa_switch *ds,
return ds->ops->crosschip_lag_leave(ds, info->sw_index,
info->port, info->lag);
- return 0;
+ return -EOPNOTSUPP;
}
static int dsa_switch_mdb_add(struct dsa_switch *ds,
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 723c9a8a8cdf..0a19470efbfb 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index f9dcbad84788..c63e0739dc6a 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -4,6 +4,7 @@
#include <linux/net_tstamp.h>
#include <linux/phy.h>
#include <linux/rtnetlink.h>
+#include <linux/ptp_clock_kernel.h>
#include "common.h"
@@ -397,6 +398,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = {
[const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats",
[const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo",
[const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw",
+ [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc",
};
static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT);
@@ -554,6 +556,18 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
return 0;
}
+int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
+{
+ struct ethtool_ts_info info = { };
+ int num = 0;
+
+ if (!__ethtool_get_ts_info(dev, &info))
+ num = ptp_get_vclocks_index(info.phc_index, vclock_index);
+
+ return num;
+}
+EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index a7346346114f..73e0f5b626bf 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -248,6 +248,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops,
[ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops,
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
+ [ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -958,6 +959,15 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_stats_get_policy,
.maxattr = ARRAY_SIZE(ethnl_stats_get_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_phc_vclocks_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 3e25a47fd482..3fc395c86702 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -347,6 +347,7 @@ extern const struct ethnl_request_ops ethnl_tsinfo_request_ops;
extern const struct ethnl_request_ops ethnl_fec_request_ops;
extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
extern const struct ethnl_request_ops ethnl_stats_request_ops;
+extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -382,6 +383,7 @@ extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
+extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c
new file mode 100644
index 000000000000..637b2f5297d5
--- /dev/null
+++ b/net/ethtool/phc_vclocks.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021 NXP
+ */
+#include "netlink.h"
+#include "common.h"
+
+struct phc_vclocks_req_info {
+ struct ethnl_req_info base;
+};
+
+struct phc_vclocks_reply_data {
+ struct ethnl_reply_data base;
+ int num;
+ int *index;
+};
+
+#define PHC_VCLOCKS_REPDATA(__reply_base) \
+ container_of(__reply_base, struct phc_vclocks_reply_data, base)
+
+const struct nla_policy ethnl_phc_vclocks_get_policy[] = {
+ [ETHTOOL_A_PHC_VCLOCKS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int phc_vclocks_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct phc_vclocks_reply_data *data = PHC_VCLOCKS_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ data->num = ethtool_get_phc_vclocks(dev, &data->index);
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int phc_vclocks_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+ int len = 0;
+
+ if (data->num > 0) {
+ len += nla_total_size(sizeof(u32));
+ len += nla_total_size(sizeof(s32) * data->num);
+ }
+
+ return len;
+}
+
+static int phc_vclocks_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+
+ if (data->num <= 0)
+ return 0;
+
+ if (nla_put_u32(skb, ETHTOOL_A_PHC_VCLOCKS_NUM, data->num) ||
+ nla_put(skb, ETHTOOL_A_PHC_VCLOCKS_INDEX,
+ sizeof(s32) * data->num, data->index))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static void phc_vclocks_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ const struct phc_vclocks_reply_data *data =
+ PHC_VCLOCKS_REPDATA(reply_base);
+
+ kfree(data->index);
+}
+
+const struct ethnl_request_ops ethnl_phc_vclocks_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET,
+ .reply_cmd = ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PHC_VCLOCKS_HEADER,
+ .req_info_size = sizeof(struct phc_vclocks_req_info),
+ .reply_data_size = sizeof(struct phc_vclocks_reply_data),
+
+ .prepare_data = phc_vclocks_prepare_data,
+ .reply_size = phc_vclocks_reply_size,
+ .fill_reply = phc_vclocks_fill_reply,
+ .cleanup_data = phc_vclocks_cleanup_data,
+};
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index a933bd6345b1..9fe13e4f5d08 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1376,7 +1376,7 @@ static void nl_fib_input(struct sk_buff *skb)
portid = NETLINK_CB(skb).portid; /* netlink portid */
NETLINK_CB(skb).portid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
- netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT);
+ nlmsg_unicast(net->ipv4.fibnl, skb, portid);
}
static int __net_init nl_fib_lookup_init(struct net *net)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e65f4ef024a4..ef7897226f08 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -580,10 +580,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
nlmsg_free(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
out:
if (sk)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f6cc26de5ed3..0dca00745ac3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -317,7 +317,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
dev->needed_headroom = t_hlen + hlen;
- mtu -= t_hlen;
+ mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
if (mtu < IPV4_MIN_MTU)
mtu = IPV4_MIN_MTU;
@@ -348,6 +348,9 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->max_mtu -= dev->hard_header_len;
+
ip_tunnel_add(itn, nt);
return nt;
@@ -489,11 +492,14 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
pkt_size = skb->len - tunnel_hlen;
+ pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
- if (df)
+ if (df) {
mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
- else
+ mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+ } else {
mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ }
if (skb_valid_dst(skb))
skb_dst_update_pmtu_no_confirm(skb, mtu);
@@ -972,6 +978,9 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
int max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ max_mtu -= dev->hard_header_len;
+
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1149,6 +1158,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
if (tb[IFLA_MTU]) {
unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
+ if (dev->type == ARPHRD_ETHER)
+ max -= dev->hard_header_len;
+
mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7b12a40dd465..2dda856ca260 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2119,7 +2119,7 @@ int ip_mr_input(struct sk_buff *skb)
raw_rcv(mroute_sk, skb);
return 0;
}
- }
+ }
}
/* already under rcu_read_lock() */
diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c
index 1b5b8af27aaf..ccacbde30a2c 100644
--- a/net/ipv4/raw_diag.c
+++ b/net/ipv4/raw_diag.c
@@ -119,11 +119,8 @@ static int raw_diag_dump_one(struct netlink_callback *cb,
return err;
}
- err = netlink_unicast(net->diag_nlsk, rep,
- NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
return err;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d5ab5f243640..8cb44040ec68 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1375,6 +1375,9 @@ new_segment:
}
pfrag->offset += copy;
} else {
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_space;
+
err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
if (err == -EMSGSIZE || err == -EEXIST) {
tcp_mark_push(tp, skb);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e6ca5a1f3b59..149ceb5c94ff 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4247,6 +4247,9 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
trace_tcp_receive_reset(sk);
+ /* mptcp can't tell us to ignore reset pkts,
+ * so just ignore the return value of mptcp_incoming_options().
+ */
if (sk_is_mptcp(sk))
mptcp_incoming_options(sk, skb);
@@ -4941,8 +4944,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
bool fragstolen;
int eaten;
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not continue
+ * to be processed, drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb)) {
+ __kfree_skb(skb);
+ return;
+ }
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
__kfree_skb(skb);
@@ -5922,8 +5930,8 @@ void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb)
tp->snd_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
tp->snd_cwnd_stamp = tcp_jiffies32;
- icsk->icsk_ca_initialized = 0;
bpf_skops_established(sk, bpf_op, skb);
+ /* Initialize congestion control unless BPF initialized it already: */
if (!icsk->icsk_ca_initialized)
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
@@ -6523,8 +6531,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_CLOSING:
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- if (sk_is_mptcp(sk))
- mptcp_incoming_options(sk, skb);
+ /* If a subflow has been reset, the packet should not
+ * continue to be processed, drop the packet.
+ */
+ if (sk_is_mptcp(sk) && !mptcp_incoming_options(sk, skb))
+ goto discard;
break;
}
fallthrough;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e66ad6bfe808..b9dc2d6197be 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -342,7 +342,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- mtu = tcp_sk(sk)->mtu_info;
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -546,7 +546,7 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
if (sk->sk_state == TCP_LISTEN)
goto out;
- tp->mtu_info = info;
+ WRITE_ONCE(tp->mtu_info, info);
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bde781f46b41..29553fce8502 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1732,6 +1732,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
}
+EXPORT_SYMBOL(tcp_mtu_to_mss);
/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 62682807b4b2..62cd4cd52e84 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1102,7 +1102,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
- ipc.gso_size = up->gso_size;
+ ipc.gso_size = READ_ONCE(up->gso_size);
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
@@ -2695,7 +2695,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case UDP_SEGMENT:
if (val < 0 || val > USHRT_MAX)
return -EINVAL;
- up->gso_size = val;
+ WRITE_ONCE(up->gso_size, val);
break;
case UDP_GRO:
@@ -2790,7 +2790,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
break;
case UDP_SEGMENT:
- val = up->gso_size;
+ val = READ_ONCE(up->gso_size);
break;
case UDP_GRO:
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index b2cee9a307d4..1ed8c4d78e5c 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -77,10 +77,8 @@ static int udp_dump_one(struct udp_table *tbl,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
if (sk)
sock_put(sk);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 54e06b88af69..9dde1e5fb449 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -525,8 +525,10 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
(sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
- pp = call_gro_receive(udp_gro_receive_segment, head, skb);
- return pp;
+ return call_gro_receive(udp_gro_receive_segment, head, skb);
+
+ /* no GRO, be sure flush the current packet */
+ goto out;
}
if (NAPI_GRO_CB(skb)->encap_mark ||
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 984050f35c61..01bea76e3891 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,10 +60,38 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ unsigned int hh_len = LL_RESERVED_SPACE(dev);
+ int delta = hh_len - skb_headroom(skb);
const struct in6_addr *nexthop;
struct neighbour *neigh;
int ret;
+ /* Be paranoid, rather than too clever. */
+ if (unlikely(delta > 0) && dev->header_ops) {
+ /* pskb_expand_head() might crash, if skb is shared */
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (likely(nskb)) {
+ if (skb->sk)
+ skb_set_owner_w(skb, skb->sk);
+ consume_skb(skb);
+ } else {
+ kfree_skb(skb);
+ }
+ skb = nskb;
+ }
+ if (skb &&
+ pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ skb = NULL;
+ }
+ if (!skb) {
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ return -ENOMEM;
+ }
+ }
+
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
@@ -479,7 +507,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ if (!net->ipv6.devconf_all->disable_policy &&
+ !idev->cnf.disable_policy &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 578ab6305c3f..0ce52d46e4f8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -348,11 +348,20 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
+ u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
+
+ /* Drop requests trying to increase our current mss.
+ * Check done in __ip6_rt_update_pmtu() is too late.
+ */
+ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -433,6 +442,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (type == ICMPV6_PKT_TOOBIG) {
+ u32 mtu = ntohl(info);
+
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
@@ -443,7 +454,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
- tp->mtu_info = ntohl(info);
+ if (mtu < IPV6_MIN_MTU)
+ goto out;
+
+ WRITE_ONCE(tp->mtu_info, mtu);
+
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
@@ -540,7 +555,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 368972dbd919..0cc7ba531b34 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1296,7 +1296,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
ipcm6_init(&ipc6);
- ipc6.gso_size = up->gso_size;
+ ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
ipc6.sockc.mark = sk->sk_mark;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 57fa27c1cdf9..d0d280077721 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -49,7 +49,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
- int mtu;
+ unsigned int mtu;
bool toobig;
#ifdef CONFIG_NETFILTER
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 349c6ac3313f..e6795d5a546a 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1635,14 +1635,16 @@ struct iucv_message_pending {
u8 iptype;
u32 ipmsgid;
u32 iptrgcls;
- union {
- u32 iprmmsg1_u32;
- u8 iprmmsg1[4];
- } ln1msg1;
- union {
- u32 ipbfln1f;
- u8 iprmmsg2[4];
- } ln1msg2;
+ struct {
+ union {
+ u32 iprmmsg1_u32;
+ u8 iprmmsg1[4];
+ } ln1msg1;
+ union {
+ u32 ipbfln1f;
+ u8 iprmmsg2[4];
+ } ln1msg2;
+ } rmmsg;
u32 res1[3];
u32 ipbfln2f;
u8 ippollfg;
@@ -1660,10 +1662,10 @@ static void iucv_message_pending(struct iucv_irq_data *data)
msg.id = imp->ipmsgid;
msg.class = imp->iptrgcls;
if (imp->ipflags1 & IUCV_IPRMDATA) {
- memcpy(msg.rmmsg, imp->ln1msg1.iprmmsg1, 8);
+ memcpy(msg.rmmsg, &imp->rmmsg, 8);
msg.length = 8;
} else
- msg.length = imp->ln1msg2.ipbfln1f;
+ msg.length = imp->rmmsg.ln1msg2.ipbfln1f;
msg.reply_size = imp->ipbfln2f;
path->handler->message_pending(path, &msg);
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 52ea2517e856..ff2cc0e3273d 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -44,6 +44,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
+ SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 193466c9b549..0663cb12b448 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -37,6 +37,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
+ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index 8f88ddeab6a2..f48eb6315bbb 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -57,10 +57,8 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
sock_put(sk);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b5850afea343..4452455aef7f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1035,7 +1035,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
return hmac == mp_opt->ahmac;
}
-void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
+/* Return false if a subflow has been reset, else return true */
+bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
@@ -1053,12 +1054,16 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
__mptcp_check_push(subflow->conn, sk);
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
- return;
+ return true;
}
mptcp_get_options(sk, skb, &mp_opt);
+
+ /* The subflow can be in close state only if check_fully_established()
+ * just sent a reset. If so, tell the caller to ignore the current packet.
+ */
if (!check_fully_established(msk, sk, subflow, skb, &mp_opt))
- return;
+ return sk->sk_state != TCP_CLOSE;
if (mp_opt.fastclose &&
msk->local_key == mp_opt.rcvr_key) {
@@ -1100,7 +1105,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
}
if (!mp_opt.dss)
- return;
+ return true;
/* we can't wait for recvmsg() to update the ack_seq, otherwise
* monodirectional flows will stuck
@@ -1119,12 +1124,12 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
schedule_work(&msk->work))
sock_hold(subflow->conn);
- return;
+ return true;
}
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
if (!mpext)
- return;
+ return true;
memset(mpext, 0, sizeof(*mpext));
@@ -1153,6 +1158,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if (mpext->csum_reqd)
mpext->csum = mp_opt.csum;
}
+
+ return true;
}
static void mptcp_set_rwin(const struct tcp_sock *tp)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7a5afa8c6866..a88924947815 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -474,7 +474,7 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
bool cleanup, rx_empty;
cleanup = (space > 0) && (space >= (old_space << 1));
- rx_empty = !atomic_read(&sk->sk_rmem_alloc);
+ rx_empty = !__mptcp_rmem(sk);
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
@@ -720,8 +720,10 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
sk_rbuf = ssk_rbuf;
/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
- if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
+ if (__mptcp_rmem(sk) > sk_rbuf) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
return;
+ }
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
@@ -1754,7 +1756,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
if (!(flags & MSG_PEEK)) {
/* we will bulk release the skb memory later */
skb->destructor = NULL;
- msk->rmem_released += skb->truesize;
+ WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
__skb_unlink(skb, &msk->receive_queue);
__kfree_skb(skb);
}
@@ -1873,7 +1875,7 @@ static void __mptcp_update_rmem(struct sock *sk)
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
sk_mem_uncharge(sk, msk->rmem_released);
- msk->rmem_released = 0;
+ WRITE_ONCE(msk->rmem_released, 0);
}
static void __mptcp_splice_receive_queue(struct sock *sk)
@@ -2380,7 +2382,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;
- msk->rmem_released = 0;
+ WRITE_ONCE(msk->rmem_released, 0);
msk->tx_pending_data = 0;
msk->first = NULL;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 426ed80fe72f..0f0c026c5f8b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -296,9 +296,17 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
return (struct mptcp_sock *)sk;
}
+/* the msk socket don't use the backlog, also account for the bulk
+ * free memory
+ */
+static inline int __mptcp_rmem(const struct sock *sk)
+{
+ return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
+}
+
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 092d1f635d27..8c03afac5ca0 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -157,19 +157,7 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast(ssk);
- switch (optname) {
- case SO_TIMESTAMP_OLD:
- case SO_TIMESTAMP_NEW:
- case SO_TIMESTAMPNS_OLD:
- case SO_TIMESTAMPNS_NEW:
- sock_set_timestamp(sk, optname, !!val);
- break;
- case SO_TIMESTAMPING_NEW:
- case SO_TIMESTAMPING_OLD:
- sock_set_timestamping(sk, optname, val);
- break;
- }
-
+ sock_set_timestamp(sk, optname, !!val);
unlock_sock_fast(ssk, slow);
}
@@ -178,7 +166,8 @@ static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optnam
}
static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+ sockptr_t optval,
+ unsigned int optlen)
{
int val, ret;
@@ -205,14 +194,56 @@ static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
case SO_TIMESTAMPNS_NEW:
- case SO_TIMESTAMPING_OLD:
- case SO_TIMESTAMPING_NEW:
return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
}
return -ENOPROTOOPT;
}
+static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
+ int optname,
+ sockptr_t optval,
+ unsigned int optlen)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ struct so_timestamping timestamping;
+ int ret;
+
+ if (optlen == sizeof(timestamping)) {
+ if (copy_from_sockptr(&timestamping, optval,
+ sizeof(timestamping)))
+ return -EFAULT;
+ } else if (optlen == sizeof(int)) {
+ memset(&timestamping, 0, sizeof(timestamping));
+
+ if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
+ return -EFAULT;
+ } else {
+ return -EINVAL;
+ }
+
+ ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
+ KERNEL_SOCKPTR(&timestamping),
+ sizeof(timestamping));
+ if (ret)
+ return ret;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow = lock_sock_fast(ssk);
+
+ sock_set_timestamping(sk, optname, timestamping);
+ unlock_sock_fast(ssk, slow);
+ }
+
+ release_sock(sk);
+
+ return 0;
+}
+
static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
unsigned int optlen)
{
@@ -299,9 +330,12 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
case SO_TIMESTAMPNS_NEW:
+ return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
+ optlen);
case SO_TIMESTAMPING_OLD:
case SO_TIMESTAMPING_NEW:
- return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen);
+ return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
+ optval, optlen);
case SO_LINGER:
return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
case SO_RCVLOWAT:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 66d0b1893d26..966f777d35ce 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -214,11 +214,6 @@ again:
ntohs(inet_sk(sk_listener)->inet_sport),
ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
- sock_put((struct sock *)subflow_req->msk);
- mptcp_token_destroy_request(req);
- tcp_request_sock_ops.destructor(req);
- subflow_req->msk = NULL;
- subflow_req->mp_join = 0;
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
return -EPERM;
}
@@ -230,6 +225,8 @@ again:
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
+ else
+ return -EPERM;
}
pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
@@ -269,9 +266,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
return -EINVAL;
- if (mptcp_can_accept_new_subflow(subflow_req->msk))
- subflow_req->mp_join = 1;
-
+ subflow_req->mp_join = 1;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
}
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index abe0fd099746..37127781aee9 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -37,7 +37,21 @@ static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp
static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net)
{
- u32 i = skb_get_hash(skb) ^ net_hash_mix(net);
+ static u32 mptcp_join_hash_secret __read_mostly;
+ struct tcphdr *th = tcp_hdr(skb);
+ u32 seq, i;
+
+ net_get_random_once(&mptcp_join_hash_secret,
+ sizeof(mptcp_join_hash_secret));
+
+ if (th->syn)
+ seq = TCP_SKB_CB(skb)->seq;
+ else
+ seq = TCP_SKB_CB(skb)->seq - 1;
+
+ i = jhash_3words(seq, net_hash_mix(net),
+ (__force __u32)th->source << 16 | (__force __u32)th->dest,
+ mptcp_join_hash_secret);
return i % ARRAY_SIZE(join_entries);
}
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 93309081f5a4..ea1dd32b6b1f 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -17,3 +17,9 @@ config NCSI_OEM_CMD_GET_MAC
help
This allows to get MAC address from NCSI firmware and set them back to
controller.
+config NCSI_OEM_CMD_KEEP_PHY
+ bool "Keep PHY Link up"
+ depends on NET_NCSI
+ help
+ This allows to keep PHY link up and prevents any channel resets during
+ the host load.
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index cbbb0de4750a..0b6cfd3b31e0 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -78,6 +78,9 @@ enum {
/* OEM Vendor Manufacture ID */
#define NCSI_OEM_MFR_MLX_ID 0x8119
#define NCSI_OEM_MFR_BCM_ID 0x113d
+#define NCSI_OEM_MFR_INTEL_ID 0x157
+/* Intel specific OEM command */
+#define NCSI_OEM_INTEL_CMD_KEEP_PHY 0x20 /* CMD ID for Keep PHY up */
/* Broadcom specific OEM Command */
#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
/* Mellanox specific OEM Command */
@@ -86,6 +89,7 @@ enum {
#define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */
#define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */
/* OEM Command payload lengths*/
+#define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
#define NCSI_OEM_MLX_CMD_GMA_LEN 8
#define NCSI_OEM_MLX_CMD_SMAF_LEN 60
@@ -271,6 +275,7 @@ enum {
ncsi_dev_state_probe_mlx_gma,
ncsi_dev_state_probe_mlx_smaf,
ncsi_dev_state_probe_cis,
+ ncsi_dev_state_probe_keep_phy,
ncsi_dev_state_probe_gvi,
ncsi_dev_state_probe_gc,
ncsi_dev_state_probe_gls,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index ca04b6df1341..89c7742cd72e 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -689,6 +689,35 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
+
+static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
+{
+ unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN;
+
+ memset(data, 0, NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID);
+
+ data[4] = NCSI_OEM_INTEL_CMD_KEEP_PHY;
+
+ /* PHY Link up attribute */
+ data[6] = 0x1;
+
+ nca->data = data;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+ return ret;
+}
+
+#endif
+
#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
/* NCSI OEM Command APIs */
@@ -700,7 +729,7 @@ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN;
memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN);
- *(unsigned int *)data = ntohl(NCSI_OEM_MFR_BCM_ID);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_BCM_ID);
data[5] = NCSI_OEM_BCM_CMD_GMA;
nca->data = data;
@@ -724,7 +753,7 @@ static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN;
memset(&u, 0, sizeof(u));
- u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID);
u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA;
u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM;
@@ -747,7 +776,7 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
int ret = 0;
memset(&u, 0, sizeof(u));
- u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID);
u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF;
u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM;
memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET],
@@ -1392,7 +1421,23 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
}
nd->state = ncsi_dev_state_probe_gvi;
+ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
+ nd->state = ncsi_dev_state_probe_keep_phy;
+ break;
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
+ case ncsi_dev_state_probe_keep_phy:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_OEM;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0;
+ ret = ncsi_oem_keep_phy_intel(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_probe_gvi;
break;
+#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 888ccc2d4e34..d48374894817 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -403,7 +403,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr)
/* Update to VLAN mode */
cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd);
ncm->enable = 1;
- ncm->data[0] = ntohl(cmd->mode);
+ ncm->data[0] = ntohl((__force __be32)cmd->mode);
return 0;
}
@@ -699,12 +699,19 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
return 0;
}
+/* Response handler for Intel card */
+static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
+{
+ return 0;
+}
+
static struct ncsi_rsp_oem_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_request *nr);
} ncsi_rsp_oem_handlers[] = {
{ NCSI_OEM_MFR_MLX_ID, ncsi_rsp_handler_oem_mlx },
- { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm }
+ { NCSI_OEM_MFR_BCM_ID, ncsi_rsp_handler_oem_bcm },
+ { NCSI_OEM_MFR_INTEL_ID, ncsi_rsp_handler_oem_intel }
};
/* Response handler for OEM command */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 96ba19fc8155..83c52df85870 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -149,7 +149,15 @@ static void nf_conntrack_all_lock(void)
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
+ /* For nf_contrack_locks_all, only the latest time when another
+ * CPU will see an update is controlled, by the "release" of the
+ * spin_lock below.
+ * The earliest time is not controlled, an thus KCSAN could detect
+ * a race when nf_conntract_lock() reads the variable.
+ * WRITE_ONCE() is used to ensure the compiler will not
+ * optimize the write.
+ */
+ WRITE_ONCE(nf_conntrack_locks_all, true);
for (i = 0; i < CONNTRACK_LOCKS; i++) {
spin_lock(&nf_conntrack_locks[i]);
@@ -2457,7 +2465,6 @@ i_see_dead_people:
}
list_for_each_entry(net, net_exit_list, exit_list) {
- nf_conntrack_proto_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e1a9dba7077..e81af33b233b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -218,6 +218,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
if (!help)
return 0;
+ rcu_read_lock();
helper = rcu_dereference(help->helper);
if (!helper)
goto out;
@@ -233,9 +234,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
nla_nest_end(skb, nest_helper);
out:
+ rcu_read_unlock();
return 0;
nla_put_failure:
+ rcu_read_unlock();
return -1;
}
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 55647409a9be..8f7a9837349c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -697,13 +697,6 @@ void nf_conntrack_proto_pernet_init(struct net *net)
#endif
}
-void nf_conntrack_proto_pernet_fini(struct net *net)
-{
-#ifdef CONFIG_NF_CT_PROTO_GRE
- nf_ct_gre_keymap_flush(net);
-#endif
-}
-
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index db11e403d818..728eeb0aea87 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -55,19 +55,6 @@ static inline struct nf_gre_net *gre_pernet(struct net *net)
return &net->ct.nf_ct_proto.gre;
}
-void nf_ct_gre_keymap_flush(struct net *net)
-{
- struct nf_gre_net *net_gre = gre_pernet(net);
- struct nf_ct_gre_keymap *km, *tmp;
-
- spin_lock_bh(&keymap_lock);
- list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
- list_del_rcu(&km->list);
- kfree_rcu(km, rcu);
- }
- spin_unlock_bh(&keymap_lock);
-}
-
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
const struct nf_conntrack_tuple *t)
{
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index f7e8baf59b51..3259416f2ea4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -823,6 +823,22 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
+static bool tcp_can_early_drop(const struct nf_conn *ct)
+{
+ switch (ct->proto.tcp.state) {
+ case TCP_CONNTRACK_FIN_WAIT:
+ case TCP_CONNTRACK_LAST_ACK:
+ case TCP_CONNTRACK_TIME_WAIT:
+ case TCP_CONNTRACK_CLOSE:
+ case TCP_CONNTRACK_CLOSE_WAIT:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
@@ -1030,10 +1046,30 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
if (index != TCP_RST_SET)
break;
- if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+ /* If we are closing, tuple might have been re-used already.
+ * last_index, last_ack, and all other ct fields used for
+ * sequence/window validation are outdated in that case.
+ *
+ * As the conntrack can already be expired by GC under pressure,
+ * just skip validation checks.
+ */
+ if (tcp_can_early_drop(ct))
+ goto in_window;
+
+ /* td_maxack might be outdated if we let a SYN through earlier */
+ if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
+ ct->proto.tcp.last_index != TCP_SYN_SET) {
u32 seq = ntohl(th->seq);
- if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+ /* If we are not in established state and SEQ=0 this is most
+ * likely an answer to a SYN we let go through above (last_index
+ * can be updated due to out-of-order ACKs).
+ */
+ if (seq == 0 && !nf_conntrack_tcp_established(ct))
+ break;
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) &&
+ !tn->tcp_ignore_invalid_rst) {
/* Invalid RST */
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
@@ -1134,6 +1170,16 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_ACCEPT;
}
+
+ if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
+ /* do not renew timeout on SYN retransmit.
+ *
+ * Else port reuse by client or NAT middlebox can keep
+ * entry alive indefinitely (including nat info).
+ */
+ return NF_ACCEPT;
+ }
+
/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
* pickup with loose=1. Avoid large ESTABLISHED timeout.
*/
@@ -1155,22 +1201,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-static bool tcp_can_early_drop(const struct nf_conn *ct)
-{
- switch (ct->proto.tcp.state) {
- case TCP_CONNTRACK_FIN_WAIT:
- case TCP_CONNTRACK_LAST_ACK:
- case TCP_CONNTRACK_TIME_WAIT:
- case TCP_CONNTRACK_CLOSE:
- case TCP_CONNTRACK_CLOSE_WAIT:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
#include <linux/netfilter/nfnetlink.h>
@@ -1437,6 +1467,9 @@ void nf_conntrack_tcp_init_net(struct net *net)
*/
tn->tcp_be_liberal = 0;
+ /* If it's non-zero, we turn off RST sequence number check */
+ tn->tcp_ignore_invalid_rst = 0;
+
/* Max number of the retransmitted packets without receiving an (acceptable)
* ACK from the destination. If this number is reached, a shorter timer
* will be started.
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index f57a951c9b5e..214d9f9e499b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -579,6 +579,7 @@ enum nf_ct_sysctl_index {
#endif
NF_SYSCTL_CT_PROTO_TCP_LOOSE,
NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST,
NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
@@ -798,6 +799,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+ [NF_SYSCTL_CT_PROTO_TCP_IGNORE_INVALID_RST] = {
+ .procname = "nf_conntrack_tcp_ignore_invalid_rst",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
[NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
.procname = "nf_conntrack_tcp_max_retrans",
.maxlen = sizeof(u8),
@@ -1004,6 +1013,7 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
XASSIGN(LOOSE, &tn->tcp_loose);
XASSIGN(LIBERAL, &tn->tcp_be_liberal);
XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ XASSIGN(IGNORE_INVALID_RST, &tn->tcp_ignore_invalid_rst);
#undef XASSIGN
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 390d4466567f..de182d1f7c4e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3446,7 +3446,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
- nft_flow_rule_destroy(flow);
+ if (flow)
+ nft_flow_rule_destroy(flow);
err_release_rule:
nf_tables_rule_release(&ctx, rule);
err_release_expr:
diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c
index 913ac45167f2..8088b99f2ee3 100644
--- a/net/netfilter/nft_last.c
+++ b/net/netfilter/nft_last.c
@@ -23,15 +23,21 @@ static int nft_last_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
{
struct nft_last_priv *priv = nft_expr_priv(expr);
u64 last_jiffies;
+ u32 last_set = 0;
int err;
- if (tb[NFTA_LAST_MSECS]) {
+ if (tb[NFTA_LAST_SET]) {
+ last_set = ntohl(nla_get_be32(tb[NFTA_LAST_SET]));
+ if (last_set == 1)
+ priv->last_set = 1;
+ }
+
+ if (last_set && tb[NFTA_LAST_MSECS]) {
err = nf_msecs_to_jiffies64(tb[NFTA_LAST_MSECS], &last_jiffies);
if (err < 0)
return err;
- priv->last_jiffies = jiffies + (unsigned long)last_jiffies;
- priv->last_set = 1;
+ priv->last_jiffies = jiffies - (unsigned long)last_jiffies;
}
return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index d233ac4a91b6..380f95aacdec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2471,7 +2471,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
nlmsg_end(skb, rep);
- netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
+ nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);
}
EXPORT_SYMBOL(netlink_ack);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c89c8da99f1a..d4a2db0b2299 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -670,13 +670,13 @@ static bool cmp_key(const struct sw_flow_key *key1,
{
const long *cp1 = (const long *)((const u8 *)key1 + key_start);
const long *cp2 = (const long *)((const u8 *)key2 + key_start);
- long diffs = 0;
int i;
for (i = key_start; i < key_end; i += sizeof(long))
- diffs |= *cp1++ ^ *cp2++;
+ if (*cp1++ ^ *cp2++)
+ return false;
- return diffs == 0;
+ return true;
}
static bool flow_cmp_masked_key(const struct sw_flow *flow,
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index a656baa321fe..1b4b3514c94f 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -322,11 +322,22 @@ err_alloc:
static void tcf_ct_flow_table_cleanup_work(struct work_struct *work)
{
+ struct flow_block_cb *block_cb, *tmp_cb;
struct tcf_ct_flow_table *ct_ft;
+ struct flow_block *block;
ct_ft = container_of(to_rcu_work(work), struct tcf_ct_flow_table,
rwork);
nf_flow_table_free(&ct_ft->nf_ft);
+
+ /* Remove any remaining callbacks before cleanup */
+ block = &ct_ft->nf_ft.flow_block;
+ down_write(&ct_ft->nf_ft.flow_block_lock);
+ list_for_each_entry_safe(block_cb, tmp_cb, &block->cb_list, list) {
+ list_del(&block_cb->list);
+ flow_block_cb_free(block_cb);
+ }
+ up_write(&ct_ft->nf_ft.flow_block_lock);
kfree(ct_ft);
module_put(THIS_MODULE);
@@ -1026,7 +1037,8 @@ do_nat:
/* This will take care of sending queued events
* even if the connection is already confirmed.
*/
- nf_conntrack_confirm(skb);
+ if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+ goto drop;
}
if (!skip_add)
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 66fe2b82af9a..07b30d0601d7 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -564,7 +564,7 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
/* if there's no entry, it means that the schedule didn't
* start yet, so force all gates to be open, this is in
* accordance to IEEE 802.1Qbv-2015 Section 8.6.9.4.5
- * "AdminGateSates"
+ * "AdminGateStates"
*/
gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 493fc01e5d2b..760b367644c1 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -284,10 +284,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p)
goto out;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
return err;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 3c1fbf38f4f7..ec0f52567c16 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -398,7 +398,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
retval = SCTP_SCOPE_LINK;
} else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
- ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
+ ipv4_is_private_192(addr->v4.sin_addr.s_addr) ||
+ ipv4_is_test_198(addr->v4.sin_addr.s_addr)) {
retval = SCTP_SCOPE_PRIVATE;
} else {
retval = SCTP_SCOPE_GLOBAL;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 6c08e5048d38..b8fa8f1a7277 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1163,7 +1163,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc,
const struct sctp_transport *transport,
__u32 probe_size)
{
- struct sctp_sender_hb_info hbinfo;
+ struct sctp_sender_hb_info hbinfo = {};
struct sctp_chunk *retval;
retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 5f23804f21c7..397a6244dd97 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -335,10 +335,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
sctp_assoc_sync_pmtu(t->asoc);
}
- } else if (t->pl.state == SCTP_PL_COMPLETE && ++t->pl.raise_count == 30) {
- /* Raise probe_size again after 30 * interval in Search Complete */
- t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
- t->pl.probe_size += SCTP_PL_MIN_STEP;
+ } else if (t->pl.state == SCTP_PL_COMPLETE) {
+ t->pl.raise_count++;
+ if (t->pl.raise_count == 30) {
+ /* Raise probe_size again after 30 * interval in Search Complete */
+ t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+ t->pl.probe_size += SCTP_PL_MIN_STEP;
+ }
}
}
diff --git a/net/socket.c b/net/socket.c
index bd9233da2497..0b2dad3bdf7f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,6 +104,7 @@
#include <linux/sockios.h>
#include <net/busy_poll.h>
#include <linux/errqueue.h>
+#include <linux/ptp_clock_kernel.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -873,12 +874,18 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
empty = 0;
if (shhwtstamps &&
(sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
- !skb_is_swtx_tstamp(skb, false_tstamp) &&
- ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
- empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
- !skb_is_err_queue(skb))
- put_ts_pktinfo(msg, skb);
+ !skb_is_swtx_tstamp(skb, false_tstamp)) {
+ if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
+
+ if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
+ tss.ts + 2)) {
+ empty = 0;
+
+ if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ !skb_is_err_queue(skb))
+ put_ts_pktinfo(msg, skb);
+ }
}
if (!empty) {
if (sock_flag(sk, SOCK_TSTAMP_NEW))
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 9ff64f9df1f3..7e7d7f45685a 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -295,10 +295,8 @@ again:
goto again;
}
- err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
- MSG_DONTWAIT);
- if (err > 0)
- err = 0;
+ err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
+
out:
if (sk)
sock_put(sk);