summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c4
-rw-r--r--net/can/af_can.c34
-rw-r--r--net/can/j1939/main.c22
-rw-r--r--net/can/j1939/socket.c13
-rw-r--r--net/can/proc.c19
-rw-r--r--net/ceph/ceph_common.c17
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/dsa/Kconfig3
-rw-r--r--net/dsa/dsa_priv.h25
-rw-r--r--net/dsa/slave.c59
-rw-r--r--net/dsa/tag_mtk.c19
-rw-r--r--net/dsa/tag_rtl4_a.c12
-rw-r--r--net/ethtool/channels.c26
-rw-r--r--net/hsr/hsr_framereg.c9
-rw-r--r--net/hsr/hsr_framereg.h1
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/cipso_ipv4.c13
-rw-r--r--net/ipv4/icmp.c5
-rw-r--r--net/ipv4/inetpeer.c21
-rw-r--r--net/ipv4/ip_tunnel.c5
-rw-r--r--net/ipv4/ip_vti.c6
-rw-r--r--net/ipv4/nexthop.c10
-rw-r--r--net/ipv4/tcp.c26
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/calipso.c14
-rw-r--r--net/ipv6/icmp.c18
-rw-r--r--net/ipv6/ip6_gre.c16
-rw-r--r--net/ipv6/ip6_icmp.c12
-rw-r--r--net/ipv6/ip6_tunnel.c10
-rw-r--r--net/ipv6/ip6_vti.c6
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/l2tp/l2tp_core.c41
-rw-r--r--net/l2tp/l2tp_core.h1
-rw-r--r--net/l2tp/l2tp_netlink.c6
-rw-r--r--net/mpls/mpls_gso.c3
-rw-r--r--net/mptcp/options.c23
-rw-r--r--net/mptcp/protocol.c223
-rw-r--r--net/mptcp/protocol.h14
-rw-r--r--net/mptcp/subflow.c20
-rw-r--r--net/netfilter/nf_conntrack_helper.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c6
-rw-r--r--net/netfilter/nf_nat_proto.c25
-rw-r--r--net/netfilter/nf_tables_api.c19
-rw-r--r--net/netfilter/x_tables.c6
-rw-r--r--net/netlabel/netlabel_cipso_v4.c3
-rw-r--r--net/psample/psample.c4
-rw-r--r--net/qrtr/qrtr.c6
-rw-r--r--net/qrtr/tun.c12
-rw-r--r--net/sched/cls_flower.c15
-rw-r--r--net/sched/sch_api.c8
-rw-r--r--net/sctp/tsnmap.c2
-rw-r--r--net/socket.c16
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/svcsock.c35
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c67
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c6
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h15
-rw-r--r--net/sunrpc/xprtsock.c17
-rw-r--r--net/unix/af_unix.c5
65 files changed, 595 insertions, 463 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 4f62f299da0c..0a9019da18f3 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1623,10 +1623,6 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
}
p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
- if (!count) {
- p9_tag_remove(clnt, req);
- return 0;
- }
if (non_zc) {
int n = copy_to_iter(dataptr, count, to);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 837bb8af0ec3..cce2af10eb3e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -304,8 +304,8 @@ static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
struct net_device *dev)
{
if (dev) {
- struct can_ml_priv *ml_priv = dev->ml_priv;
- return &ml_priv->dev_rcv_lists;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+ return &can_ml->dev_rcv_lists;
} else {
return net->can.rx_alldev_list;
}
@@ -790,25 +790,6 @@ void can_proto_unregister(const struct can_proto *cp)
}
EXPORT_SYMBOL(can_proto_unregister);
-/* af_can notifier to create/remove CAN netdevice specific structs */
-static int can_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
-
- switch (msg) {
- case NETDEV_REGISTER:
- WARN(!dev->ml_priv,
- "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
- break;
- }
-
- return NOTIFY_DONE;
-}
-
static int can_pernet_init(struct net *net)
{
spin_lock_init(&net->can.rcvlists_lock);
@@ -876,11 +857,6 @@ static const struct net_proto_family can_family_ops = {
.owner = THIS_MODULE,
};
-/* notifier block for netdevice event */
-static struct notifier_block can_netdev_notifier __read_mostly = {
- .notifier_call = can_notifier,
-};
-
static struct pernet_operations can_pernet_ops __read_mostly = {
.init = can_pernet_init,
.exit = can_pernet_exit,
@@ -911,17 +887,12 @@ static __init int can_init(void)
err = sock_register(&can_family_ops);
if (err)
goto out_sock;
- err = register_netdevice_notifier(&can_netdev_notifier);
- if (err)
- goto out_notifier;
dev_add_pack(&can_packet);
dev_add_pack(&canfd_packet);
return 0;
-out_notifier:
- sock_unregister(PF_CAN);
out_sock:
unregister_pernet_subsys(&can_pernet_ops);
out_pernet:
@@ -935,7 +906,6 @@ static __exit void can_exit(void)
/* protocol unregister */
dev_remove_pack(&canfd_packet);
dev_remove_pack(&can_packet);
- unregister_netdevice_notifier(&can_netdev_notifier);
sock_unregister(PF_CAN);
unregister_pernet_subsys(&can_pernet_ops);
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index bb914d8b4216..da3a7a7bcff2 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -140,9 +140,9 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
static inline void j1939_priv_set(struct net_device *ndev,
struct j1939_priv *priv)
{
- struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
- can_ml_priv->j1939_priv = priv;
+ can_ml->j1939_priv = priv;
}
static void __j1939_priv_release(struct kref *kref)
@@ -211,12 +211,9 @@ static void __j1939_rx_release(struct kref *kref)
/* get pointer to priv without increasing ref counter */
static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
{
- struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
- if (!can_ml_priv)
- return NULL;
-
- return can_ml_priv->j1939_priv;
+ return can_ml->j1939_priv;
}
static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
@@ -225,9 +222,6 @@ static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
lockdep_assert_held(&j1939_netdev_lock);
- if (ndev->type != ARPHRD_CAN)
- return NULL;
-
priv = j1939_ndev_to_priv(ndev);
if (priv)
j1939_priv_get(priv);
@@ -348,15 +342,16 @@ static int j1939_netdev_notify(struct notifier_block *nb,
unsigned long msg, void *data)
{
struct net_device *ndev = netdev_notifier_info_to_dev(data);
+ struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
struct j1939_priv *priv;
+ if (!can_ml)
+ goto notify_done;
+
priv = j1939_priv_get_by_ndev(ndev);
if (!priv)
goto notify_done;
- if (ndev->type != ARPHRD_CAN)
- goto notify_put;
-
switch (msg) {
case NETDEV_DOWN:
j1939_cancel_active_session(priv, NULL);
@@ -365,7 +360,6 @@ static int j1939_netdev_notify(struct notifier_block *nb,
break;
}
-notify_put:
j1939_priv_put(priv);
notify_done:
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index f23966526a88..56aa66147d5a 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/can/can-ml.h>
#include <linux/can/core.h>
#include <linux/can/skb.h>
#include <linux/errqueue.h>
@@ -453,6 +454,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
j1939_jsk_del(priv, jsk);
j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
} else {
+ struct can_ml_priv *can_ml;
struct net_device *ndev;
ndev = dev_get_by_index(net, addr->can_ifindex);
@@ -461,15 +463,8 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
goto out_release_sock;
}
- if (ndev->type != ARPHRD_CAN) {
- dev_put(ndev);
- ret = -ENODEV;
- goto out_release_sock;
- }
-
- if (!ndev->ml_priv) {
- netdev_warn_once(ndev,
- "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+ can_ml = can_get_ml_priv(ndev);
+ if (!can_ml) {
dev_put(ndev);
ret = -ENODEV;
goto out_release_sock;
diff --git a/net/can/proc.c b/net/can/proc.c
index 5ea8695f507e..b15760b5c1cc 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -322,8 +322,11 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
/* receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv)
- can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml)
+ can_rcvlist_proc_show_one(m, idx, dev,
+ &can_ml->dev_rcv_lists);
}
rcu_read_unlock();
@@ -375,8 +378,10 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
/* sff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- dev_rcv_lists = dev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml) {
+ dev_rcv_lists = &can_ml->dev_rcv_lists;
can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
ARRAY_SIZE(dev_rcv_lists->rx_sff));
}
@@ -406,8 +411,10 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
/* eff receive list for registered CAN devices */
for_each_netdev_rcu(net, dev) {
- if (dev->type == ARPHRD_CAN && dev->ml_priv) {
- dev_rcv_lists = dev->ml_priv;
+ struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+ if (can_ml) {
+ dev_rcv_lists = &can_ml->dev_rcv_lists;
can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
ARRAY_SIZE(dev_rcv_lists->rx_eff));
}
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 271287c5ec12..97d6ea763e32 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -252,7 +252,6 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
* ceph options
*/
enum {
- Opt_osdtimeout,
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
@@ -307,7 +306,8 @@ static const struct constant_table ceph_param_ms_mode[] = {
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
- fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
+ __fsparam (NULL, "cephx_require_signatures", Opt_cephx_require_signatures,
+ fs_param_neg_with_no|fs_param_deprecated, NULL),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
fsparam_flag_no ("crc", Opt_crc),
fsparam_string ("crush_location", Opt_crush_location),
@@ -319,8 +319,6 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osd_idle_ttl", Opt_osd_idle_ttl),
fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout),
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
- __fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
- fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
@@ -552,9 +550,6 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
}
break;
- case Opt_osdtimeout:
- warn_plog(&log, "Ignoring osdtimeout");
- break;
case Opt_osdkeepalivetimeout:
/* 0 isn't well defined right now, reject it */
if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000)
@@ -596,9 +591,9 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
break;
case Opt_cephx_require_signatures:
if (!result.negated)
- opt->flags &= ~CEPH_OPT_NOMSGAUTH;
+ warn_plog(&log, "Ignoring cephx_require_signatures");
else
- opt->flags |= CEPH_OPT_NOMSGAUTH;
+ warn_plog(&log, "Ignoring nocephx_require_signatures, use nocephx_sign_messages");
break;
case Opt_cephx_sign_messages:
if (!result.negated)
@@ -686,8 +681,6 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
seq_puts(m, "noshare,");
if (opt->flags & CEPH_OPT_NOCRC)
seq_puts(m, "nocrc,");
- if (opt->flags & CEPH_OPT_NOMSGAUTH)
- seq_puts(m, "nocephx_require_signatures,");
if (opt->flags & CEPH_OPT_NOMSGSIGN)
seq_puts(m, "nocephx_sign_messages,");
if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
@@ -756,7 +749,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private)
client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT;
client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT;
- if (!ceph_test_opt(client, NOMSGAUTH))
+ if (!ceph_test_opt(client, NOMSGSIGN))
client->required_features |= CEPH_FEATURE_MSG_AUTH;
/* msgr */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0edc0b2baaa4..1bdcb33fb561 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2147,7 +2147,7 @@ out:
out_err:
cb->args[1] = idx;
cb->args[0] = h;
- cb->seq = net->dev_base_seq;
+ cb->seq = tgt_net->dev_base_seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (netnsid >= 0)
put_net(tgt_net);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 545a472273a5..c421c8f80925 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
struct ts_state state;
unsigned int ret;
+ BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
+
config->get_next_block = skb_ts_get_next_block;
config->finish = skb_ts_finish;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index a45572cfb71a..58b8fc82cd3c 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -9,6 +9,7 @@ menuconfig NET_DSA
tristate "Distributed Switch Architecture"
depends on HAVE_NET_DSA
depends on BRIDGE || BRIDGE=n
+ depends on HSR || HSR=n
select GRO_CELLS
select NET_SWITCHDEV
select PHYLINK
@@ -117,6 +118,8 @@ config NET_DSA_TAG_OCELOT
config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
+ depends on MSCC_OCELOT_SWITCH_LIB || \
+ (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
select NET_DSA_TAG_8021Q
help
Say Y or M if you want to enable support for tagging frames with a
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2eeaa42f2e08..9d4b0e9b1aa1 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -230,8 +230,8 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
-static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
- struct net_device *dev)
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+ struct net_device *dev)
{
/* Switchdev offloading can be configured on: */
@@ -241,12 +241,6 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
*/
return true;
- if (dp->bridge_dev == dev)
- /* DSA ports connected to a bridge, and event was emitted
- * for the bridge.
- */
- return true;
-
if (dp->lag_dev == dev)
/* DSA ports connected to a bridge via a LAG */
return true;
@@ -254,14 +248,23 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
return false;
}
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+ struct net_device *bridge_dev)
+{
+ /* DSA ports connected to a bridge, and event was emitted
+ * for the bridge.
+ */
+ return dp->bridge_dev == bridge_dev;
+}
+
/* Returns true if any port of this tree offloads the given net_device */
-static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst,
- struct net_device *dev)
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+ struct net_device *dev)
{
struct dsa_port *dp;
list_for_each_entry(dp, &dst->ports, list)
- if (dsa_port_offloads_netdev(dp, dev))
+ if (dsa_port_offloads_bridge_port(dp, dev))
return true;
return false;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 491e3761b5f4..992fcab4b552 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -278,28 +278,43 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
- if (!dsa_port_offloads_netdev(dp, attr->orig_dev))
- return -EOPNOTSUPP;
-
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+ if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_set_state(dp, attr->u.stp_state);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+ if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
extack);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+ if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
break;
case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+ if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
extack);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+ if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+ if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+ return -EOPNOTSUPP;
+
ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
break;
default:
@@ -341,9 +356,6 @@ static int dsa_slave_vlan_add(struct net_device *dev,
struct switchdev_obj_port_vlan vlan;
int err;
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
if (dsa_port_skip_vlan_configuration(dp)) {
NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
return 0;
@@ -391,27 +403,36 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_HOST_MDB:
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+
/* DSA can directly translate this to a normal MDB add,
* but on the CPU port.
*/
err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+
err = dsa_slave_vlan_add(dev, obj, extack);
break;
case SWITCHDEV_OBJ_ID_MRP:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
break;
case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mrp_add_ring_role(dp,
SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
break;
@@ -431,9 +452,6 @@ static int dsa_slave_vlan_del(struct net_device *dev,
struct switchdev_obj_port_vlan *vlan;
int err;
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
- return -EOPNOTSUPP;
-
if (dsa_port_skip_vlan_configuration(dp))
return 0;
@@ -459,27 +477,36 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
switch (obj->id) {
case SWITCHDEV_OBJ_ID_PORT_MDB:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_HOST_MDB:
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+
/* DSA can directly translate this to a normal MDB add,
* but on the CPU port.
*/
err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
break;
case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+ return -EOPNOTSUPP;
+
err = dsa_slave_vlan_del(dev, obj);
break;
case SWITCHDEV_OBJ_ID_MRP:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
break;
case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
- if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+ if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
return -EOPNOTSUPP;
+
err = dsa_port_mrp_del_ring_role(dp,
SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
break;
@@ -2298,7 +2325,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
* other ports bridged with the LAG should be able to
* autonomously forward towards it.
*/
- if (dsa_tree_offloads_netdev(dp->ds->dst, dev))
+ if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev))
return NOTIFY_DONE;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 38dcdded74c0..59748487664f 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -13,6 +13,7 @@
#define MTK_HDR_LEN 4
#define MTK_HDR_XMIT_UNTAGGED 0
#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
+#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
#define MTK_HDR_XMIT_SA_DIS BIT(6)
@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 xmit_tpid;
u8 *mtk_tag;
- bool is_vlan_skb = true;
unsigned char *dest = eth_hdr(skb)->h_dest;
bool is_multicast_skb = is_multicast_ether_addr(dest) &&
!is_broadcast_ether_addr(dest);
@@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
* the both special and VLAN tag at the same time and then look up VLAN
* table with VID.
*/
- if (!skb_vlan_tagged(skb)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
+ break;
+ case htons(ETH_P_8021AD):
+ xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
+ break;
+ default:
+ xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
skb_push(skb, MTK_HDR_LEN);
memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
- is_vlan_skb = false;
}
mtk_tag = skb->data + 2 * ETH_ALEN;
@@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
/* Mark tag attribute on special tag insertion to notify hardware
* whether that's a combined special tag with 802.1Q header.
*/
- mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
- MTK_HDR_XMIT_UNTAGGED;
+ mtk_tag[0] = xmit_tpid;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
/* Disable SA learning for multicast frames */
@@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
/* Tag control information is kept for 802.1Q */
- if (!is_vlan_skb) {
+ if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
mtk_tag[2] = 0;
mtk_tag[3] = 0;
}
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index c17d39b4a1a0..e9176475bac8 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -35,14 +35,12 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ __be16 *p;
u8 *tag;
- u16 *p;
u16 out;
/* Pad out to at least 60 bytes */
- if (unlikely(eth_skb_pad(skb)))
- return NULL;
- if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0)
+ if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false)))
return NULL;
netdev_dbg(dev, "add realtek tag to package to port %d\n",
@@ -53,13 +51,13 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
tag = skb->data + 2 * ETH_ALEN;
/* Set Ethertype */
- p = (u16 *)tag;
+ p = (__be16 *)tag;
*p = htons(RTL4_A_ETHERTYPE);
out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
- /* The lower bits is the port numer */
+ /* The lower bits is the port number */
out |= (u8)dp->index;
- p = (u16 *)(tag + 2);
+ p = (__be16 *)(tag + 2);
*p = htons(out);
return skb;
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 25a9e566ef5c..6a070dc8e4b0 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
struct ethtool_channels channels = {};
struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
- const struct nlattr *err_attr;
+ u32 err_attr, max_rx_in_use = 0;
const struct ethtool_ops *ops;
struct net_device *dev;
- u32 max_rx_in_use = 0;
int ret;
ret = ethnl_parse_header_dev_get(&req_info,
@@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
/* ensure new channel counts are within limits */
if (channels.rx_count > channels.max_rx)
- err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
else if (channels.tx_count > channels.max_tx)
- err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
else if (channels.other_count > channels.max_other)
- err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT;
else if (channels.combined_count > channels.max_combined)
- err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
else
- err_attr = NULL;
+ err_attr = 0;
if (err_attr) {
ret = -EINVAL;
- NL_SET_ERR_MSG_ATTR(info->extack, err_attr,
+ NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
"requested channel count exceeds maximum");
goto out_ops;
}
/* ensure there is at least one RX and one TX channel */
if (!channels.combined_count && !channels.rx_count)
- err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
else if (!channels.combined_count && !channels.tx_count)
- err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
else
- err_attr = NULL;
+ err_attr = 0;
if (err_attr) {
if (mod_combined)
- err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+ err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
ret = -EINVAL;
- NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured");
+ NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
+ "requested channel counts would result in no RX or TX channel being configured");
goto out_ops;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index f9a8cc82ae2e..bb1351c38397 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -164,8 +164,10 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
* as initialization. (0 could trigger an spurious ring error warning).
*/
now = jiffies;
- for (i = 0; i < HSR_PT_PORTS; i++)
+ for (i = 0; i < HSR_PT_PORTS; i++) {
new_node->time_in[i] = now;
+ new_node->time_out[i] = now;
+ }
for (i = 0; i < HSR_PT_PORTS; i++)
new_node->seq_out[i] = seq_out;
@@ -413,9 +415,12 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
u16 sequence_nr)
{
- if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]))
+ if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
+ time_is_after_jiffies(node->time_out[port->type] +
+ msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)))
return 1;
+ node->time_out[port->type] = jiffies;
node->seq_out[port->type] = sequence_nr;
return 0;
}
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 86b43f539f2c..d9628e7a5f05 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -75,6 +75,7 @@ struct hsr_node {
enum hsr_port_type addr_B_port;
unsigned long time_in[HSR_PT_PORTS];
bool time_in_stale[HSR_PT_PORTS];
+ unsigned long time_out[HSR_PT_PORTS];
/* if the node is a SAN */
bool san_a;
bool san_b;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index a169808ee78a..8f264672b70b 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -22,6 +22,7 @@
#define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */
#define HSR_NODE_FORGET_TIME 60000 /* ms */
#define HSR_ANNOUNCE_INTERVAL 100 /* ms */
+#define HSR_ENTRY_FORGET_TIME 400 /* ms */
/* By how much may slave1 and slave2 timestamps of latest received frame from
* each node differ before we notify of communication problem?
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a02ce89b56b5..1355e6c0d567 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1021,7 +1021,6 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
- .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 471d33a0d095..bfaf327e9d12 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!refcount_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&cipso_v4_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&cipso_v4_doi_list_lock);
- cipso_v4_cache_invalidate();
- call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+ cipso_v4_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
if (!refcount_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&cipso_v4_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&cipso_v4_doi_list_lock);
cipso_v4_cache_invalidate();
call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
@@ -1162,7 +1153,7 @@ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
{
buf[0] = IPOPT_CIPSO;
buf[1] = CIPSO_V4_HDR_LEN + len;
- *(__be32 *)&buf[2] = htonl(doi_def->doi);
+ put_unaligned_be32(doi_def->doi, &buf[2]);
}
/**
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 396b492c804f..616e2dc1c8fa 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -775,13 +775,14 @@ EXPORT_SYMBOL(__icmp_send);
void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
struct sk_buff *cloned_skb = NULL;
+ struct ip_options opts = { 0 };
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
__be32 orig_ip;
ct = nf_ct_get(skb_in, &ctinfo);
if (!ct || !(ct->status & IPS_SRC_NAT)) {
- icmp_send(skb_in, type, code, info);
+ __icmp_send(skb_in, type, code, info, &opts);
return;
}
@@ -796,7 +797,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
orig_ip = ip_hdr(skb_in)->saddr;
ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
- icmp_send(skb_in, type, code, info);
+ __icmp_send(skb_in, type, code, info, &opts);
ip_hdr(skb_in)->saddr = orig_ip;
out:
consume_skb(cloned_skb);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ff327a62c9ce..da21dfce24d7 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(inet_peer_base_init);
#define PEER_MAX_GC 32
/* Exported for sysctl_net_ipv4. */
-int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more
+int inet_peer_threshold __read_mostly; /* start to throw entries more
* aggressively at this stage */
int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */
@@ -73,20 +73,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min
/* Called from ip_output.c:ip_init */
void __init inet_initpeers(void)
{
- struct sysinfo si;
+ u64 nr_entries;
- /* Use the straight interface to information about memory. */
- si_meminfo(&si);
- /* The values below were suggested by Alexey Kuznetsov
- * <kuznet@ms2.inr.ac.ru>. I don't have any opinion about the values
- * myself. --SAW
- */
- if (si.totalram <= (32768*1024)/PAGE_SIZE)
- inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
- if (si.totalram <= (16384*1024)/PAGE_SIZE)
- inet_peer_threshold >>= 1; /* about 512KB */
- if (si.totalram <= (8192*1024)/PAGE_SIZE)
- inet_peer_threshold >>= 2; /* about 128KB */
+ /* 1% of physical memory */
+ nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT,
+ 100 * L1_CACHE_ALIGN(sizeof(struct inet_peer)));
+
+ inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
peer_cachep = kmem_cache_create("inet_peer_cache",
sizeof(struct inet_peer),
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 76a420c76f16..f6cc26de5ed3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (!skb_is_gso(skb) &&
(inner_iph->frag_off & htons(IP_DF)) &&
mtu < pkt_size) {
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
return -E2BIG;
}
}
@@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
mtu < pkt_size) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
return -E2BIG;
}
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index abc171e79d3e..eb207089ece0 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
if (skb->len > mtu) {
skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
} else {
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
}
dst_release(dst);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f1c6cbdb9e43..743777bce179 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1399,7 +1399,7 @@ out:
/* rtnl */
/* remove all nexthops tied to a device being deleted */
-static void nexthop_flush_dev(struct net_device *dev)
+static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
{
unsigned int hash = nh_dev_hashfn(dev->ifindex);
struct net *net = dev_net(dev);
@@ -1411,6 +1411,10 @@ static void nexthop_flush_dev(struct net_device *dev)
if (nhi->fib_nhc.nhc_dev != dev)
continue;
+ if (nhi->reject_nh &&
+ (event == NETDEV_DOWN || event == NETDEV_CHANGE))
+ continue;
+
remove_nexthop(net, nhi->nh_parent, NULL);
}
}
@@ -2189,11 +2193,11 @@ static int nh_netdev_event(struct notifier_block *this,
switch (event) {
case NETDEV_DOWN:
case NETDEV_UNREGISTER:
- nexthop_flush_dev(dev);
+ nexthop_flush_dev(dev, event);
break;
case NETDEV_CHANGE:
if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
- nexthop_flush_dev(dev);
+ nexthop_flush_dev(dev, event);
break;
case NETDEV_CHANGEMTU:
info_ext = ptr;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a3422e42784e..de7cc8445ac0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3469,16 +3469,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
break;
case TCP_QUEUE_SEQ:
- if (sk->sk_state != TCP_CLOSE)
+ if (sk->sk_state != TCP_CLOSE) {
err = -EPERM;
- else if (tp->repair_queue == TCP_SEND_QUEUE)
- WRITE_ONCE(tp->write_seq, val);
- else if (tp->repair_queue == TCP_RECV_QUEUE) {
- WRITE_ONCE(tp->rcv_nxt, val);
- WRITE_ONCE(tp->copied_seq, val);
- }
- else
+ } else if (tp->repair_queue == TCP_SEND_QUEUE) {
+ if (!tcp_rtx_queue_empty(sk))
+ err = -EPERM;
+ else
+ WRITE_ONCE(tp->write_seq, val);
+ } else if (tp->repair_queue == TCP_RECV_QUEUE) {
+ if (tp->rcv_nxt != tp->copied_seq) {
+ err = -EPERM;
+ } else {
+ WRITE_ONCE(tp->rcv_nxt, val);
+ WRITE_ONCE(tp->copied_seq, val);
+ }
+ } else {
err = -EINVAL;
+ }
break;
case TCP_REPAIR_OPTIONS:
@@ -4143,7 +4150,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- if (len < offsetofend(struct tcp_zerocopy_receive, length))
+ if (len < 0 ||
+ len < offsetofend(struct tcp_zerocopy_receive, length))
return -EINVAL;
if (unlikely(len > sizeof(zc))) {
err = check_zeroed_user(optval + sizeof(zc),
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b76c48efd37e..c5b4b586570f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -526,7 +526,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
}
if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
- (skb->ip_summed != CHECKSUM_PARTIAL &&
+ (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid) ||
!udp_sk(sk)->gro_receive)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1fb75f01756c..802f5111805a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -665,7 +665,6 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
- .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet6_release,
.bind = inet6_bind,
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 51184a70ac7e..1578ed9e97d8 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -83,6 +83,9 @@ struct calipso_map_cache_entry {
static struct calipso_map_cache_bkt *calipso_cache;
+static void calipso_cache_invalidate(void);
+static void calipso_doi_putdef(struct calipso_doi *doi_def);
+
/* Label Mapping Cache Functions
*/
@@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!refcount_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&calipso_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&calipso_doi_list_lock);
- call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+ calipso_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def)
if (!refcount_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&calipso_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&calipso_doi_list_lock);
+ calipso_cache_invalidate();
call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f3d05866692e..fd1f896115c1 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -331,10 +331,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
}
#if IS_ENABLED(CONFIG_IPV6_MIP6)
-static void mip6_addr_swap(struct sk_buff *skb)
+static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
- struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6_destopt_hao *hao;
struct in6_addr tmp;
int off;
@@ -351,7 +350,7 @@ static void mip6_addr_swap(struct sk_buff *skb)
}
}
#else
-static inline void mip6_addr_swap(struct sk_buff *skb) {}
+static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
static struct dst_entry *icmpv6_route_lookup(struct net *net,
@@ -446,7 +445,8 @@ static int icmp6_iif(const struct sk_buff *skb)
* Send an ICMP message in response to a packet in error
*/
void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
- const struct in6_addr *force_saddr)
+ const struct in6_addr *force_saddr,
+ const struct inet6_skb_parm *parm)
{
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -542,7 +542,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
goto out_bh_enable;
- mip6_addr_swap(skb);
+ mip6_addr_swap(skb, parm);
sk = icmpv6_xmit_lock(net);
if (!sk)
@@ -559,7 +559,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
/* select a more meaningful saddr from input if */
struct net_device *in_netdev;
- in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+ in_netdev = dev_get_by_index(net, parm->iif);
if (in_netdev) {
ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
inet6_sk(sk)->srcprefs,
@@ -640,7 +640,7 @@ EXPORT_SYMBOL(icmp6_send);
*/
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
- icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
kfree_skb(skb);
}
@@ -697,10 +697,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
}
if (type == ICMP_TIME_EXCEEDED)
icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
else
icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
if (rt)
ip6_rt_put(rt);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c3bc89b6b1a1..1baf43aacb2e 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
+ icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
return -1;
}
*encap_limit = tel->encap_limit - 1;
@@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
if (err != 0) {
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE)
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
return -1;
}
@@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
&mtu, skb->protocol);
if (err != 0) {
if (err == -EMSGSIZE)
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
return -1;
}
@@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* XXX: send ICMP error even if DF is not set. */
if (err == -EMSGSIZE) {
if (skb->protocol == htons(ETH_P_IP))
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_FRAG_NEEDED, htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
else
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
}
goto tx_err;
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 70c8c2f36c98..9e3574880cb0 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -33,23 +33,25 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
}
EXPORT_SYMBOL(inet6_unregister_icmp_sender);
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct inet6_skb_parm *parm)
{
ip6_icmp_send_t *send;
rcu_read_lock();
send = rcu_dereference(ip6_icmp_send);
if (send)
- send(skb, type, code, info, NULL);
+ send(skb, type, code, info, NULL, parm);
rcu_read_unlock();
}
-EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(__icmpv6_send);
#endif
#if IS_ENABLED(CONFIG_NF_NAT)
#include <net/netfilter/nf_conntrack.h>
void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
{
+ struct inet6_skb_parm parm = { 0 };
struct sk_buff *cloned_skb = NULL;
enum ip_conntrack_info ctinfo;
struct in6_addr orig_ip;
@@ -57,7 +59,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
ct = nf_ct_get(skb_in, &ctinfo);
if (!ct || !(ct->status & IPS_SRC_NAT)) {
- icmpv6_send(skb_in, type, code, info);
+ __icmpv6_send(skb_in, type, code, info, &parm);
return;
}
@@ -72,7 +74,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
orig_ip = ipv6_hdr(skb_in)->saddr;
ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
- icmpv6_send(skb_in, type, code, info);
+ __icmpv6_send(skb_in, type, code, info, &parm);
ipv6_hdr(skb_in)->saddr = orig_ip;
out:
consume_skb(cloned_skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a7950baa05e5..3fa0eca5a06f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
tel = (void *)&skb_network_header(skb)[offset];
if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
+ icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
return -1;
}
encap_limit = tel->encap_limit - 1;
@@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
if (err == -EMSGSIZE)
switch (protocol) {
case IPPROTO_IPIP:
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_FRAG_NEEDED, htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
break;
case IPPROTO_IPV6:
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
break;
default:
break;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0225fd694192..f10e7a72ea62 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
} else {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
}
err = -EMSGSIZE;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 93636867aee2..63ccd9f2dccc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ip_rt_put(rt);
goto tx_error;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 7be5103ff2a8..203890e378cb 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
- pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
- tunnel->name, tunnel->tunnel_id,
- session->session_id);
+ pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
+ tunnel->name, tunnel->tunnel_id,
+ session->session_id);
atomic_long_inc(&session->stats.rx_cookie_discards);
goto discard;
}
@@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
* If user has configured mandatory sequence numbers, discard.
*/
if (session->recv_seq) {
- pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
- session->name);
+ pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+ session->name);
atomic_long_inc(&session->stats.rx_seq_discards);
goto discard;
}
@@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
session->send_seq = 0;
l2tp_session_set_header_len(session, tunnel->version);
} else if (session->send_seq) {
- pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
- session->name);
+ pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+ session->name);
atomic_long_inc(&session->stats.rx_seq_discards);
goto discard;
}
@@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Short packet? */
if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
- pr_warn_ratelimited("%s: recv short packet (len=%d)\n",
- tunnel->name, skb->len);
- goto error;
+ pr_debug_ratelimited("%s: recv short packet (len=%d)\n",
+ tunnel->name, skb->len);
+ goto invalid;
}
/* Point to L2TP header */
@@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* Check protocol version */
version = hdrflags & L2TP_HDR_VER_MASK;
if (version != tunnel->version) {
- pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
- tunnel->name, version, tunnel->version);
- goto error;
+ pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+ tunnel->name, version, tunnel->version);
+ goto invalid;
}
/* Get length of L2TP packet */
@@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
/* If type is control packet, it is handled by userspace. */
if (hdrflags & L2TP_HDRFLAG_T)
- goto error;
+ goto pass;
/* Skip flags */
ptr += 2;
@@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
l2tp_session_dec_refcount(session);
/* Not found? Pass to userspace to deal with */
- pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n",
- tunnel->name, tunnel_id, session_id);
- goto error;
+ pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n",
+ tunnel->name, tunnel_id, session_id);
+ goto pass;
}
if (tunnel->version == L2TP_HDR_VER_3 &&
l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
- goto error;
+ goto invalid;
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
return 0;
-error:
+invalid:
+ atomic_long_inc(&tunnel->stats.rx_invalid);
+
+pass:
/* Put UDP header back */
__skb_push(skb, sizeof(struct udphdr));
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index cb21d906343e..98ea98eb9567 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -39,6 +39,7 @@ struct l2tp_stats {
atomic_long_t rx_oos_packets;
atomic_long_t rx_errors;
atomic_long_t rx_cookie_discards;
+ atomic_long_t rx_invalid;
};
struct l2tp_tunnel;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 83956c9ee1fc..96eb91be9238 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
L2TP_ATTR_STATS_PAD) ||
nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
atomic_long_read(&tunnel->stats.rx_errors),
+ L2TP_ATTR_STATS_PAD) ||
+ nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+ atomic_long_read(&tunnel->stats.rx_invalid),
L2TP_ATTR_STATS_PAD))
goto nla_put_failure;
nla_nest_end(skb, nest);
@@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
L2TP_ATTR_STATS_PAD) ||
nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
atomic_long_read(&session->stats.rx_errors),
+ L2TP_ATTR_STATS_PAD) ||
+ nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+ atomic_long_read(&session->stats.rx_invalid),
L2TP_ATTR_STATS_PAD))
goto nla_put_failure;
nla_nest_end(skb, nest);
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index b1690149b6fa..1482259de9b5 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -14,6 +14,7 @@
#include <linux/netdev_features.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <net/mpls.h>
static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
skb_reset_network_header(skb);
mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+ if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN))
+ goto out;
if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
goto out;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b63574d6b812..444a38681e93 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -411,6 +411,7 @@ static void clear_3rdack_retransmission(struct sock *sk)
}
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
@@ -428,9 +429,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
if (!skb)
return false;
- /* MPC/MPJ needed only on 3rd ack packet */
- if (subflow->fully_established ||
- subflow->snd_isn != TCP_SKB_CB(skb)->seq)
+ /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
+ if (subflow->fully_established || snd_data_fin_enable ||
+ subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
+ sk->sk_state != TCP_ESTABLISHED)
return false;
if (subflow->mp_capable) {
@@ -502,20 +504,20 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
}
static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
+ bool snd_data_fin_enable,
unsigned int *size,
unsigned int remaining,
struct mptcp_out_options *opts)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- u64 snd_data_fin_enable, ack_seq;
unsigned int dss_size = 0;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
+ u64 ack_seq;
mpext = skb ? mptcp_get_ext(skb) : NULL;
- snd_data_fin_enable = mptcp_data_fin_enabled(msk);
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
unsigned int map_size;
@@ -717,12 +719,15 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
unsigned int *size, unsigned int remaining,
struct mptcp_out_options *opts)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
unsigned int opt_size = 0;
+ bool snd_data_fin;
bool ret = false;
opts->suboptions = 0;
- if (unlikely(mptcp_check_fallback(sk)))
+ if (unlikely(__mptcp_check_fallback(msk)))
return false;
/* prevent adding of any MPTCP related options on reset packet
@@ -731,10 +736,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
return false;
- if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
+ snd_data_fin = mptcp_data_fin_enabled(msk);
+ if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
- else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
- opts))
+ else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
ret = true;
/* we reserved enough space for the above options, and exceeding the
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a57f3eab7b6a..76958570ae7f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -11,6 +11,7 @@
#include <linux/netdevice.h>
#include <linux/sched/signal.h>
#include <linux/atomic.h>
+#include <linux/igmp.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
@@ -19,6 +20,7 @@
#include <net/tcp_states.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/transp_v6.h>
+#include <net/addrconf.h>
#endif
#include <net/mptcp.h>
#include <net/xfrm.h>
@@ -1059,6 +1061,12 @@ out:
}
}
+static void __mptcp_clean_una_wakeup(struct sock *sk)
+{
+ __mptcp_clean_una(sk);
+ mptcp_write_space(sk);
+}
+
static void mptcp_enter_memory_pressure(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1187,6 +1195,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
*/
while (skbs->qlen > 1) {
skb = __skb_dequeue_tail(skbs);
+ *total_ts -= skb->truesize;
__kfree_skb(skb);
}
return skbs->qlen > 0;
@@ -1442,7 +1451,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
release_sock(ssk);
}
-static void mptcp_push_pending(struct sock *sk, unsigned int flags)
+static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
{
struct sock *prev_ssk = NULL, *ssk = NULL;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1694,14 +1703,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
wait_for_memory:
mptcp_set_nospace(sk);
- mptcp_push_pending(sk, msg->msg_flags);
+ __mptcp_push_pending(sk, msg->msg_flags);
ret = sk_stream_wait_memory(sk, &timeo);
if (ret)
goto out;
}
if (copied)
- mptcp_push_pending(sk, msg->msg_flags);
+ __mptcp_push_pending(sk, msg->msg_flags);
out:
release_sock(sk);
@@ -2113,6 +2122,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
return backup;
}
+static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
+{
+ if (msk->subflow) {
+ iput(SOCK_INODE(msk->subflow));
+ msk->subflow = NULL;
+ }
+}
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2124,6 +2141,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
list_del(&subflow->node);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2152,6 +2171,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
release_sock(ssk);
sock_put(ssk);
+
+ if (ssk == msk->last_snd)
+ msk->last_snd = NULL;
+
+ if (ssk == msk->ack_hint)
+ msk->ack_hint = NULL;
+
+ if (ssk == msk->first)
+ msk->first = NULL;
+
+ if (msk->subflow && ssk == msk->subflow->sk)
+ mptcp_dispose_initial_subflow(msk);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2236,60 +2267,23 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
mptcp_close_wake_up(sk);
}
-static void mptcp_worker(struct work_struct *work)
+static void __mptcp_retrans(struct sock *sk)
{
- struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
- struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
+ struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_sendmsg_info info = {};
struct mptcp_data_frag *dfrag;
size_t copied = 0;
- int state, ret;
-
- lock_sock(sk);
- state = sk->sk_state;
- if (unlikely(state == TCP_CLOSE))
- goto unlock;
-
- mptcp_check_data_fin_ack(sk);
- __mptcp_flush_join_list(msk);
-
- mptcp_check_fastclose(msk);
-
- if (msk->pm.status)
- mptcp_pm_nl_work(msk);
-
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
- __mptcp_check_send_data_fin(sk);
- mptcp_check_data_fin(sk);
-
- /* if the msk data is completely acked, or the socket timedout,
- * there is no point in keeping around an orphaned sk
- */
- if (sock_flag(sk, SOCK_DEAD) &&
- (mptcp_check_close_timeout(sk) ||
- (state != sk->sk_state &&
- ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) {
- inet_sk_state_store(sk, TCP_CLOSE);
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
-
- if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
- __mptcp_close_subflow(msk);
-
- if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
- goto unlock;
+ struct sock *ssk;
+ int ret;
- __mptcp_clean_una(sk);
+ __mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
- goto unlock;
+ return;
ssk = mptcp_subflow_get_retrans(msk);
if (!ssk)
- goto reset_unlock;
+ goto reset_timer;
lock_sock(ssk);
@@ -2315,9 +2309,52 @@ static void mptcp_worker(struct work_struct *work)
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
-reset_unlock:
+reset_timer:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
+}
+
+static void mptcp_worker(struct work_struct *work)
+{
+ struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ int state;
+
+ lock_sock(sk);
+ state = sk->sk_state;
+ if (unlikely(state == TCP_CLOSE))
+ goto unlock;
+
+ mptcp_check_data_fin_ack(sk);
+ __mptcp_flush_join_list(msk);
+
+ mptcp_check_fastclose(msk);
+
+ if (msk->pm.status)
+ mptcp_pm_nl_work(msk);
+
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+
+ __mptcp_check_send_data_fin(sk);
+ mptcp_check_data_fin(sk);
+
+ /* There is no point in keeping around an orphaned sk timedout or
+ * closed, but we need the msk around to reply to incoming DATA_FIN,
+ * even if it is orphaned and in FIN_WAIT2 state
+ */
+ if (sock_flag(sk, SOCK_DEAD) &&
+ (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ __mptcp_destroy_sock(sk);
+ goto unlock;
+ }
+
+ if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+ __mptcp_close_subflow(msk);
+
+ if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
+ __mptcp_retrans(sk);
unlock:
release_sock(sk);
@@ -2522,12 +2559,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- /* dispose the ancillatory tcp socket, if any */
- if (msk->subflow) {
- iput(SOCK_INODE(msk->subflow));
- msk->subflow = NULL;
- }
-
/* be sure to always acquire the join list lock, to sync vs
* mptcp_finish_join().
*/
@@ -2552,6 +2583,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
sk_refcnt_debug_release(sk);
+ mptcp_dispose_initial_subflow(msk);
sock_put(sk);
}
@@ -2933,13 +2965,14 @@ static void mptcp_release_cb(struct sock *sk)
{
unsigned long flags, nflags;
- /* push_pending may touch wmem_reserved, do it before the later
- * cleanup
- */
- if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
- __mptcp_clean_una(sk);
- if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
- /* mptcp_push_pending() acquires the subflow socket lock
+ for (;;) {
+ flags = 0;
+ if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
+ flags |= MPTCP_PUSH_PENDING;
+ if (!flags)
+ break;
+
+ /* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
* 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@@ -2948,13 +2981,21 @@ static void mptcp_release_cb(struct sock *sk)
*/
spin_unlock_bh(&sk->sk_lock.slock);
- mptcp_push_pending(sk, 0);
+ if (flags & MPTCP_PUSH_PENDING)
+ __mptcp_push_pending(sk, 0);
+
+ cond_resched();
spin_lock_bh(&sk->sk_lock.slock);
}
+
+ if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+ __mptcp_clean_una_wakeup(sk);
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
- /* clear any wmem reservation and errors */
+ /* push_pending may touch wmem_reserved, ensure we do the cleanup
+ * later
+ */
__mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
@@ -3284,6 +3325,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
* we can notify it, if needed.
+ *
+ * Even if remote has reset the initial subflow by now
+ * the refcnt is still at least one.
*/
subflow = mptcp_subflow_ctx(msk->first);
list_add(&subflow->node, &msk->conn_list);
@@ -3375,10 +3419,34 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
return mask;
}
+static int mptcp_release(struct socket *sock)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = sock->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk)
+ return 0;
+
+ lock_sock(sk);
+
+ msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ip_mc_drop_socket(ssk);
+ }
+
+ release_sock(sk);
+
+ return inet_release(sock);
+}
+
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
- .release = inet_release,
+ .release = mptcp_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
@@ -3470,10 +3538,35 @@ void __init mptcp_proto_init(void)
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int mptcp6_release(struct socket *sock)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 0;
+
+ lock_sock(sk);
+
+ msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+ ip_mc_drop_socket(ssk);
+ ipv6_sock_mc_close(ssk);
+ ipv6_sock_ac_close(ssk);
+ }
+
+ release_sock(sk);
+ return inet6_release(sock);
+}
+
static const struct proto_ops mptcp_v6_stream_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
- .release = inet6_release,
+ .release = mptcp6_release,
.bind = mptcp_bind,
.connect = mptcp_stream_connect,
.socketpair = sock_no_socketpair,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 91827d949766..e21a5bc36cf0 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -52,14 +52,15 @@
#define TCPOLEN_MPTCP_DSS_MAP64 14
#define TCPOLEN_MPTCP_DSS_CHECKSUM 2
#define TCPOLEN_MPTCP_ADD_ADDR 16
-#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20
+#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18
#define TCPOLEN_MPTCP_ADD_ADDR_BASE 8
-#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12
+#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10
#define TCPOLEN_MPTCP_ADD_ADDR6 28
-#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32
+#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30
#define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20
-#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24
-#define TCPOLEN_MPTCP_PORT_LEN 4
+#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22
+#define TCPOLEN_MPTCP_PORT_LEN 2
+#define TCPOLEN_MPTCP_PORT_ALIGN 2
#define TCPOLEN_MPTCP_RM_ADDR_BASE 4
#define TCPOLEN_MPTCP_PRIO 3
#define TCPOLEN_MPTCP_PRIO_ALIGN 4
@@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
if (!echo)
len += MPTCPOPT_THMAC_LEN;
+ /* account for 2 trailing 'nop' options */
if (port)
- len += TCPOLEN_MPTCP_PORT_LEN;
+ len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;
return len;
}
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 06e233410e0e..3d47d670e665 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -687,11 +687,6 @@ create_child:
/* move the msk reference ownership to the subflow */
subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
- if (!mptcp_finish_join(child))
- goto dispose_child;
-
- SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
- tcp_rsk(req)->drop_req = true;
if (subflow_use_different_sport(owner, sk)) {
pr_debug("ack inet_sport=%d %d",
@@ -699,10 +694,16 @@ create_child:
ntohs(inet_sk((struct sock *)owner)->inet_sport));
if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
- goto out;
+ goto dispose_child;
}
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
}
+
+ if (!mptcp_finish_join(child))
+ goto dispose_child;
+
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
+ tcp_rsk(req)->drop_req = true;
}
}
@@ -1096,6 +1097,12 @@ static void subflow_data_ready(struct sock *sk)
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
+ /* MPJ subflow are removed from accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
@@ -1291,6 +1298,7 @@ failed_unlink:
spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
spin_unlock_bh(&msk->join_list_lock);
+ sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
subflow->disposable = 1;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 118f415928ae..b055187235f8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -219,7 +219,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
return NULL;
pr_info("nf_conntrack: default automatic helper assignment "
"has been turned off for security reasons and CT-based "
- " firewall rule not found. Use the iptables CT target "
+ "firewall rule not found. Use the iptables CT target "
"to attach helpers instead.\n");
net->ct.auto_assign_helper_warned = 1;
return NULL;
@@ -228,7 +228,6 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
}
-
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags)
{
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1d7e1c595546..ec23330687a5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -982,8 +982,10 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
- "state %s ", tcp_conntrack_names[old_state]);
+ nf_ct_l4proto_log_invalid(skb, ct,
+ "packet (index %d) in dir %d ignored, state %s",
+ index, dir,
+ tcp_conntrack_names[old_state]);
return NF_ACCEPT;
case TCP_CONNTRACK_MAX:
/* Special case for SYN proxy: when the SYN to the server or
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index e87b6bd6b3cd..4731d21fc3ad 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
}
static unsigned int
-nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
@@ -660,6 +660,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
}
static unsigned int
+nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ __be32 saddr = ip_hdr(skb)->saddr;
+ struct sock *sk = skb->sk;
+ unsigned int ret;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+
+ if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
+ !inet_sk_transparent(sk))
+ skb_orphan(skb); /* TCP edemux obtained wrong socket */
+
+ return ret;
+}
+
+static unsigned int
nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
/* Before packet filtering, change destination */
{
- .hook = nf_nat_ipv4_in,
+ .hook = nf_nat_ipv4_pre_routing,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
@@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
},
/* After packet filtering, change source */
{
- .hook = nf_nat_ipv4_fn,
+ .hook = nf_nat_ipv4_local_in,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1eb5cdb3033..224c8e537cb3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -916,6 +916,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if (flags == ctx->table->flags)
return 0;
+ if ((nft_table_has_owner(ctx->table) &&
+ !(flags & NFT_TABLE_F_OWNER)) ||
+ (!nft_table_has_owner(ctx->table) &&
+ flags & NFT_TABLE_F_OWNER))
+ return -EOPNOTSUPP;
+
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
sizeof(struct nft_trans_table));
if (trans == NULL)
@@ -9022,8 +9028,12 @@ static void __nft_release_hooks(struct net *net)
{
struct nft_table *table;
- list_for_each_entry(table, &net->nft.tables, list)
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (nft_table_has_owner(table))
+ continue;
+
__nft_release_hook(net, table);
+ }
}
static void __nft_release_table(struct net *net, struct nft_table *table)
@@ -9073,13 +9083,12 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
nf_tables_table_destroy(&ctx);
}
-static void __nft_release_tables(struct net *net, u32 nlpid)
+static void __nft_release_tables(struct net *net)
{
struct nft_table *table, *nt;
list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
- if (nft_table_has_owner(table) &&
- nlpid != table->nlpid)
+ if (nft_table_has_owner(table))
continue;
__nft_release_table(net, table);
@@ -9145,7 +9154,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
mutex_lock(&net->nft.commit_mutex);
if (!list_empty(&net->nft.commit_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
- __nft_release_tables(net, 0);
+ __nft_release_tables(net);
mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables));
WARN_ON_ONCE(!list_empty(&net->nft.module_list));
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index acce622582e3..bce6ca203d46 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
const struct xt_match *m;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision > *bestp)
@@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
const struct xt_target *t;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision > *bestp)
@@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
{
int have_rev, best = -1;
- mutex_lock(&xt[af].mutex);
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
have_rev = match_revfn(af, name, revision, &best);
- mutex_unlock(&xt[af].mutex);
/* Nothing at all? Return 0 to try loading module. */
if (best == -1) {
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 726dda95934c..4f50a64315cf 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -575,6 +575,7 @@ list_start:
break;
}
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
genlmsg_end(ans_skb, data);
@@ -583,12 +584,14 @@ list_start:
list_retry:
/* XXX - this limit is a guesstimate */
if (nlsze_mult < 4) {
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
kfree_skb(ans_skb);
nlsze_mult *= 2;
goto list_start;
}
list_failure_lock:
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
list_failure:
kfree_skb(ans_skb);
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 33e238c965bd..482c07f2766b 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -309,10 +309,10 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
unsigned short tun_proto = ip_tunnel_info_af(tun_info);
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
- int sum = 0;
+ int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */
if (tun_key->tun_flags & TUNNEL_KEY)
- sum += nla_total_size(sizeof(u64));
+ sum += nla_total_size_64bit(sizeof(u64));
if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
sum += nla_total_size(0);
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index b34358282f37..edb6ac17ceca 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
if (len == 0 || len & 3)
return -EINVAL;
- skb = netdev_alloc_skb(NULL, len);
+ skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
if (!skb)
return -ENOMEM;
@@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
plen = (len + 3) & ~3;
skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
msg->msg_flags & MSG_DONTWAIT, &rc);
- if (!skb)
+ if (!skb) {
+ rc = -ENOMEM;
goto out_node;
+ }
skb_reserve(skb, QRTR_HDR_MAX_SIZE);
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index b238c40a9984..304b41fea5ab 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
static int qrtr_tun_open(struct inode *inode, struct file *filp)
{
struct qrtr_tun *tun;
+ int ret;
tun = kzalloc(sizeof(*tun), GFP_KERNEL);
if (!tun)
@@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp)
filp->private_data = tun;
- return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+ filp->private_data = NULL;
+ kfree(tun);
+ return ret;
}
static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 2409e522c68f..d097b5c15faa 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1417,6 +1417,21 @@ static int fl_validate_ct_state(u16 state, struct nlattr *tb,
return -EINVAL;
}
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_INVALID &&
+ state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+ TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "when inv is set, only trk may be set");
+ return -EINVAL;
+ }
+
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+ state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "new and rpl are mutually exclusive");
+ return -EINVAL;
+ }
+
return 0;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e2e4353db8a7..f87d07736a14 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -2168,7 +2168,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
- int *t_p, int s_t)
+ int *t_p, int s_t, bool recur)
{
struct Qdisc *q;
int b;
@@ -2179,7 +2179,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;
- if (!qdisc_dev(root))
+ if (!qdisc_dev(root) || !recur)
return 0;
if (tcm->tcm_parent) {
@@ -2214,13 +2214,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
+ if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t) < 0)
+ &t, s_t, false) < 0)
goto done;
done:
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index a9c6af5795d8..5ba456727f63 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -75,7 +75,7 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
return 1;
/* Verify that we can hold this TSN and that it will not
- * overlfow our map
+ * overflow our map
*/
if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE))
return -1;
diff --git a/net/socket.c b/net/socket.c
index 7f0617ab5437..84a8049c2b09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -334,6 +334,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
};
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
+ struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -537,9 +538,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
-static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int sockfs_setattr(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct iattr *iattr)
{
- int err = simple_setattr(dentry, iattr);
+ int err = simple_setattr(&init_user_ns, dentry, iattr);
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -2411,10 +2413,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
unsigned int flags)
{
- /* disallow ancillary data requests from this path */
- if (msg->msg_control || msg->msg_controllen)
- return -EINVAL;
-
return ____sys_sendmsg(sock, msg, flags, NULL, 0);
}
@@ -2623,12 +2621,6 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr, unsigned int flags)
{
- if (msg->msg_control || msg->msg_controllen) {
- /* disallow ancillary data reqs unless cmsg is plain data */
- if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
- return -EINVAL;
- }
-
return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8241f5a4a01c..09c000d490a1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
+ break;
default:
break;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5a809c64dc7b..2e2f007dfc9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1078,9 +1078,8 @@ static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
* In addition, the logic assumes that * .bv_len is never larger
* than PAGE_SIZE.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
- struct xdr_buf *xdr, rpc_fraghdr marker,
- unsigned int *sentp)
+static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+ rpc_fraghdr marker, unsigned int *sentp)
{
const struct kvec *head = xdr->head;
const struct kvec *tail = xdr->tail;
@@ -1088,21 +1087,22 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
.iov_base = &marker,
.iov_len = sizeof(marker),
};
- int flags, ret;
+ struct msghdr msg = {
+ .msg_flags = 0,
+ };
+ int ret;
*sentp = 0;
xdr_alloc_bvec(xdr, GFP_KERNEL);
- msg->msg_flags = MSG_MORE;
- ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
+ ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
if (ret < 0)
return ret;
*sentp += ret;
if (ret != rm.iov_len)
return -EAGAIN;
- flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;
- ret = svc_tcp_send_kvec(sock, head, flags);
+ ret = svc_tcp_send_kvec(sock, head, 0);
if (ret < 0)
return ret;
*sentp += ret;
@@ -1116,15 +1116,11 @@ static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
offset = offset_in_page(xdr->page_base);
remaining = xdr->page_len;
- flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
while (remaining > 0) {
- if (remaining <= PAGE_SIZE && tail->iov_len == 0)
- flags = 0;
-
len = min(remaining, bvec->bv_len - offset);
ret = kernel_sendpage(sock, bvec->bv_page,
bvec->bv_offset + offset,
- len, flags);
+ len, 0);
if (ret < 0)
return ret;
*sentp += ret;
@@ -1163,26 +1159,28 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
struct xdr_buf *xdr = &rqstp->rq_res;
rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
(u32)xdr->len);
- struct msghdr msg = {
- .msg_flags = 0,
- };
unsigned int sent;
int err;
svc_tcp_release_rqst(rqstp);
+ atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent);
+ tcp_sock_set_cork(svsk->sk_sk, true);
+ err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
xdr_free_bvec(xdr);
trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ if (atomic_dec_and_test(&svsk->sk_sendqlen))
+ tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
+ atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
@@ -1192,6 +1190,7 @@ out_close:
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1261,7 +1260,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+ tcp_sock_set_nodelay(sk);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
switch (sk->sk_state) {
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 946edf2db646..a249837d6a55 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
- * Support for backward direction RPCs on RPC/RDMA.
+ * Support for reverse-direction RPCs on RPC/RDMA.
*/
#include <linux/sunrpc/xprt.h>
@@ -208,7 +208,7 @@ create_req:
}
/**
- * rpcrdma_bc_receive_call - Handle a backward direction call
+ * rpcrdma_bc_receive_call - Handle a reverse-direction Call
* @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index baca49fe83af..766a1048a48a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
if (nsegs > ep->re_max_fr_depth)
nsegs = ep->re_max_fr_depth;
for (i = 0; i < nsegs;) {
- if (seg->mr_page)
- sg_set_page(&mr->mr_sg[i],
- seg->mr_page,
- seg->mr_len,
- offset_in_page(seg->mr_offset));
- else
- sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
- seg->mr_len);
+ sg_set_page(&mr->mr_sg[i], seg->mr_page,
+ seg->mr_len, seg->mr_offset);
++seg;
++i;
if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
continue;
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ if ((i < nsegs && seg->mr_offset) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8f5d0cb68360..292f066d006e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
return 0;
}
-/* Split @vec on page boundaries into SGEs. FMR registers pages, not
- * a byte range. Other modes coalesce these SGEs into a single MR
- * when they can.
+/* Convert @vec to a single SGL element.
*
* Returns pointer to next available SGE, and bumps the total number
* of SGEs consumed.
@@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
unsigned int *n)
{
- u32 remaining, page_offset;
- char *base;
-
- base = vec->iov_base;
- page_offset = offset_in_page(base);
- remaining = vec->iov_len;
- while (remaining) {
- seg->mr_page = NULL;
- seg->mr_offset = base;
- seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
- remaining -= seg->mr_len;
- base += seg->mr_len;
- ++seg;
- ++(*n);
- page_offset = 0;
- }
+ seg->mr_page = virt_to_page(vec->iov_base);
+ seg->mr_offset = offset_in_page(vec->iov_base);
+ seg->mr_len = vec->iov_len;
+ ++seg;
+ ++(*n);
return seg;
}
@@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
seg->mr_page = *ppages;
- seg->mr_offset = (char *)page_base;
+ seg->mr_offset = page_base;
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
len -= seg->mr_len;
++ppages;
@@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = 0;
}
- /* When encoding a Read chunk, the tail iovec contains an
- * XDR pad and may be omitted.
- */
- if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
+ if (type == rpcrdma_readch)
goto out;
/* When encoding a Write chunk, some servers need to see an
@@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
goto out;
if (xdrbuf->tail[0].iov_len)
- seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
+ rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
out:
if (unlikely(n > RPCRDMA_MAX_SEGS))
@@ -644,9 +628,8 @@ out_mapping_err:
return false;
}
-/* The tail iovec may include an XDR pad for the page list,
- * as well as additional content, and may not reside in the
- * same page as the head iovec.
+/* The tail iovec might not reside in the same page as the
+ * head iovec.
*/
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
struct xdr_buf *xdr,
@@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
+ struct kvec *tail = &xdr->tail[0];
+
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
- /* If there is a Read chunk, the page list is being handled
+ /* If there is a Read chunk, the page list is handled
* via explicit RDMA, and thus is skipped here.
*/
- /* Do not include the tail if it is only an XDR pad */
- if (xdr->tail[0].iov_len > 3) {
- unsigned int page_base, len;
-
- /* If the content in the page list is an odd length,
- * xdr_write_pages() adds a pad at the beginning of
- * the tail iovec. Force the tail's non-pad content to
- * land at the next XDR position in the Send message.
- */
- page_base = offset_in_page(xdr->tail[0].iov_base);
- len = xdr->tail[0].iov_len;
- page_base += len & 3;
- len -= len & 3;
- if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
+ if (tail->iov_len) {
+ if (!rpcrdma_prepare_tail_iov(req, xdr,
+ offset_in_page(tail->iov_base),
+ tail->iov_len))
return false;
kref_get(&req->rl_kref);
}
@@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
*/
p = xdr_inline_decode(xdr, 3 * sizeof(*p));
if (unlikely(!p))
- goto out_short;
+ return true;
rpcrdma_bc_receive_call(r_xprt, rep);
return true;
-
-out_short:
- pr_warn("RPC/RDMA short backward direction call\n");
- return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
{
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 63f8be974df2..4a1edbb4028e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2018 Oracle. All rights reserved.
*
- * Support for backward direction RPCs on RPC/RDMA (server-side).
+ * Support for reverse-direction RPCs on RPC/RDMA (server-side).
*/
#include <linux/sunrpc/svc_rdma.h>
@@ -59,7 +59,7 @@ out_unlock:
spin_unlock(&xprt->queue_lock);
}
-/* Send a backwards direction RPC call.
+/* Send a reverse-direction RPC Call.
*
* Caller holds the connection's mutex and has already marshaled
* the RPC/RDMA request.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index afba4e9d5425..c895f80df659 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -475,9 +475,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!svc_rdma_post_recvs(newxprt))
goto errout;
- /* Swap out the handler */
- newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
-
/* Construct RDMA-CM private message */
pmsg.cp_magic = rpcrdma_cmp_magic;
pmsg.cp_version = RPCRDMA_CMP_VERSION;
@@ -498,7 +495,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
+ rdma_lock_handler(newxprt->sc_cm_id);
+ newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
+ rdma_unlock_handler(newxprt->sc_cm_id);
if (ret) {
trace_svcrdma_accept_err(newxprt, ret);
goto errout;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 94b28657aeeb..fe3be985e239 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -98,9 +98,9 @@ struct rpcrdma_ep {
atomic_t re_completion_ids;
};
-/* Pre-allocate extra Work Requests for handling backward receives
- * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up, long before the
+/* Pre-allocate extra Work Requests for handling reverse-direction
+ * Receives and Sends. This is a fixed value because the Work Queues
+ * are allocated when the forward channel is set up, long before the
* backchannel is provisioned. This value is two times
* NFS4_DEF_CB_SLOT_TABLE_SIZE.
*/
@@ -283,10 +283,11 @@ enum {
RPCRDMA_MAX_IOV_SEGS,
};
-struct rpcrdma_mr_seg { /* chunk descriptors */
- u32 mr_len; /* length of chunk or segment */
- struct page *mr_page; /* owning page, if any */
- char *mr_offset; /* kva if no page, else offset */
+/* Arguments for DMA mapping and registration */
+struct rpcrdma_mr_seg {
+ u32 mr_len; /* length of segment */
+ struct page *mr_page; /* underlying struct page */
+ u64 mr_offset; /* IN: page offset, OUT: iova */
};
/* The Send SGE array is provisioned to send a maximum size
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c56a66cdf4ac..e35760f238a4 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr)
* EAGAIN: The socket was blocked, please call again later to
* complete the request
* ENOTCONN: Caller needs to invoke connect logic then call again
- * other: Some other error occured, the request was not sent
+ * other: Some other error occurred, the request was not sent
*/
static int xs_local_send_request(struct rpc_rqst *req)
{
@@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
* This ensures that we can continue to establish TCP
* connections even when all local ephemeral ports are already
* a part of some TCP connection. This makes no difference
- * for UDP sockets, but also doens't harm them.
+ * for UDP sockets, but also doesn't harm them.
*
* If we're asking for any reserved port (i.e. port == 0 &&
* transport->xprt.resvport == 1) xs_get_srcport above will
@@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
+ break;
case -ENOBUFS:
break;
case -ENOENT:
@@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
- /*
- * xs_tcp_force_close() wakes tasks with -EIO.
- * We need to wake them first to ensure the
- * correct error code.
+ /* xs_tcp_force_close() wakes tasks with a fixed error code.
+ * We need to wake them first to ensure the correct error code.
*/
xprt_wake_pending_tasks(xprt, status);
xs_tcp_force_close(xprt);
@@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work)
}
/**
- * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * xs_local_print_stats - display AF_LOCAL socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_udp_print_stats - display UDP socket-specifc stats
+ * xs_udp_print_stats - display UDP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_tcp_print_stats - display TCP socket-specifc stats
+ * xs_tcp_print_stats - display TCP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 41c3303c3357..5a31307ceb76 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -936,7 +936,7 @@ static struct sock *unix_find_other(struct net *net,
if (err)
goto fail;
inode = d_backing_inode(path.dentry);
- err = inode_permission(inode, MAY_WRITE);
+ err = path_permission(&path, MAY_WRITE);
if (err)
goto put_fail;
@@ -996,7 +996,8 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
*/
err = security_path_mknod(&path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+ err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
+ dentry, mode, 0);
if (!err) {
res->mnt = mntget(path.mnt);
res->dentry = dget(dentry);