summaryrefslogtreecommitdiff
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/Makefile4
-rw-r--r--net/mptcp/bpf.c21
-rw-r--r--net/mptcp/ctrl.c21
-rw-r--r--net/mptcp/mib.c5
-rw-r--r--net/mptcp/mib.h7
-rw-r--r--net/mptcp/mptcp_diag.c105
-rw-r--r--net/mptcp/options.c69
-rw-r--r--net/mptcp/pm.c108
-rw-r--r--net/mptcp/pm_netlink.c266
-rw-r--r--net/mptcp/pm_userspace.c429
-rw-r--r--net/mptcp/protocol.c123
-rw-r--r--net/mptcp/protocol.h101
-rw-r--r--net/mptcp/sockopt.c21
-rw-r--r--net/mptcp/subflow.c72
14 files changed, 1141 insertions, 211 deletions
diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile
index e54daceac58b..6e7df47c9584 100644
--- a/net/mptcp/Makefile
+++ b/net/mptcp/Makefile
@@ -2,7 +2,7 @@
obj-$(CONFIG_MPTCP) += mptcp.o
mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \
- mib.o pm_netlink.o sockopt.o
+ mib.o pm_netlink.o sockopt.o pm_userspace.o
obj-$(CONFIG_SYN_COOKIES) += syncookies.o
obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
@@ -10,3 +10,5 @@ obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
mptcp_crypto_test-objs := crypto_test.o
mptcp_token_test-objs := token_test.o
obj-$(CONFIG_MPTCP_KUNIT_TEST) += mptcp_crypto_test.o mptcp_token_test.o
+
+obj-$(CONFIG_BPF_SYSCALL) += bpf.o
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
new file mode 100644
index 000000000000..5a0a84ad94af
--- /dev/null
+++ b/net/mptcp/bpf.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2020, Tessares SA.
+ * Copyright (c) 2022, SUSE.
+ *
+ * Author: Nicolas Rybowski <nicolas.rybowski@tessares.net>
+ */
+
+#define pr_fmt(fmt) "MPTCP: " fmt
+
+#include <linux/bpf.h>
+#include "protocol.h"
+
+struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
+{
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
+ return mptcp_sk(mptcp_subflow_ctx(sk)->conn);
+
+ return NULL;
+}
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 8b235468c88f..ae20b7d92e28 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -16,6 +16,11 @@
#define MPTCP_SYSCTL_PATH "net/mptcp"
static int mptcp_pernet_id;
+
+#ifdef CONFIG_SYSCTL
+static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
+#endif
+
struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *ctl_table_hdr;
@@ -26,6 +31,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
+ u8 pm_type;
};
static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
@@ -58,6 +64,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net)
return mptcp_get_pernet(net)->stale_loss_cnt;
}
+int mptcp_get_pm_type(const struct net *net)
+{
+ return mptcp_get_pernet(net)->pm_type;
+}
+
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
@@ -65,6 +76,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
pernet->stale_loss_cnt = 4;
+ pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
}
#ifdef CONFIG_SYSCTL
@@ -108,6 +120,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
},
+ {
+ .procname = "pm_type",
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &mptcp_pm_type_max
+ },
{}
};
@@ -128,6 +148,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;
table[4].data = &pernet->stale_loss_cnt;
+ table[5].data = &pernet->pm_type;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index e55d3dfbee0c..0dac2863c6e1 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -24,6 +24,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH),
+ SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX),
SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX),
SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH),
SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR),
@@ -55,6 +56,10 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
+ SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
+ SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
+ SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 00576179a619..2be3596374f4 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -17,6 +17,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */
+ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */
MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */
MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */
MPTCP_MIB_DATACSUMERR, /* The data checksum fail */
@@ -48,6 +49,12 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
+ MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDSHARED, /* Subflow rcv wnd is overridden by msk's one */
+ MPTCP_MIB_RCVWNDCONFLICTUPDATE, /* subflow rcv wnd is overridden by msk's one due to
+ * conflict with another subflow while updating msk rcv wnd
+ */
+ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f44125dd6697..7f9a71780437 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -66,20 +66,103 @@ out_nosk:
return err;
}
+struct mptcp_diag_ctx {
+ long s_slot;
+ long s_num;
+ unsigned int l_slot;
+ unsigned int l_num;
+};
+
+static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callback *cb,
+ const struct inet_diag_req_v2 *r,
+ bool net_admin)
+{
+ struct inet_diag_dump_data *cb_data = cb->data;
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
+ struct nlattr *bc = cb_data->inet_diag_nla_bc;
+ struct net *net = sock_net(skb->sk);
+ int i;
+
+ for (i = diag_ctx->l_slot; i <= tcp_hashinfo.lhash2_mask; i++) {
+ struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
+ struct sock *sk;
+ int num = 0;
+
+ ilb = &tcp_hashinfo.lhash2[i];
+
+ rcu_read_lock();
+ spin_lock(&ilb->lock);
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
+ const struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);
+ struct inet_sock *inet = inet_sk(sk);
+ int ret;
+
+ if (num < diag_ctx->l_num)
+ goto next_listen;
+
+ if (!ctx || strcmp(inet_csk(sk)->icsk_ulp_ops->name, "mptcp"))
+ goto next_listen;
+
+ sk = ctx->conn;
+ if (!sk || !net_eq(sock_net(sk), net))
+ goto next_listen;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
+ if (r->id.idiag_sport != inet->inet_sport &&
+ r->id.idiag_sport)
+ goto next_listen;
+
+ if (!refcount_inc_not_zero(&sk->sk_refcnt))
+ goto next_listen;
+
+ ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
+
+ sock_put(sk);
+
+ if (ret < 0) {
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+ diag_ctx->l_slot = i;
+ diag_ctx->l_num = num;
+ return;
+ }
+ diag_ctx->l_num = num + 1;
+ num = 0;
+next_listen:
+ ++num;
+ }
+ spin_unlock(&ilb->lock);
+ rcu_read_unlock();
+
+ cond_resched();
+ diag_ctx->l_num = 0;
+ }
+
+ diag_ctx->l_num = 0;
+ diag_ctx->l_slot = i;
+}
+
static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
const struct inet_diag_req_v2 *r)
{
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+ struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk);
struct inet_diag_dump_data *cb_data;
struct mptcp_sock *msk;
struct nlattr *bc;
+ BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
+
cb_data = cb->data;
bc = cb_data->inet_diag_nla_bc;
- while ((msk = mptcp_token_iter_next(net, &cb->args[0], &cb->args[1])) !=
- NULL) {
+ while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
+ &diag_ctx->s_num)) != NULL) {
struct inet_sock *inet = (struct inet_sock *)msk;
struct sock *sk = (struct sock *)msk;
int ret = 0;
@@ -101,11 +184,14 @@ next:
sock_put(sk);
if (ret < 0) {
/* will retry on the same position */
- cb->args[1]--;
+ diag_ctx->s_num--;
break;
}
cond_resched();
}
+
+ if ((r->idiag_states & TCPF_LISTEN) && r->id.idiag_dport == 0)
+ mptcp_diag_dump_listeners(skb, cb, r, net_admin);
}
static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
@@ -116,6 +202,19 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
r->idiag_rqueue = sk_rmem_alloc_get(sk);
r->idiag_wqueue = sk_wmem_alloc_get(sk);
+
+ if (inet_sk_state_load(sk) == TCP_LISTEN) {
+ struct sock *lsk = READ_ONCE(msk->first);
+
+ if (lsk) {
+ /* override with settings from tcp listener,
+ * so Send-Q will show accept queue.
+ */
+ r->idiag_rqueue = READ_ONCE(lsk->sk_ack_backlog);
+ r->idiag_wqueue = READ_ONCE(lsk->sk_max_ack_backlog);
+ }
+ }
+
if (!info)
return;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index b548cec86c9d..be3b918a6d15 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -825,7 +825,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
opts->suboptions = 0;
- if (unlikely(__mptcp_check_fallback(msk)))
+ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb)))
return false;
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) {
@@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 &&
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) &&
- READ_ONCE(msk->pm.server_side))
+ !subflow->request_join)
tcp_send_ack(ssk);
goto fully_established;
}
@@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) &&
add_addr_hmac_valid(msk, &mp_opt)) {
if (!mp_opt.echo) {
- mptcp_pm_add_addr_received(msk, &mp_opt.addr);
+ mptcp_pm_add_addr_received(sk, &mp_opt.addr);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
} else {
mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
@@ -1224,20 +1224,62 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(const struct tcp_sock *tp)
+static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
{
const struct sock *ssk = (const struct sock *)tp;
- const struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow;
+ u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
struct mptcp_sock *msk;
- u64 ack_seq;
+ u32 new_win;
+ u64 win;
subflow = mptcp_subflow_ctx(ssk);
msk = mptcp_sk(subflow->conn);
- ack_seq = READ_ONCE(msk->ack_seq) + tp->rcv_wnd;
+ ack_seq = READ_ONCE(msk->ack_seq);
+ rcv_wnd_new = ack_seq + tp->rcv_wnd;
+
+ rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
+ if (after64(rcv_wnd_new, rcv_wnd_old)) {
+ u64 rcv_wnd;
+
+ for (;;) {
+ rcv_wnd = atomic64_cmpxchg(&msk->rcv_wnd_sent, rcv_wnd_old, rcv_wnd_new);
+
+ if (rcv_wnd == rcv_wnd_old)
+ break;
+ if (before64(rcv_wnd_new, rcv_wnd)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE);
+ goto raise_win;
+ }
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT);
+ rcv_wnd_old = rcv_wnd;
+ }
+ return;
+ }
+
+ if (rcv_wnd_new != rcv_wnd_old) {
+raise_win:
+ win = rcv_wnd_old - ack_seq;
+ tp->rcv_wnd = min_t(u64, win, U32_MAX);
+ new_win = tp->rcv_wnd;
- if (after64(ack_seq, READ_ONCE(msk->rcv_wnd_sent)))
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ /* Make sure we do not exceed the maximum possible
+ * scaled window.
+ */
+ if (unlikely(th->syn))
+ new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
+ if (!tp->rx_opt.rcv_wscale &&
+ sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ new_win = min(new_win, MAX_TCP_WINDOW);
+ else
+ new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
+
+ /* RFC1323 scaling applied */
+ new_win >>= tp->rx_opt.rcv_wscale;
+ th->window = htons(new_win);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED);
+ }
}
__sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum)
@@ -1275,7 +1317,7 @@ static void put_len_csum(u16 len, __sum16 csum, void *data)
put_unaligned(csum, sumptr);
}
-void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
+void mptcp_write_options(struct tcphdr *th, __be32 *ptr, struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
const struct sock *ssk = (const struct sock *)tp;
@@ -1350,8 +1392,11 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be32(mpext->subflow_seq, ptr);
ptr += 1;
if (opts->csum_reqd) {
+ /* data_len == 0 is reserved for the infinite mapping,
+ * the checksum will also be set to 0.
+ */
put_len_csum(mpext->data_len,
- mptcp_make_csum(mpext),
+ (mpext->data_len ? mptcp_make_csum(mpext) : 0),
ptr);
} else {
put_unaligned_be32(mpext->data_len << 16 |
@@ -1562,7 +1607,7 @@ mp_capable_done:
}
if (tp)
- mptcp_set_rwin(tp);
+ mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index aa51b100e033..59a85220edc9 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
unsigned int subflows_max;
int ret = 0;
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_active(msk);
+
subflows_max = mptcp_pm_get_subflows_max(msk);
pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows,
@@ -178,7 +181,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
struct mptcp_pm_data *pm = &msk->pm;
bool update_subflows;
- update_subflows = subflow->request_join || subflow->mp_join;
+ update_subflows = (subflow->request_join || subflow->mp_join) &&
+ mptcp_pm_is_kernel(msk);
if (!READ_ONCE(pm->work_pending) && !update_subflows)
return;
@@ -195,19 +199,28 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
- mptcp_event_addr_announced(msk, addr);
+ mptcp_event_addr_announced(ssk, addr);
spin_lock_bh(&pm->lock);
- if (!READ_ONCE(pm->accept_addr)) {
+ if (mptcp_pm_is_userspace(msk)) {
+ if (mptcp_userspace_pm_active(msk)) {
+ mptcp_pm_announce_addr(msk, addr, true);
+ mptcp_pm_add_addr_send_ack(msk);
+ } else {
+ __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
+ }
+ } else if (!READ_ONCE(pm->accept_addr)) {
mptcp_pm_announce_addr(msk, addr, true);
mptcp_pm_add_addr_send_ack(msk);
} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
@@ -261,19 +274,49 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
spin_unlock_bh(&pm->lock);
}
-void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
+void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
- subflow->backup = bkup;
+ msk = mptcp_sk(sk);
+ if (subflow->backup != bkup) {
+ subflow->backup = bkup;
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ msk->last_snd = NULL;
+ else
+ __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags);
+ mptcp_data_unlock(sk);
+ }
- mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
+ mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct sock *s = (struct sock *)msk;
+
pr_debug("fail_seq=%llu", fail_seq);
+
+ if (!READ_ONCE(msk->allow_infinite_fallback))
+ return;
+
+ if (!READ_ONCE(subflow->mp_fail_response_expect)) {
+ pr_debug("send MP_FAIL response and infinite map");
+
+ subflow->send_mp_fail = 1;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
+ subflow->send_infinite_map = 1;
+ } else if (!sock_flag(sk, SOCK_DEAD)) {
+ pr_debug("MP_FAIL response received");
+
+ sk_stop_timer(s, &s->sk_timer);
+ }
}
/* path manager helpers */
@@ -381,27 +424,48 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
- msk->pm.add_addr_signaled = 0;
- msk->pm.add_addr_accepted = 0;
- msk->pm.local_addr_used = 0;
- msk->pm.subflows = 0;
- msk->pm.rm_list_tx.nr = 0;
- msk->pm.rm_list_rx.nr = 0;
- WRITE_ONCE(msk->pm.work_pending, false);
- WRITE_ONCE(msk->pm.addr_signal, 0);
- WRITE_ONCE(msk->pm.accept_addr, false);
- WRITE_ONCE(msk->pm.accept_subflow, false);
- WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
- msk->pm.status = 0;
- bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
+ struct mptcp_pm_data *pm = &msk->pm;
- mptcp_pm_nl_data_init(msk);
+ pm->add_addr_signaled = 0;
+ pm->add_addr_accepted = 0;
+ pm->local_addr_used = 0;
+ pm->subflows = 0;
+ pm->rm_list_tx.nr = 0;
+ pm->rm_list_rx.nr = 0;
+ WRITE_ONCE(pm->pm_type, pm_type);
+
+ if (pm_type == MPTCP_PM_TYPE_KERNEL) {
+ bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);
+
+ /* pm->work_pending must be only be set to 'true' when
+ * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
+ */
+ WRITE_ONCE(pm->work_pending,
+ (!!mptcp_pm_get_local_addr_max(msk) &&
+ subflows_allowed) ||
+ !!mptcp_pm_get_add_addr_signal_max(msk));
+ WRITE_ONCE(pm->accept_addr,
+ !!mptcp_pm_get_add_addr_accept_max(msk) &&
+ subflows_allowed);
+ WRITE_ONCE(pm->accept_subflow, subflows_allowed);
+ } else {
+ WRITE_ONCE(pm->work_pending, 0);
+ WRITE_ONCE(pm->accept_addr, 0);
+ WRITE_ONCE(pm->accept_subflow, 0);
+ }
+
+ WRITE_ONCE(pm->addr_signal, 0);
+ WRITE_ONCE(pm->remote_deny_join_id0, false);
+ pm->status = 0;
+ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}
void mptcp_pm_data_init(struct mptcp_sock *msk)
{
spin_lock_init(&msk->pm.lock);
INIT_LIST_HEAD(&msk->pm.anno_list);
+ INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
mptcp_pm_data_reset(msk);
}
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index b5e8de6f7507..e099f2a12504 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -22,14 +22,6 @@ static struct genl_family mptcp_genl_family;
static int pm_nl_pernet_id;
-struct mptcp_pm_addr_entry {
- struct list_head list;
- struct mptcp_addr_info addr;
- u8 flags;
- int ifindex;
- struct socket *lsk;
-};
-
struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
@@ -55,8 +47,19 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
-static bool addresses_equal(const struct mptcp_addr_info *a,
- const struct mptcp_addr_info *b, bool use_port)
+static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net)
+{
+ return net_generic(net, pm_nl_pernet_id);
+}
+
+static struct pm_nl_pernet *
+pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk)
+{
+ return pm_nl_get_pernet(sock_net((struct sock *)msk));
+}
+
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port)
{
bool addr_equals = false;
@@ -120,7 +123,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
local_address(skc, &cur);
- if (addresses_equal(&cur, saddr, saddr->port))
+ if (mptcp_addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -138,7 +141,7 @@ static bool lookup_subflow_by_daddr(const struct list_head *list,
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
remote_address(skc, &cur);
- if (addresses_equal(&cur, daddr, daddr->port))
+ if (mptcp_addresses_equal(&cur, daddr, daddr->port))
return true;
}
@@ -206,43 +209,39 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk)
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk)
{
- const struct pm_nl_pernet *pernet;
+ const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((const struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_signal_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->add_addr_accept_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->subflows_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max);
unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet;
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
return READ_ONCE(pernet->local_addr_max);
}
EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk)
{
- struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk);
if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
@@ -262,7 +261,7 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, addr, true))
+ if (mptcp_addresses_equal(&entry->addr, addr, true))
return entry;
}
@@ -279,7 +278,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
- if (addresses_equal(&entry->addr, &saddr, true)) {
+ if (mptcp_addresses_equal(&entry->addr, &saddr, true)) {
ret = true;
goto out;
}
@@ -353,8 +352,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
return entry;
}
-static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry)
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -362,8 +361,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr))
- return false;
+ add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
+
+ if (add_entry) {
+ if (mptcp_pm_is_kernel(msk))
+ return false;
+
+ sk_reset_timer(sk, &add_entry->add_timer,
+ jiffies + mptcp_get_add_addr_timeout(net));
+ return true;
+ }
add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
if (!add_entry)
@@ -406,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
int i;
for (i = 0; i < nr; i++) {
- if (addresses_equal(&addrs[i], addr, addr->port))
+ if (mptcp_addresses_equal(&addrs[i], addr, addr->port))
return true;
}
@@ -442,7 +449,7 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- if (deny_id0 && addresses_equal(&addrs[i], &remote, false))
+ if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false))
continue;
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
@@ -475,7 +482,7 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
+ if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) ||
(lookup_by_id && entry->addr.id == info->id))
return entry;
}
@@ -490,7 +497,7 @@ lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_inf
rcu_read_lock();
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -508,7 +515,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(sock_net(sk));
add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk);
local_addr_max = mptcp_pm_get_local_addr_max(msk);
@@ -604,7 +611,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
unsigned int subflows_max;
int i = 0;
- pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet_from_msk(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
rcu_read_lock();
@@ -724,9 +731,11 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info local;
local_address((struct sock_common *)ssk, &local);
- if (!addresses_equal(&local, addr, addr->port))
+ if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
+ if (subflow->backup != bkup)
+ msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
@@ -796,6 +805,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
if (!removed)
continue;
+ if (!mptcp_pm_is_kernel(msk))
+ continue;
+
if (rm_type == MPTCP_MIB_RMADDR) {
msk->pm.add_addr_accepted--;
WRITE_ONCE(msk->pm.accept_addr, true);
@@ -889,9 +901,9 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
* singled addresses
*/
list_for_each_entry(cur, &pernet->local_addr_list, list) {
- if (addresses_equal(&cur->addr, &entry->addr,
- address_use_port(entry) &&
- address_use_port(cur))) {
+ if (mptcp_addresses_equal(&cur->addr, &entry->addr,
+ address_use_port(entry) &&
+ address_use_port(cur))) {
/* allow replacing the exiting endpoint only if such
* endpoint is an implicit one and the user-space
* did not provide an endpoint id
@@ -1018,14 +1030,17 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
*/
local_address((struct sock_common *)msk, &msk_local);
local_address((struct sock_common *)skc, &skc_local);
- if (addresses_equal(&msk_local, &skc_local, false))
+ if (mptcp_addresses_equal(&msk_local, &skc_local, false))
return 0;
- pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_local_id(msk, &skc_local);
+
+ pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -1052,18 +1067,6 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
return ret;
}
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
-{
- struct mptcp_pm_data *pm = &msk->pm;
- bool subflows;
-
- subflows = !!mptcp_pm_get_subflows_max(msk);
- WRITE_ONCE(pm->work_pending, (!!mptcp_pm_get_local_addr_max(msk) && subflows) ||
- !!mptcp_pm_get_add_addr_signal_max(msk));
- WRITE_ONCE(pm->accept_addr, !!mptcp_pm_get_add_addr_accept_max(msk) && subflows);
- WRITE_ONCE(pm->accept_subflow, subflows);
-}
-
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
@@ -1091,6 +1094,10 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
NLA_POLICY_NESTED(mptcp_pm_addr_policy),
[MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, },
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, },
+ [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, },
+ [MPTCP_PM_ATTR_ADDR_REMOTE] =
+ NLA_POLICY_NESTED(mptcp_pm_addr_policy),
};
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
@@ -1139,11 +1146,12 @@ static int mptcp_pm_family_to_addr(int family)
return MPTCP_PM_ADDR_ATTR_ADDR4;
}
-static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
- bool require_family,
- struct mptcp_pm_addr_entry *entry)
+static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[],
+ const struct nlattr *attr,
+ struct genl_info *info,
+ struct mptcp_addr_info *addr,
+ bool require_family)
{
- struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
int err, addr_addr;
if (!attr) {
@@ -1157,27 +1165,29 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
if (err)
return err;
- memset(entry, 0, sizeof(*entry));
+ if (tb[MPTCP_PM_ADDR_ATTR_ID])
+ addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
+
if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) {
if (!require_family)
- goto skip_family;
+ return err;
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing family");
return -EINVAL;
}
- entry->addr.family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
- if (entry->addr.family != AF_INET
+ addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]);
+ if (addr->family != AF_INET
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- && entry->addr.family != AF_INET6
+ && addr->family != AF_INET6
#endif
) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"unknown address family");
return -EINVAL;
}
- addr_addr = mptcp_pm_family_to_addr(entry->addr.family);
+ addr_addr = mptcp_pm_family_to_addr(addr->family);
if (!tb[addr_addr]) {
NL_SET_ERR_MSG_ATTR(info->extack, attr,
"missing address data");
@@ -1185,22 +1195,47 @@ static int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (entry->addr.family == AF_INET6)
- entry->addr.addr6 = nla_get_in6_addr(tb[addr_addr]);
+ if (addr->family == AF_INET6)
+ addr->addr6 = nla_get_in6_addr(tb[addr_addr]);
else
#endif
- entry->addr.addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+ addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]);
+
+ if (tb[MPTCP_PM_ADDR_ATTR_PORT])
+ addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT]));
+
+ return err;
+}
+
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ memset(addr, 0, sizeof(*addr));
+
+ return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true);
+}
+
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+ int err;
+
+ memset(entry, 0, sizeof(*entry));
+
+ err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family);
+ if (err)
+ return err;
-skip_family:
if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) {
u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]);
entry->ifindex = val;
}
- if (tb[MPTCP_PM_ADDR_ATTR_ID])
- entry->addr.id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]);
-
if (tb[MPTCP_PM_ADDR_ATTR_FLAGS])
entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]);
@@ -1212,7 +1247,7 @@ skip_family:
static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info)
{
- return net_generic(genl_info_net(info), pm_nl_pernet_id);
+ return pm_nl_get_pernet(genl_info_net(info));
}
static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
@@ -1223,7 +1258,8 @@ static int mptcp_nl_add_subflow_or_signal_addr(struct net *net)
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (!READ_ONCE(msk->fully_established))
+ if (!READ_ONCE(msk->fully_established) ||
+ mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
@@ -1247,7 +1283,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
struct mptcp_pm_addr_entry addr, *entry;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, true, &addr);
if (ret < 0)
return ret;
@@ -1296,17 +1332,25 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
return 0;
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
u8 *flags, int *ifindex)
{
struct mptcp_pm_addr_entry *entry;
+ struct sock *sk = (struct sock *)msk;
+ struct net *net = sock_net(sk);
*flags = 0;
*ifindex = 0;
if (id) {
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
+ id,
+ flags,
+ ifindex);
+
rcu_read_lock();
- entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
if (entry) {
*flags = entry->flags;
*ifindex = entry->ifindex;
@@ -1366,6 +1410,9 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net,
struct sock *sk = (struct sock *)msk;
bool remove_subflow;
+ if (mptcp_pm_is_userspace(msk))
+ goto next;
+
if (list_empty(&msk->conn_list)) {
mptcp_pm_remove_anno_addr(msk, addr, false);
goto next;
@@ -1400,11 +1447,11 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info msk_local;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
local_address((struct sock_common *)msk, &msk_local);
- if (!addresses_equal(&msk_local, addr, addr->port))
+ if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
goto next;
lock_sock(sk);
@@ -1430,7 +1477,7 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
unsigned int addr_max;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1470,8 +1517,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info)
return ret;
}
-static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
- struct list_head *rm_list)
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list)
{
struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 };
struct mptcp_pm_addr_entry *entry;
@@ -1507,9 +1554,11 @@ static void mptcp_nl_remove_addrs_list(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- lock_sock(sk);
- mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
- release_sock(sk);
+ if (!mptcp_pm_is_userspace(msk)) {
+ lock_sock(sk);
+ mptcp_pm_remove_addrs_and_subflows(msk, rm_list);
+ release_sock(sk);
+ }
sock_put(sk);
cond_resched();
@@ -1602,7 +1651,7 @@ static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info)
void *reply;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1653,7 +1702,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
void *hdr;
int i;
- pernet = net_generic(net, pm_nl_pernet_id);
+ pernet = pm_nl_get_pernet(net);
spin_lock_bh(&pernet->lock);
for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
@@ -1782,7 +1831,7 @@ static int mptcp_nl_set_flags(struct net *net,
while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) {
struct sock *sk = (struct sock *)msk;
- if (list_empty(&msk->conn_list))
+ if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
lock_sock(sk);
@@ -1813,7 +1862,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
u8 bkup = 0, lookup_by_id = 0;
int ret;
- ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ ret = mptcp_pm_parse_entry(attr, info, false, &addr);
if (ret < 0)
return ret;
@@ -1852,6 +1901,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf
nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk)
+{
+ return genl_has_listeners(&mptcp_genl_family,
+ sock_net((const struct sock *)msk),
+ MPTCP_PM_EV_GRP_OFFSET);
+}
+
static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
const struct inet_sock *issk = inet_sk(ssk);
@@ -1972,6 +2028,9 @@ static int mptcp_event_created(struct sk_buff *skb,
if (err)
return err;
+ if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side)))
+ return -EMSGSIZE;
+
return mptcp_event_add_subflow(skb, ssk);
}
@@ -2006,10 +2065,12 @@ nla_put_failure:
kfree_skb(skb);
}
-void mptcp_event_addr_announced(const struct mptcp_sock *msk,
+void mptcp_event_addr_announced(const struct sock *ssk,
const struct mptcp_addr_info *info)
{
- struct net *net = sock_net((const struct sock *)msk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct net *net = sock_net(ssk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
@@ -2031,7 +2092,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk,
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
goto nla_put_failure;
- if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
+ if (nla_put_be16(skb, MPTCP_ATTR_DPORT,
+ info->port == 0 ?
+ inet_sk(ssk)->inet_dport :
+ info->port))
goto nla_put_failure;
switch (info->family) {
@@ -2148,6 +2212,26 @@ static const struct genl_small_ops mptcp_pm_ops[] = {
.doit = mptcp_nl_cmd_set_flags,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = MPTCP_PM_CMD_ANNOUNCE,
+ .doit = mptcp_nl_cmd_announce,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_REMOVE,
+ .doit = mptcp_nl_cmd_remove,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE,
+ .doit = mptcp_nl_cmd_sf_create,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ .doit = mptcp_nl_cmd_sf_destroy,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family mptcp_genl_family __ro_after_init = {
@@ -2165,7 +2249,7 @@ static struct genl_family mptcp_genl_family __ro_after_init = {
static int __net_init pm_nl_init_net(struct net *net)
{
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
@@ -2187,7 +2271,7 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list)
struct net *net;
list_for_each_entry(net, net_list, exit_list) {
- struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id);
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
/* net is removed from namespace list, can't race with
* other modifiers, also netns core already waited for a
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
new file mode 100644
index 000000000000..f56378e4f597
--- /dev/null
+++ b/net/mptcp/pm_userspace.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Multipath TCP
+ *
+ * Copyright (c) 2022, Intel Corporation.
+ */
+
+#include "protocol.h"
+
+void mptcp_free_local_addr_list(struct mptcp_sock *msk)
+{
+ struct mptcp_pm_addr_entry *entry, *tmp;
+ struct sock *sk = (struct sock *)msk;
+ LIST_HEAD(free_list);
+
+ if (!mptcp_pm_is_userspace(msk))
+ return;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_splice_init(&msk->pm.userspace_pm_local_addr_list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
+
+ list_for_each_entry_safe(entry, tmp, &free_list, list) {
+ sock_kfree_s(sk, entry, sizeof(*entry));
+ }
+}
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry)
+{
+ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *e;
+ bool addr_match = false;
+ bool id_match = false;
+ int ret = -EINVAL;
+
+ bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
+ if (addr_match && entry->addr.id == 0)
+ entry->addr.id = e->addr.id;
+ id_match = (e->addr.id == entry->addr.id);
+ if (addr_match && id_match) {
+ match = e;
+ break;
+ } else if (addr_match || id_match) {
+ break;
+ }
+ __set_bit(e->addr.id, id_bitmap);
+ }
+
+ if (!match && !addr_match && !id_match) {
+ /* Memory for the entry is allocated from the
+ * sock option buffer.
+ */
+ e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
+ if (!e) {
+ spin_unlock_bh(&msk->pm.lock);
+ return -ENOMEM;
+ }
+
+ *e = *entry;
+ if (!e->addr.id)
+ e->addr.id = find_next_zero_bit(id_bitmap,
+ MPTCP_PM_MAX_ADDR_ID + 1,
+ 1);
+ list_add_tail_rcu(&e->list, &msk->pm.userspace_pm_local_addr_list);
+ ret = e->addr.id;
+ } else if (match) {
+ ret = entry->addr.id;
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ return ret;
+}
+
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry, *match = NULL;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (id == entry->addr.id) {
+ match = entry;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (match) {
+ *flags = match->flags;
+ *ifindex = match->ifindex;
+ }
+
+ return 0;
+}
+
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
+ struct mptcp_addr_info *skc)
+{
+ struct mptcp_pm_addr_entry new_entry;
+ __be16 msk_sport = ((struct inet_sock *)
+ inet_sk((struct sock *)msk))->inet_sport;
+
+ memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
+ new_entry.addr = *skc;
+ new_entry.addr.id = 0;
+ new_entry.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
+
+ if (new_entry.addr.port == msk_sport)
+ new_entry.addr.port = 0;
+
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+}
+
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_pm_addr_entry addr_val;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!addr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto announce_err;
+ }
+
+ err = mptcp_pm_parse_entry(addr, info, true, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "error parsing local address");
+ goto announce_err;
+ }
+
+ if (addr_val.addr.id == 0 || !(addr_val.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ GENL_SET_ERR_MSG(info, "invalid addr id or flags");
+ goto announce_err;
+ }
+
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ if (err < 0) {
+ GENL_SET_ERR_MSG(info, "did not match address and id");
+ goto announce_err;
+ }
+
+ lock_sock((struct sock *)msk);
+ spin_lock_bh(&msk->pm.lock);
+
+ if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ mptcp_pm_announce_addr(msk, &addr_val.addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+
+ spin_unlock_bh(&msk->pm.lock);
+ release_sock((struct sock *)msk);
+
+ err = 0;
+ announce_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID];
+ struct mptcp_pm_addr_entry *match = NULL;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_sock *msk;
+ LIST_HEAD(free_list);
+ int err = -EINVAL;
+ u32 token_val;
+ u8 id_val;
+
+ if (!id || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ id_val = nla_get_u8(id);
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(sock_net(skb->sk), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto remove_err;
+ }
+
+ lock_sock((struct sock *)msk);
+
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (entry->addr.id == id_val) {
+ match = entry;
+ break;
+ }
+ }
+
+ if (!match) {
+ GENL_SET_ERR_MSG(info, "address with specified id not found");
+ release_sock((struct sock *)msk);
+ goto remove_err;
+ }
+
+ list_move(&match->list, &free_list);
+
+ mptcp_pm_remove_addrs_and_subflows(msk, &free_list);
+
+ release_sock((struct sock *)msk);
+
+ list_for_each_entry_safe(match, entry, &free_list, list) {
+ sock_kfree_s((struct sock *)msk, match, sizeof(*match));
+ }
+
+ err = 0;
+ remove_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_r;
+ struct mptcp_addr_info addr_l;
+ struct mptcp_sock *msk;
+ int err = -EINVAL;
+ struct sock *sk;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto create_err;
+ }
+
+ if (addr_l.id == 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
+ goto create_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto create_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
+
+ err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
+
+ release_sock(sk);
+
+ create_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
+
+static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
+ const struct mptcp_addr_info *local,
+ const struct mptcp_addr_info *remote)
+{
+ struct sock *sk = &msk->sk.icsk_inet.sk;
+ struct mptcp_subflow_context *subflow;
+ struct sock *found = NULL;
+
+ if (local->family != remote->family)
+ return NULL;
+
+ lock_sock(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ const struct inet_sock *issk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+
+ if (local->family != ssk->sk_family)
+ continue;
+
+ issk = inet_sk(ssk);
+
+ switch (ssk->sk_family) {
+ case AF_INET:
+ if (issk->inet_saddr != local->addr.s_addr ||
+ issk->inet_daddr != remote->addr.s_addr)
+ continue;
+ break;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ case AF_INET6: {
+ const struct ipv6_pinfo *pinfo = inet6_sk(ssk);
+
+ if (!ipv6_addr_equal(&local->addr6, &pinfo->saddr) ||
+ !ipv6_addr_equal(&remote->addr6, &ssk->sk_v6_daddr))
+ continue;
+ break;
+ }
+#endif
+ default:
+ continue;
+ }
+
+ if (issk->inet_sport == local->port &&
+ issk->inet_dport == remote->port) {
+ found = ssk;
+ goto found;
+ }
+ }
+
+found:
+ release_sock(sk);
+
+ return found;
+}
+
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
+ struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct mptcp_addr_info addr_l;
+ struct mptcp_addr_info addr_r;
+ struct mptcp_sock *msk;
+ struct sock *sk, *ssk;
+ int err = -EINVAL;
+ u32 token_val;
+
+ if (!laddr || !raddr || !token) {
+ GENL_SET_ERR_MSG(info, "missing required inputs");
+ return err;
+ }
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(genl_info_net(info), token_val);
+ if (!msk) {
+ NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token");
+ return err;
+ }
+
+ if (!mptcp_pm_is_userspace(msk)) {
+ GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(laddr, info, &addr_l);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr");
+ goto destroy_err;
+ }
+
+ err = mptcp_pm_parse_addr(raddr, info, &addr_r);
+ if (err < 0) {
+ NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
+ goto destroy_err;
+ }
+
+ if (addr_l.family != addr_r.family) {
+ GENL_SET_ERR_MSG(info, "address families do not match");
+ goto destroy_err;
+ }
+
+ if (!addr_l.port || !addr_r.port) {
+ GENL_SET_ERR_MSG(info, "missing local or remote port");
+ goto destroy_err;
+ }
+
+ sk = &msk->sk.icsk_inet.sk;
+ ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
+ if (ssk) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
+ mptcp_close_ssk(sk, ssk, subflow);
+ err = 0;
+ } else {
+ err = -ESRCH;
+ }
+
+ destroy_err:
+ sock_put((struct sock *)msk);
+ return err;
+}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0cbea3b6d0a4..17e13396024a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -216,7 +216,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
seq = MPTCP_SKB_CB(skb)->map_seq;
end_seq = MPTCP_SKB_CB(skb)->end_seq;
- max_seq = READ_ONCE(msk->rcv_wnd_sent);
+ max_seq = atomic64_read(&msk->rcv_wnd_sent);
pr_debug("msk=%p seq=%llx limit=%llx empty=%d", msk, seq, max_seq,
RB_EMPTY_ROOT(&msk->out_of_order_queue));
@@ -225,7 +225,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb)
mptcp_drop(sk, skb);
pr_debug("oow by %lld, rcv_wnd_sent %llu\n",
(unsigned long long)end_seq - (unsigned long)max_seq,
- (unsigned long long)msk->rcv_wnd_sent);
+ (unsigned long long)atomic64_read(&msk->rcv_wnd_sent));
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_NODSSWINDOW);
return;
}
@@ -1141,18 +1141,21 @@ struct mptcp_sendmsg_info {
bool data_lock_held;
};
-static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
- int avail_size)
+static int mptcp_check_allowed_size(const struct mptcp_sock *msk, struct sock *ssk,
+ u64 data_seq, int avail_size)
{
u64 window_end = mptcp_wnd_end(msk);
+ u64 mptcp_snd_wnd;
if (__mptcp_check_fallback(msk))
return avail_size;
- if (!before64(data_seq + avail_size, window_end)) {
- u64 allowed_size = window_end - data_seq;
+ mptcp_snd_wnd = window_end - data_seq;
+ avail_size = min_t(unsigned int, mptcp_snd_wnd, avail_size);
- return min_t(unsigned int, allowed_size, avail_size);
+ if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd)) {
+ tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_SNDWNDSHARED);
}
return avail_size;
@@ -1229,6 +1232,22 @@ static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
}
+static void mptcp_update_infinite_map(struct mptcp_sock *msk,
+ struct sock *ssk,
+ struct mptcp_ext *mpext)
+{
+ if (!mpext)
+ return;
+
+ mpext->infinite_map = 1;
+ mpext->data_len = 0;
+
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
+ mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
+ pr_fallback(msk);
+ __mptcp_do_fallback(msk);
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1289,7 +1308,7 @@ alloc_skb:
}
/* Zero window and all data acked? Probe. */
- copy = mptcp_check_allowed_size(msk, data_seq, copy);
+ copy = mptcp_check_allowed_size(msk, ssk, data_seq, copy);
if (copy == 0) {
u64 snd_una = READ_ONCE(msk->snd_una);
@@ -1360,6 +1379,8 @@ alloc_skb:
out:
if (READ_ONCE(msk->csum_enabled))
mptcp_update_data_checksum(skb, copy);
+ if (mptcp_subflow_ctx(ssk)->send_infinite_map)
+ mptcp_update_infinite_map(msk, ssk, mpext);
trace_mptcp_sendmsg_frag(mpext);
mptcp_subflow_ctx(ssk)->rel_write_seq += copy;
return copy;
@@ -1480,11 +1501,16 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
* to check that subflow has a non empty cwin.
*/
ssk = send_info[SSK_MODE_ACTIVE].ssk;
- if (!ssk || !sk_stream_memory_free(ssk) || !tcp_sk(ssk)->snd_wnd)
+ if (!ssk || !sk_stream_memory_free(ssk))
return NULL;
- burst = min_t(int, MPTCP_SEND_BURST_SIZE, tcp_sk(ssk)->snd_wnd);
+ burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
wmem = READ_ONCE(ssk->sk_wmem_queued);
+ if (!burst) {
+ msk->last_snd = NULL;
+ return ssk;
+ }
+
subflow = mptcp_subflow_ctx(ssk);
subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem +
READ_ONCE(ssk->sk_pacing_rate) * burst,
@@ -2012,7 +2038,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
}
static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
- int nonblock, int flags, int *addr_len)
+ int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct scm_timestamping_internal tss;
@@ -2030,7 +2056,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
goto out_err;
}
- timeo = sock_rcvtimeo(sk, nonblock);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
len = min_t(size_t, len, INT_MAX);
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2149,6 +2175,21 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
+static struct mptcp_subflow_context *
+mp_fail_response_expect_subflow(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow, *ret = NULL;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ if (READ_ONCE(subflow->mp_fail_response_expect)) {
+ ret = subflow;
+ break;
+ }
+ }
+
+ return ret;
+}
+
static void mptcp_timeout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2465,6 +2506,7 @@ static void __mptcp_retrans(struct sock *sk)
dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
}
release_sock(ssk);
@@ -2476,6 +2518,23 @@ reset_timer:
mptcp_reset_timer(sk);
}
+static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk;
+ bool slow;
+
+ subflow = mp_fail_response_expect_subflow(msk);
+ if (subflow) {
+ pr_debug("MP_FAIL doesn't respond, reset the subflow");
+
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ slow = lock_sock_fast(ssk);
+ mptcp_subflow_reset(ssk);
+ unlock_sock_fast(ssk, slow);
+ }
+}
+
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@@ -2516,6 +2575,8 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
+ mptcp_mp_fail_no_response(msk);
+
unlock:
release_sock(sk);
sock_put(sk);
@@ -2539,6 +2600,7 @@ static int __mptcp_init_sock(struct sock *sk)
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
+ WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
mptcp_pm_data_init(msk);
@@ -2733,7 +2795,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
/* join list will be eventually flushed (with rst) at sock lock release time*/
list_splice_init(&msk->conn_list, &conn_list);
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
@@ -2841,7 +2903,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
}
- sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
+ mptcp_stop_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
if (mptcp_sk(sk)->token)
@@ -2916,7 +2978,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
ack_seq++;
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
}
sock_reset_flag(nsk, SOCK_RCU_FREE);
@@ -3017,6 +3079,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
+ mptcp_free_local_addr_list(msk);
}
static void mptcp_destroy(struct sock *sk)
@@ -3092,15 +3155,19 @@ static void mptcp_release_cb(struct sock *sk)
spin_lock_bh(&sk->sk_lock.slock);
}
- /* be sure to set the current sk state before tacking actions
- * depending on sk_state
- */
- if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
- __mptcp_set_connected(sk);
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
- __mptcp_error_report(sk);
+ if (unlikely(&msk->cb_flags)) {
+ /* be sure to set the current sk state before tacking actions
+ * depending on sk_state, that is processing MPTCP_ERROR_REPORT
+ */
+ if (__test_and_clear_bit(MPTCP_CONNECTED, &msk->cb_flags))
+ __mptcp_set_connected(sk);
+ if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags))
+ __mptcp_error_report(sk);
+ if (__test_and_clear_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags))
+ msk->last_snd = NULL;
+ }
__mptcp_update_rmem(sk);
}
@@ -3204,9 +3271,9 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
WRITE_ONCE(msk->ack_seq, ack_seq);
- WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
+ atomic64_set(&msk->rcv_wnd_sent, ack_seq);
mptcp_pm_new_connection(msk, ssk, 0);
@@ -3237,15 +3304,12 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
}
- if (!msk->pm.server_side)
+ if (!list_empty(&subflow->node))
goto out;
if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited;
- if (WARN_ON_ONCE(!list_empty(&subflow->node)))
- goto err_prohibited;
-
/* active connections are already on conn_list.
* If we can't acquire msk socket lock here, let the release callback
* handle it
@@ -3271,6 +3335,7 @@ err_prohibited:
}
subflow->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
@@ -3703,8 +3768,8 @@ void __init mptcp_proto_init(void)
for_each_possible_cpu(cpu) {
delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
INIT_LIST_HEAD(&delegated->head);
- netif_tx_napi_add(&mptcp_napi_dev, &delegated->napi, mptcp_napi_poll,
- NAPI_POLL_WEIGHT);
+ netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi,
+ mptcp_napi_poll);
napi_enable(&delegated->napi);
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 5655a63aa6a8..200f89f6d62f 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -11,6 +11,7 @@
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
+#include <net/genetlink.h>
#define MPTCP_SUPPORTED_VERSION 1
@@ -124,6 +125,7 @@
#define MPTCP_RETRANSMIT 4
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_CONNECTED 6
+#define MPTCP_RESET_SCHEDULER 7
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -182,6 +184,14 @@ enum mptcp_pm_status {
*/
};
+enum mptcp_pm_type {
+ MPTCP_PM_TYPE_KERNEL = 0,
+ MPTCP_PM_TYPE_USERSPACE,
+
+ __MPTCP_PM_TYPE_NR,
+ __MPTCP_PM_TYPE_MAX = __MPTCP_PM_TYPE_NR - 1,
+};
+
/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
@@ -198,6 +208,7 @@ struct mptcp_pm_data {
struct mptcp_addr_info local;
struct mptcp_addr_info remote;
struct list_head anno_list;
+ struct list_head userspace_pm_local_addr_list;
spinlock_t lock; /*protects the whole PM data */
@@ -210,6 +221,7 @@ struct mptcp_pm_data {
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
+ u8 pm_type;
u8 subflows;
u8 status;
DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
@@ -217,6 +229,14 @@ struct mptcp_pm_data {
struct mptcp_rm_list rm_list_rx;
};
+struct mptcp_pm_addr_entry {
+ struct list_head list;
+ struct mptcp_addr_info addr;
+ u8 flags;
+ int ifindex;
+ struct socket *lsk;
+};
+
struct mptcp_data_frag {
struct list_head list;
u64 data_seq;
@@ -236,7 +256,7 @@ struct mptcp_sock {
u64 write_seq;
u64 snd_nxt;
u64 ack_seq;
- u64 rcv_wnd_sent;
+ atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
int rmem_fwd_alloc;
struct sock *last_snd;
@@ -262,6 +282,7 @@ struct mptcp_sock {
bool rcv_fastclose;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
+ bool allow_infinite_fallback;
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
@@ -439,6 +460,7 @@ struct mptcp_subflow_context {
send_mp_prio : 1,
send_mp_fail : 1,
send_fastclose : 1,
+ send_infinite_map : 1,
rx_eof : 1,
can_ack : 1, /* only after processing the remote a key */
disposable : 1, /* ctx can be free at ulp release time */
@@ -446,6 +468,7 @@ struct mptcp_subflow_context {
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
+ bool mp_fail_response_expect;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
@@ -572,6 +595,7 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);
int mptcp_allow_join_id0(const struct net *net);
unsigned int mptcp_stale_loss_cnt(const struct net *net);
+int mptcp_get_pm_type(const struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -587,6 +611,9 @@ void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
+bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
+ const struct mptcp_addr_info *b, bool use_port);
+
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
@@ -622,19 +649,6 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
-static inline bool mptcp_has_another_subflow(struct sock *ssk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp;
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
- mptcp_for_each_subflow(msk, tmp) {
- if (tmp != subflow)
- return true;
- }
-
- return false;
-}
-
void __init mptcp_proto_init(void);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcp_proto_v6_init(void);
@@ -729,6 +743,11 @@ __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum su
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_data_reset(struct mptcp_sock *msk);
+int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
+ struct mptcp_addr_info *addr);
+int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
+ bool require_family,
+ struct mptcp_pm_addr_entry *entry);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
@@ -739,7 +758,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk);
bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk);
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk,
const struct mptcp_subflow_context *subflow);
-void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
+void mptcp_pm_add_addr_received(const struct sock *ssk,
const struct mptcp_addr_info *addr);
void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
@@ -749,6 +768,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
+ const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -757,19 +778,34 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
const struct mptcp_addr_info *addr);
-int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
+ unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list);
+void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk,
+ struct list_head *rm_list);
+
+int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
+ struct mptcp_pm_addr_entry *entry);
+void mptcp_free_local_addr_list(struct mptcp_sock *msk);
+int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info);
+int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
-void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
+void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
+bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
@@ -792,6 +828,16 @@ static inline bool mptcp_pm_should_rm_signal(struct mptcp_sock *msk)
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_RM_ADDR_SIGNAL);
}
+static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE;
+}
+
+static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk)
+{
+ return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL;
+}
+
static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
{
u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE;
@@ -822,9 +868,9 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
void __init mptcp_pm_nl_init(void);
-void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
@@ -890,13 +936,28 @@ static inline void mptcp_do_fallback(struct sock *sk)
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
+static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
+{
+ struct mptcp_ext *mpext;
+
+ mpext = skb ? mptcp_get_ext(skb) : NULL;
+ if (mpext && mpext->infinite_map)
+ return true;
+
+ return false;
+}
+
+static inline bool is_active_ssk(struct mptcp_subflow_context *subflow)
+{
+ return (subflow->request_mptcp || subflow->request_join);
+}
+
static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct sock *parent = subflow->conn;
return sk->sk_state == TCP_ESTABLISHED &&
- !mptcp_sk(parent)->pm.server_side &&
+ is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index f949d22f52bd..423d3826ca1e 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -756,6 +756,18 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
return -EOPNOTSUPP;
}
+static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval,
+ unsigned int optlen)
+{
+ struct socket *listener;
+
+ listener = __mptcp_nmpc_socket(msk);
+ if (!listener)
+ return 0; /* TCP_DEFER_ACCEPT does not fail */
+
+ return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen);
+}
+
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -782,6 +794,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen);
case TCP_NODELAY:
return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen);
+ case TCP_DEFER_ACCEPT:
+ return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen);
}
return -EOPNOTSUPP;
@@ -853,15 +867,11 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
u32 flags = 0;
- bool slow;
u8 val;
memset(info, 0, sizeof(*info));
- slow = lock_sock_fast(sk);
-
info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -882,8 +892,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
-
- unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1148,6 +1156,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
case TCP_CONGESTION:
case TCP_INFO:
case TCP_CC_INFO:
+ case TCP_DEFER_ACCEPT:
return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
optval, optlen);
case TCP_INQ:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index be76ada89d96..8841e8cd9ad8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
return mptcp_is_fully_established((void *)msk) &&
- READ_ONCE(msk->pm.accept_subflow);
+ ((mptcp_pm_is_userspace(msk) &&
+ mptcp_userspace_pm_active(msk)) ||
+ READ_ONCE(msk->pm.accept_subflow));
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
@@ -441,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
+ subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -971,6 +974,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool csum_reqd = READ_ONCE(msk->csum_enabled);
+ struct sock *sk = (struct sock *)msk;
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -1009,7 +1013,12 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
+ if (!sock_flag(ssk, SOCK_DEAD))
+ sk_stop_timer(sk, &sk->sk_timer);
+
return MAPPING_INVALID;
}
@@ -1218,35 +1227,43 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
- while ((skb = skb_peek(&ssk->sk_receive_queue)))
- sk_eat_skb(ssk, skb);
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (subflow->send_mp_fail) {
+ if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+ } else if (!sock_flag(ssk, SOCK_DEAD)) {
+ WRITE_ONCE(subflow->mp_fail_response_expect, true);
+ sk_reset_timer((struct sock *)msk,
+ &((struct sock *)msk)->sk_timer,
+ jiffies + TCP_RTO_MAX);
+ }
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return true;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return true;
- }
- if (!subflow_can_fallback(subflow)) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
- return false;
+ if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1461,7 +1478,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (local_id)
subflow_set_local_id(subflow, local_id);
- mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
@@ -1498,6 +1515,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
return err;
failed_unlink: