Diffstat (limited to 'net/mptcp')
-rw-r--r--   net/mptcp/pm_netlink.c   131
-rw-r--r--   net/mptcp/protocol.c      60
-rw-r--r--   net/mptcp/protocol.h       3
-rw-r--r--   net/mptcp/subflow.c        2
4 files changed, 90 insertions, 106 deletions
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 7c7395b58944..291b5da42fdb 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -413,7 +413,7 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
int i;
for (i = 0; i < nr; i++) {
- if (mptcp_addresses_equal(&addrs[i], addr, addr->port))
+ if (addrs[i].id == addr->id)
return true;
}
@@ -449,7 +449,8 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false))
+ addrs[i].id = subflow->remote_id;
+ if (deny_id0 && !addrs[i].id)
continue;
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
@@ -463,6 +464,37 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
return i;
}
+static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ bool prio, bool backup)
+{
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
+
+ pr_debug("send ack for %s",
+ prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"));
+
+ slow = lock_sock_fast(ssk);
+ if (prio) {
+ if (subflow->backup != backup)
+ msk->last_snd = NULL;
+
+ subflow->send_mp_prio = 1;
+ subflow->backup = backup;
+ subflow->request_bkup = backup;
+ }
+
+ __mptcp_subflow_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
+}
+
+static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ bool prio, bool backup)
+{
+ spin_unlock_bh(&msk->pm.lock);
+ __mptcp_pm_send_ack(msk, subflow, prio, backup);
+ spin_lock_bh(&msk->pm.lock);
+}
+
static struct mptcp_pm_addr_entry *
__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
{
@@ -482,30 +514,14 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
struct mptcp_pm_addr_entry *entry;
list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) ||
+ if ((!lookup_by_id &&
+ mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) ||
(lookup_by_id && entry->addr.id == info->id))
return entry;
}
return NULL;
}
-static int
-lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
-{
- const struct mptcp_pm_addr_entry *entry;
- int ret = -1;
-
- rcu_read_lock();
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) {
- ret = entry->addr.id;
- break;
- }
- }
- rcu_read_unlock();
- return ret;
-}
-
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -523,13 +539,23 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* do lazy endpoint usage accounting for the MPC subflows */
if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first);
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_addr_info mpc_addr;
- int mpc_id;
+ bool backup = false;
local_address((struct sock_common *)msk->first, &mpc_addr);
- mpc_id = lookup_id_by_addr(pernet, &mpc_addr);
- if (mpc_id >= 0)
- __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
+ rcu_read_lock();
+ entry = __lookup_addr(pernet, &mpc_addr, false);
+ if (entry) {
+ __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap);
+ msk->mpc_endpoint_id = entry->addr.id;
+ backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ }
+ rcu_read_unlock();
+
+ if (backup)
+ mptcp_pm_send_ack(msk, subflow, true, backup);
msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
}
@@ -705,16 +731,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
return;
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
- if (subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-
- spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for %s",
- mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
-
- mptcp_subflow_send_ack(ssk);
- spin_lock_bh(&msk->pm.lock);
- }
+ if (subflow)
+ mptcp_pm_send_ack(msk, subflow, false, false);
}
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
@@ -729,7 +747,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
- bool slow;
local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
@@ -741,23 +758,18 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
continue;
}
- slow = lock_sock_fast(ssk);
- if (subflow->backup != bkup)
- msk->last_snd = NULL;
- subflow->backup = bkup;
- subflow->send_mp_prio = 1;
- subflow->request_bkup = bkup;
-
- pr_debug("send ack for mp_prio");
- __mptcp_subflow_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
-
+ __mptcp_pm_send_ack(msk, subflow, true, bkup);
return 0;
}
return -EINVAL;
}
+static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id)
+{
+ return local_id == id || (!local_id && msk->mpc_endpoint_id == id);
+}
+
static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list,
enum linux_mptcp_mib_field rm_type)
@@ -781,6 +793,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
return;
for (i = 0; i < rm_list->nr; i++) {
+ u8 rm_id = rm_list->ids[i];
bool removed = false;
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
@@ -788,15 +801,15 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
u8 id = subflow->local_id;
- if (rm_type == MPTCP_MIB_RMADDR)
- id = subflow->remote_id;
-
- if (rm_list->ids[i] != id)
+ if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ continue;
+ if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
- pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u",
+ pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_list->ids[i], subflow->local_id, subflow->remote_id);
+ i, rm_id, subflow->local_id, subflow->remote_id,
+ msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -808,7 +821,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
if (rm_type == MPTCP_MIB_RMSUBFLOW)
- __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap);
if (!removed)
continue;
@@ -907,10 +920,11 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
/* do not insert duplicate address, differentiate on port only
* singled addresses
*/
+ if (!address_use_port(entry))
+ entry->addr.port = 0;
list_for_each_entry(cur, &pernet->local_addr_list, list) {
if (mptcp_addresses_equal(&cur->addr, &entry->addr,
- address_use_port(entry) &&
- address_use_port(cur))) {
+ cur->addr.port || entry->addr.port)) {
/* allow replacing the exiting endpoint only if such
* endpoint is an implicit one and the user-space
* did not provide an endpoint id
@@ -956,7 +970,10 @@ find_next:
}
pernet->addrs++;
- list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
+ if (!entry->addr.port)
+ list_add_tail_rcu(&entry->list, &pernet->local_addr_list);
+ else
+ list_add_rcu(&entry->list, &pernet->local_addr_list);
ret = entry->addr.id;
out:
@@ -1134,7 +1151,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss
}
unlock_sock_fast(ssk, slow);
- /* always try to push the pending data regarless of re-injections:
+ /* always try to push the pending data regardless of re-injections:
* we can possibly use backup subflows now, and subflow selection
* is cheap under the msk socket lock
*/
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7e1518bb6115..a3f1c1461874 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
- amount >>= SK_MEM_QUANTUM_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ amount >>= PAGE_SHIFT;
+ mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
__sk_mem_reduce_allocated(sk, amount);
}
@@ -181,8 +181,8 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size)
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
- if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
- __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+ if (unlikely(reclaimable >= PAGE_SIZE))
+ __mptcp_rmem_reclaim(sk, reclaimable);
}
static void mptcp_rfree(struct sk_buff *skb)
@@ -323,20 +323,16 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
struct mptcp_sock *msk = mptcp_sk(sk);
int amt, amount;
- if (size < msk->rmem_fwd_alloc)
+ if (size <= msk->rmem_fwd_alloc)
return true;
+ size -= msk->rmem_fwd_alloc;
amt = sk_mem_pages(size);
- amount = amt << SK_MEM_QUANTUM_SHIFT;
- msk->rmem_fwd_alloc += amount;
- if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
- if (ssk->sk_forward_alloc < amount) {
- msk->rmem_fwd_alloc -= amount;
- return false;
- }
+ amount = amt << PAGE_SHIFT;
+ if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
+ return false;
- ssk->sk_forward_alloc -= amount;
- }
+ msk->rmem_fwd_alloc += amount;
return true;
}
@@ -512,7 +508,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk)
tcp_send_ack(ssk);
}
-void mptcp_subflow_send_ack(struct sock *ssk)
+static void mptcp_subflow_send_ack(struct sock *ssk)
{
bool slow;
@@ -971,25 +967,6 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static void __mptcp_mem_reclaim_partial(struct sock *sk)
-{
- int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
-
- lockdep_assert_held_once(&sk->sk_lock.slock);
-
- if (reclaimable > SK_MEM_QUANTUM)
- __mptcp_rmem_reclaim(sk, reclaimable - 1);
-
- sk_mem_reclaim_partial(sk);
-}
-
-static void mptcp_mem_reclaim_partial(struct sock *sk)
-{
- mptcp_data_lock(sk);
- __mptcp_mem_reclaim_partial(sk);
- mptcp_data_unlock(sk);
-}
-
static void dfrag_uncharge(struct sock *sk, int len)
{
sk_mem_uncharge(sk, len);
@@ -1009,7 +986,6 @@ static void __mptcp_clean_una(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- bool cleaned = false;
u64 snd_una;
/* on fallback we just need to ignore snd_una, as this is really
@@ -1032,7 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
}
dfrag_clear(sk, dfrag);
- cleaned = true;
}
dfrag = mptcp_rtx_head(sk);
@@ -1054,7 +1029,6 @@ static void __mptcp_clean_una(struct sock *sk)
dfrag->already_sent -= delta;
dfrag_uncharge(sk, delta);
- cleaned = true;
}
/* all retransmitted data acked, recovery completed */
@@ -1062,9 +1036,6 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (cleaned && tcp_under_memory_pressure(sk))
- __mptcp_mem_reclaim_partial(sk);
-
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
@@ -1216,12 +1187,6 @@ static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, boo
{
gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
- if (unlikely(tcp_under_memory_pressure(sk))) {
- if (data_lock_held)
- __mptcp_mem_reclaim_partial(sk);
- else
- mptcp_mem_reclaim_partial(sk);
- }
return __mptcp_alloc_tx_skb(sk, ssk, gfp);
}
@@ -3464,7 +3429,10 @@ static struct proto mptcp_prot = {
.get_port = mptcp_get_port,
.forward_alloc_get = mptcp_forward_alloc_get,
.sockets_allocated = &mptcp_sockets_allocated,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
.sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 480c5320b86e..5d6043c16b09 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -83,7 +83,6 @@
/* MPTCP MP_JOIN flags */
#define MPTCPOPT_BACKUP BIT(0)
-#define MPTCPOPT_HMAC_LEN 20
#define MPTCPOPT_THMAC_LEN 8
/* MPTCP MP_CAPABLE flags */
@@ -283,6 +282,7 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
bool allow_infinite_fallback;
+ u8 mpc_endpoint_id;
u8 recvmsg_inq:1,
cork:1,
nodelay:1;
@@ -608,7 +608,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
-void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index af28f3b60389..901c763dcdbb 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1634,7 +1634,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
- * procfs/diag interaces really show this one belonging to the correct
+ * procfs/diag interfaces really show this one belonging to the correct
* user.
*/
SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;