diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 95 |
1 files changed, 36 insertions, 59 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9bf3c7bc1762..e120e9616454 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -421,6 +421,7 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_set_connected(struct sock *sk) { + __mptcp_propagate_sndbuf(sk, mptcp_sk(sk)->first); if (sk->sk_state == TCP_SYN_SENT) { inet_sk_state_store(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -472,7 +473,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; msk = mptcp_sk(parent); - mptcp_propagate_sndbuf(parent, sk); subflow->rel_write_seq = 1; subflow->conn_finished = 1; subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -1226,7 +1226,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(subflow->fail_tout, fail_tout); tcp_send_ack(ssk); - mptcp_reset_timeout(msk, subflow->fail_tout); + mptcp_reset_tout_timer(msk, subflow->fail_tout); } static bool subflow_check_data_avail(struct sock *ssk) @@ -1237,7 +1237,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); if (subflow->data_avail) return true; @@ -1271,7 +1271,7 @@ static bool subflow_check_data_avail(struct sock *ssk) continue; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); break; } return true; @@ -1293,7 +1293,7 @@ fallback: goto reset; } mptcp_subflow_fail(msk, ssk); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1310,7 +1310,7 @@ reset: while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); return false; } @@ -1322,7 +1322,7 @@ reset: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1334,7 +1334,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1362,42 +1362,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } -void __mptcp_error_report(struct sock *sk) -{ - struct mptcp_subflow_context *subflow; - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - int err = sock_error(ssk); - int ssk_state; - - if (!err) - continue; - - /* only propagate errors on fallen-back sockets or - * on MPC connect - */ - if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk)) - continue; - - /* We need to propagate only transition to CLOSE state. - * Orphaned socket will see such state change via - * subflow_sched_work_if_closed() and that path will properly - * destroy the msk as needed. - */ - ssk_state = inet_sk_state_load(ssk); - if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); - WRITE_ONCE(sk->sk_err, -err); - - /* This barrier is coupled with smp_rmb() in mptcp_poll() */ - smp_wmb(); - sk_error_report(sk); - break; - } -} - static void subflow_error_report(struct sock *ssk) { struct sock *sk = mptcp_subflow_ctx(ssk)->conn; @@ -1441,10 +1405,18 @@ static void subflow_data_ready(struct sock *sk) WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && !subflow->mp_join && !(state & TCPF_CLOSE)); - if (mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) { mptcp_data_ready(parent, sk); - else if (unlikely(sk->sk_err)) + + /* subflow-level lowat test are not relevant. + * respect the msk-level threshold eventually mandating an immediate ack + */ + if (mptcp_data_avail(msk) < parent->sk_rcvlowat && + (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } else if (unlikely(sk->sk_err)) { subflow_error_report(sk); + } } static void subflow_write_space(struct sock *ssk) @@ -1561,8 +1533,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - mptcp_sockopt_sync(msk, ssk); - ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1588,6 +1558,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); WRITE_ONCE(msk->allow_infinite_fallback, false); + mptcp_stop_tout_timer(sk); return 0; failed_unlink: @@ -1672,7 +1643,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, err = security_mptcp_add_subflow(sk, sf->sk); if (err) - goto release_ssk; + goto err_free; /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1686,15 +1657,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + if (err) + goto err_free; -release_ssk: + mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); - if (err) { - sock_release(sf); - return err; - } - /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. Adjust inode references, so that the @@ -1714,6 +1682,11 @@ release_ssk: mptcp_subflow_ops_override(sf->sk); return 0; + +err_free: + release_sock(sf->sk); + sock_release(sf); + return err; } static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, @@ -1763,7 +1736,6 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_propagate_sndbuf(parent, sk); mptcp_do_fallback(sk); mptcp_rcv_space_init(msk, sk); pr_fallback(msk); @@ -1991,9 +1963,15 @@ static void subflow_ulp_clone(const struct request_sock *req, static void tcp_release_cb_override(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + long status; - if (mptcp_subflow_has_delegated_action(subflow)) - mptcp_subflow_process_delegated(ssk); + /* process and clear all the pending actions, but leave the subflow into + * the napi queue. To respect locking, only the same CPU that originated + * the action can touch the list. mptcp_napi_poll will take care of it. + */ + status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0); + if (status) + mptcp_subflow_process_delegated(ssk, status); tcp_release_cb(ssk); } @@ -2073,7 +2051,6 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; - subflow_v6m_specific.net_frag_header_len = 0; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; tcpv6_prot_override = tcpv6_prot; |