From fb586f25300f4587c7ebd097a604bf269b25bfa7 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 8 Apr 2016 16:41:28 -0300 Subject: sctp: delay calls to sk_data_ready() as much as possible Currently processing of multiple chunks in a single SCTP packet leads to multiple calls to sk_data_ready, causing multiple wake up signals which are costy and doesn't make it wake up any faster. With this patch it will note that the wake up is pending and will do it before leaving the state machine interpreter, latest place possible to do it realiably and cleanly. Note that sk_data_ready events are not dependent on asocs, unlike waking up writers. v2: series re-checked v3: use local vars to cleanup the code, suggested by Jakub Sitnicki Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ulpqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sctp/ulpqueue.c') diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ce469d648ffb..72e5b3e41cdd 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -264,7 +264,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue) - sk->sk_data_ready(sk); + sctp_sk(sk)->pending_data_ready = 1; return 1; out_free: @@ -1140,5 +1140,5 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) /* If there is data waiting, send it up the socket now. */ if (sctp_ulpq_clear_pd(ulpq) || ev) - sk->sk_data_ready(sk); + sctp_sk(sk)->pending_data_ready = 1; } -- cgit v1.2.3 From 311b21774f1389f9c34eac4da90c43c95fc2b62b Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Apr 2016 19:12:29 -0300 Subject: sctp: simplify sk_receive_queue locking SCTP already serializes access to rcvbuf through its sock lock: sctp_recvmsg takes it right in the start and release at the end, while rx path will also take the lock before doing any socket processing. On sctp_rcv() it will check if there is an user using the socket and, if there is, it will queue incoming packets to the backlog. The backlog processing will do the same. Even timers will do such check and re-schedule if an user is using the socket. Simplifying this will allow us to remove sctp_skb_list_tail and get ride of some expensive lockings. The lists that it is used on are also mangled with functions like __skb_queue_tail and __skb_unlink in the same context, like on sctp_ulpq_tail_event() and sctp_clear_pd(). sctp_close() will also purge those while using only the sock lock. Therefore the lockings performed by sctp_skb_list_tail() are not necessary. This patch removes this function and replaces its calls with just skb_queue_splice_tail_init() instead. The biggest gain is at sctp_ulpq_tail_event(), because the events always contain a list, even if it's queueing a single skb and this was triggering expensive calls to spin_lock_irqsave/_irqrestore for every data chunk received. As SCTP will deliver each data chunk on a corresponding recvmsg, the more effective the change will be. Before this patch, with chunks with 30 bytes: netperf -t SCTP_STREAM -H 192.168.1.2 -cC -l 60 -- -m 30 -S 400000 400000 -s 400000 400000 on a 10Gbit link with 1500 MTU: SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.1.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 425984 425984 30 60.00 137.45 7.34 7.36 52.504 52.608 With it: SCTP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.1.1 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 425984 425984 30 60.00 179.10 7.97 6.70 43.740 36.788 Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 15 --------------- net/sctp/socket.c | 4 +--- net/sctp/ulpqueue.c | 5 +++-- 3 files changed, 4 insertions(+), 20 deletions(-) (limited to 'net/sctp/ulpqueue.c') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 03fb33efcae2..978d5f67d5a7 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -359,21 +359,6 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp); #define sctp_skb_for_each(pos, head, tmp) \ skb_queue_walk_safe(head, pos, tmp) -/* A helper to append an entire skb list (list) to another (head). */ -static inline void sctp_skb_list_tail(struct sk_buff_head *list, - struct sk_buff_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&head->lock, flags); - spin_lock(&list->lock); - - skb_queue_splice_tail_init(list, head); - - spin_unlock(&list->lock); - spin_unlock_irqrestore(&head->lock, flags); -} - /** * sctp_list_dequeue - remove from the head of the queue * @list: list to dequeue from diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 36697f85ce48..bf265a4bba6e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6766,13 +6766,11 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, * However, this function was correct in any case. 8) */ if (flags & MSG_PEEK) { - spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) atomic_inc(&skb->users); - spin_unlock_bh(&sk->sk_receive_queue.lock); } else { - skb = skb_dequeue(&sk->sk_receive_queue); + skb = __skb_dequeue(&sk->sk_receive_queue); } if (skb) diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 72e5b3e41cdd..ec12a8920e5f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -141,7 +141,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) */ if (!skb_queue_empty(&sp->pd_lobby)) { struct list_head *list; - sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue); + skb_queue_splice_tail_init(&sp->pd_lobby, + &sk->sk_receive_queue); list = (struct list_head *)&sctp_sk(sk)->pd_lobby; INIT_LIST_HEAD(list); return 1; @@ -252,7 +253,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * collected on a list. */ if (skb_list) - sctp_skb_list_tail(skb_list, queue); + skb_queue_splice_tail_init(skb_list, queue); else __skb_queue_tail(queue, skb); -- cgit v1.2.3 From 0970f5b3665933f5f0d069607c78fb10bd918b62 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 29 Apr 2016 14:17:08 -0300 Subject: sctp: signal sk_data_ready earlier on data chunks reception Dave Miller pointed out that fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible") may insert latency specially if the receiving application is running on another CPU and that it would be better if we signalled as early as possible. This patch thus basically inverts the logic on fb586f25300f and signals it as early as possible, similar to what we had before. Fixes: fb586f25300f ("sctp: delay calls to sk_data_ready() as much as possible") Reported-by: Dave Miller Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 2 +- net/sctp/sm_sideeffect.c | 7 +++---- net/sctp/ulpqueue.c | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'net/sctp/ulpqueue.c') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 558bae3cbe0d..16b013a6191c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -218,7 +218,7 @@ struct sctp_sock { frag_interleave:1, recvrcvinfo:1, recvnxtinfo:1, - pending_data_ready:1; + data_ready_signalled:1; atomic_t pd_mode; /* Receive to here while partial delivery is in effect. */ diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e8f0112f9b28..aa3712259368 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1741,10 +1741,9 @@ out: } else if (local_cork) error = sctp_outq_uncork(&asoc->outqueue, gfp); - if (sp->pending_data_ready) { - sk->sk_data_ready(sk); - sp->pending_data_ready = 0; - } + if (sp->data_ready_signalled) + sp->data_ready_signalled = 0; + return error; nomem: error = -ENOMEM; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index ec12a8920e5f..ec166d2bd2d9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -194,6 +194,7 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; + struct sctp_sock *sp = sctp_sk(sk); struct sk_buff_head *queue, *skb_list; struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; @@ -211,7 +212,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sk_incoming_cpu_update(sk); } /* Check if the user wishes to receive this event. */ - if (!sctp_ulpevent_is_enabled(event, &sctp_sk(sk)->subscribe)) + if (!sctp_ulpevent_is_enabled(event, &sp->subscribe)) goto out_free; /* If we are in partial delivery mode, post to the lobby until @@ -219,7 +220,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * the association the cause of the partial delivery. */ - if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) { + if (atomic_read(&sp->pd_mode) == 0) { queue = &sk->sk_receive_queue; } else { if (ulpq->pd_mode) { @@ -231,7 +232,7 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if ((event->msg_flags & MSG_NOTIFICATION) || (SCTP_DATA_NOT_FRAG == (event->msg_flags & SCTP_DATA_FRAG_MASK))) - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; else { clear_pd = event->msg_flags & MSG_EOR; queue = &sk->sk_receive_queue; @@ -242,10 +243,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) * can queue this to the receive queue instead * of the lobby. */ - if (sctp_sk(sk)->frag_interleave) + if (sp->frag_interleave) queue = &sk->sk_receive_queue; else - queue = &sctp_sk(sk)->pd_lobby; + queue = &sp->pd_lobby; } } @@ -264,8 +265,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) if (clear_pd) sctp_ulpq_clear_pd(ulpq); - if (queue == &sk->sk_receive_queue) - sctp_sk(sk)->pending_data_ready = 1; + if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } return 1; out_free: @@ -1126,11 +1129,13 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) { struct sctp_ulpevent *ev = NULL; struct sock *sk; + struct sctp_sock *sp; if (!ulpq->pd_mode) return; sk = ulpq->asoc->base.sk; + sp = sctp_sk(sk); if (sctp_ulpevent_type_enabled(SCTP_PARTIAL_DELIVERY_EVENT, &sctp_sk(sk)->subscribe)) ev = sctp_ulpevent_make_pdapi(ulpq->asoc, @@ -1140,6 +1145,8 @@ void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, gfp_t gfp) __skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev)); /* If there is data waiting, send it up the socket now. */ - if (sctp_ulpq_clear_pd(ulpq) || ev) - sctp_sk(sk)->pending_data_ready = 1; + if ((sctp_ulpq_clear_pd(ulpq) || ev) && !sp->data_ready_signalled) { + sp->data_ready_signalled = 1; + sk->sk_data_ready(sk); + } } -- cgit v1.2.3