From e3274026e2ec69eec6ab51bc499e14bb548548d0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:38:39 -0400 Subject: SUNRPC: move all of xprt handling into svc_handle_xprt() svc_handle_xprt() does lots of things itself, but leaves some to the caller - svc_recv(). This isn't elegant. Move that code out of svc_recv() into svc_handle_xprt(). Move the calls to svc_xprt_release() from svc_send() and svc_drop() (the two possible final steps in svc_process()) and from svc_recv() (in the case where svc_process() wasn't called) into svc_handle_xprt(). Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 53 +++++++++++++++++++-------------------------------- 1 file changed, 20 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 4cfe9640df48..60759647fee4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -785,7 +785,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt svc_xprt_received(newxpt); } -static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) +static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) { struct svc_serv *serv = rqstp->rq_server; int len = 0; @@ -826,11 +826,26 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) len = xprt->xpt_ops->xpo_recvfrom(rqstp); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + if (len <= 0) + goto out; + + trace_svc_xdr_recvfrom(&rqstp->rq_arg); + + clear_bit(XPT_OLD, &xprt->xpt_flags); + + rqstp->rq_chandle.defer = svc_defer; + + if (serv->sv_stats) + serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); + rqstp->rq_stime = ktime_get(); + svc_process(rqstp); } else svc_xprt_received(xprt); out: - return len; + rqstp->rq_res.len = 0; + svc_xprt_release(rqstp); } /** @@ -844,11 +859,9 @@ out: void svc_recv(struct svc_rqst *rqstp) { struct svc_xprt *xprt = NULL; - struct svc_serv *serv = rqstp->rq_server; - int len; if (!svc_alloc_arg(rqstp)) - goto out; + return; try_to_freeze(); cond_resched(); @@ -856,31 +869,9 @@ void svc_recv(struct svc_rqst *rqstp) goto out; xprt = svc_get_next_xprt(rqstp); - if (!xprt) - goto out; - - len = svc_handle_xprt(rqstp, xprt); - - /* No data, incomplete (TCP) read, or accept() */ - if (len <= 0) - goto out_release; - - trace_svc_xdr_recvfrom(&rqstp->rq_arg); - - clear_bit(XPT_OLD, &xprt->xpt_flags); - - rqstp->rq_chandle.defer = svc_defer; - - if (serv->sv_stats) - serv->sv_stats->netcnt++; - percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); - rqstp->rq_stime = ktime_get(); - svc_process(rqstp); + if (xprt) + svc_handle_xprt(rqstp, xprt); out: - return; -out_release: - rqstp->rq_res.len = 0; - svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_recv); @@ -890,7 +881,6 @@ EXPORT_SYMBOL_GPL(svc_recv); void svc_drop(struct svc_rqst *rqstp) { trace_svc_drop(rqstp); - svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_drop); @@ -906,8 +896,6 @@ void svc_send(struct svc_rqst *rqstp) { int status; xprt = rqstp->rq_xprt; - if (!xprt) - return; /* calculate over-all length */ xb = &rqstp->rq_res; @@ -920,7 +908,6 @@ void svc_send(struct svc_rqst *rqstp) status = xprt->xpt_ops->xpo_sendto(rqstp); trace_svc_send(rqstp, status); - svc_xprt_release(rqstp); } /* -- cgit v1.2.3 From 7b31f4daebad296e3164602b8303c265ec4ac7dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:38:45 -0400 Subject: SUNRPC: rename and refactor 
svc_get_next_xprt() svc_get_next_xprt() does a lot more than just get an xprt. It also decides if it needs to sleep, depending not only on the availability of xprts but also on the need to exit or handle external work. So rename it to svc_rqst_wait_for_work() and only do the testing and waiting. Move all the waiting-related code out of svc_recv() into the new svc_rqst_wait_for_work(). Move the dequeueing code out of svc_get_next_xprt() into svc_recv(). Previously svc_xprt_dequeue() would be called twice, once before waiting and possibly once after. Now instead rqst_should_sleep() is called twice. Once to decide if waiting is needed, and once to check again after setting the task state, to see if we might have missed a wakeup. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 92 ++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 60759647fee4..835160da3ad4 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -722,51 +722,34 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp) +static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) { - struct svc_pool *pool = rqstp->rq_pool; - - /* rq_xprt should be clear on entry */ - WARN_ON_ONCE(rqstp->rq_xprt); + struct svc_pool *pool = rqstp->rq_pool; - rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) - goto out_found; - - set_current_state(TASK_IDLE); - smp_mb__before_atomic(); - clear_bit(SP_CONGESTED, &pool->sp_flags); - clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - - if (likely(rqst_should_sleep(rqstp))) - schedule(); - else - __set_current_state(TASK_RUNNING); + if (rqst_should_sleep(rqstp)) { + set_current_state(TASK_IDLE); + smp_mb__before_atomic(); + clear_bit(SP_CONGESTED, &pool->sp_flags); + clear_bit(RQ_BUSY, &rqstp->rq_flags); + smp_mb__after_atomic(); + + /* Need to check should_sleep() again after + * setting task state in case a wakeup happened + * between testing and setting. + */ + if (rqst_should_sleep(rqstp)) { + schedule(); + } else { + __set_current_state(TASK_RUNNING); + cond_resched(); + } + set_bit(RQ_BUSY, &rqstp->rq_flags); + smp_mb__after_atomic(); + } else { + cond_resched(); + } try_to_freeze(); - - set_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) - goto out_found; - - if (kthread_should_stop()) - return NULL; return NULL; -out_found: - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - /* Normally we will wait up to 5 seconds for any required - * cache information to be provided. 
- */ - if (!test_bit(SP_CONGESTED, &pool->sp_flags)) - rqstp->rq_chandle.thread_wait = 5*HZ; - else - rqstp->rq_chandle.thread_wait = 1*HZ; - trace_svc_xprt_dequeue(rqstp); - return rqstp->rq_xprt; } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) @@ -858,20 +841,33 @@ out: */ void svc_recv(struct svc_rqst *rqstp) { - struct svc_xprt *xprt = NULL; + struct svc_pool *pool = rqstp->rq_pool; if (!svc_alloc_arg(rqstp)) return; - try_to_freeze(); - cond_resched(); + svc_rqst_wait_for_work(rqstp); + + clear_bit(SP_TASK_PENDING, &pool->sp_flags); + if (kthread_should_stop()) - goto out; + return; + + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) { + struct svc_xprt *xprt = rqstp->rq_xprt; + + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + if (!test_bit(SP_CONGESTED, &pool->sp_flags)) + rqstp->rq_chandle.thread_wait = 5 * HZ; + else + rqstp->rq_chandle.thread_wait = 1 * HZ; - xprt = svc_get_next_xprt(rqstp); - if (xprt) + trace_svc_xprt_dequeue(rqstp); svc_handle_xprt(rqstp, xprt); -out: + } } EXPORT_SYMBOL_GPL(svc_recv); -- cgit v1.2.3 From 6ed8cdf967f7e9fc96cd1c129719ef99db2f9afc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 Sep 2023 10:38:51 -0400 Subject: SUNRPC: Clean up bc_svc_process() The test robot complained that, in some build configurations, the @error variable in bc_svc_process's only caller is set but never used. This happens because dprintk() is the only consumer of that value. - Remove the dprintk() call sites in favor of the svc_process tracepoint. - The @error variable and the return value of bc_svc_process() are now unused, so get rid of them. - The @serv parameter is set to rqstp->rq_server by the only caller, and bc_svc_process() then uses it only to set rqstp->rq_server. It can be removed. - Rename bc_svc_process() according to the convention that globally-visible RPC server functions have names that begin with "svc_"; and because it is globally-visible, give it a proper kdoc comment. 
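For illustration only, a condensed before/after view of the lone call site in nfs41_callback_svc(), abridged from the fs/nfs/callback.c hunk below (the surrounding wait-queue logic is omitted):

	/* Before: a status value that only dprintk() ever consumed */
	dprintk("Invoking bc_svc_process()\n");
	error = bc_svc_process(serv, req, rqstp);
	dprintk("bc_svc_process() returned w/ error code= %d\n", error);

	/* After: a void call; the svc_process tracepoint in the
	 * common processing path provides the observability instead.
	 */
	svc_process_bc(req, rqstp);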
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308121314.HA8Rq2XG-lkp@intel.com/ Signed-off-by: Chuck Lever --- fs/nfs/callback.c | 6 +----- include/linux/sunrpc/svc.h | 3 +-- net/sunrpc/svc.c | 38 ++++++++++++-------------------------- 3 files changed, 14 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 466ebf1d41b2..272e6d2bb478 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -95,7 +95,6 @@ nfs41_callback_svc(void *vrqstp) struct svc_rqst *rqstp = vrqstp; struct svc_serv *serv = rqstp->rq_server; struct rpc_rqst *req; - int error; DEFINE_WAIT(wq); set_freezable(); @@ -109,10 +108,7 @@ nfs41_callback_svc(void *vrqstp) list_del(&req->rq_bc_list); spin_unlock_bh(&serv->sv_cb_lock); finish_wait(&serv->sv_cb_waitq, &wq); - dprintk("Invoking bc_svc_process()\n"); - error = bc_svc_process(serv, req, rqstp); - dprintk("bc_svc_process() returned w/ error code= %d\n", - error); + svc_process_bc(req, rqstp); } else { spin_unlock_bh(&serv->sv_cb_lock); if (!kthread_should_stop()) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dbf5b21feafe..0cdca5960171 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -413,8 +413,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_process(struct svc_rqst *rqstp); -int bc_svc_process(struct svc_serv *, struct rpc_rqst *, - struct svc_rqst *); +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp); int svc_register(const struct svc_serv *, struct net *, const int, const unsigned short, const unsigned short); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 812fda9d45dd..a3d031deb1ec 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1544,24 +1544,20 @@ out_drop: } #if defined(CONFIG_SUNRPC_BACKCHANNEL) -/* - * Process a backchannel RPC request that arrived over an existing - * outbound connection +/** + * svc_process_bc - process a reverse-direction RPC request + * @req: RPC request to be used for client-side processing + * @rqstp: server-side execution context + * */ -int -bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, - struct svc_rqst *rqstp) +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { struct rpc_task *task; int proc_error; - int error; - - dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; - rqstp->rq_server = serv; rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); @@ -1590,10 +1586,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, * been processed by the caller. 
*/ svcxdr_init_decode(rqstp); - if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) { - error = -EINVAL; - goto out; - } + if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) + return; /* Parse and execute the bc call */ proc_error = svc_process_common(rqstp); @@ -1602,26 +1596,18 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); - error = -EINVAL; - goto out; + return; } /* Finally, send the reply synchronously */ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req); - if (IS_ERR(task)) { - error = PTR_ERR(task); - goto out; - } + if (IS_ERR(task)) + return; WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - error = task->tk_status; rpc_put_task(task); - -out: - dprintk("svc: %s(), error=%d\n", __func__, error); - return error; } -EXPORT_SYMBOL_GPL(bc_svc_process); +EXPORT_SYMBOL_GPL(svc_process_bc); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /** -- cgit v1.2.3 From 063ab935a48b3a2854f433957adbb2bde396ed22 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:38:58 -0400 Subject: SUNRPC: integrate back-channel processing with svc_recv() Using svc_recv() for (NFSv4.1) back-channel handling means we have just one mechanism for waking threads. Also change kthread_freezable_should_stop() in nfs4_callback_svc() to kthread_should_stop() as used elsewhere. kthread_freezable_should_stop() effectively adds a try_to_freeze() call, and svc_recv() already contains that at an appropriate place. Signed-off-by: NeilBrown Cc: Trond Myklebust Cc: Anna Schumaker Signed-off-by: Chuck Lever --- fs/nfs/callback.c | 42 ++------------------------------------- include/linux/sunrpc/svc.h | 2 -- net/sunrpc/backchannel_rqst.c | 8 +++----- net/sunrpc/svc.c | 2 +- net/sunrpc/svc_xprt.c | 27 +++++++++++++++++++++++++ net/sunrpc/xprtrdma/backchannel.c | 2 +- 6 files changed, 34 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 272e6d2bb478..42a0c2f1e785 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_freezable_should_stop(NULL)) + while (!kthread_should_stop()) svc_recv(rqstp); svc_exit_thread(rqstp); @@ -86,41 +86,6 @@ nfs4_callback_svc(void *vrqstp) } #if defined(CONFIG_NFS_V4_1) -/* - * The callback service for NFSv4.1 callbacks - */ -static int -nfs41_callback_svc(void *vrqstp) -{ - struct svc_rqst *rqstp = vrqstp; - struct svc_serv *serv = rqstp->rq_server; - struct rpc_rqst *req; - DEFINE_WAIT(wq); - - set_freezable(); - - while (!kthread_freezable_should_stop(NULL)) { - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); - spin_lock_bh(&serv->sv_cb_lock); - if (!list_empty(&serv->sv_cb_list)) { - req = list_first_entry(&serv->sv_cb_list, - struct rpc_rqst, rq_bc_list); - list_del(&req->rq_bc_list); - spin_unlock_bh(&serv->sv_cb_lock); - finish_wait(&serv->sv_cb_waitq, &wq); - svc_process_bc(req, rqstp); - } else { - spin_unlock_bh(&serv->sv_cb_lock); - if (!kthread_should_stop()) - schedule(); - finish_wait(&serv->sv_cb_waitq, &wq); - } - } - - svc_exit_thread(rqstp); - return 0; -} - static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, struct svc_serv *serv) { @@ -233,10 +198,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) cb_info->users); threadfn = nfs4_callback_svc; -#if defined(CONFIG_NFS_V4_1) - if (minorversion) - threadfn = 
nfs41_callback_svc; -#else +#if !defined(CONFIG_NFS_V4_1) if (minorversion) return ERR_PTR(-ENOTSUPP); #endif diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0cdca5960171..acbe1314febd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -92,8 +92,6 @@ struct svc_serv { * that arrive over the same * connection */ spinlock_t sv_cb_lock; /* protects the svc_cb_list */ - wait_queue_head_t sv_cb_waitq; /* sleep here if there are no - * entries in the svc_cb_list */ bool sv_bc_enabled; /* service uses backchannel */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 65a6c6429a53..44b7c89a635f 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -349,10 +349,8 @@ found: } /* - * Add callback request to callback list. The callback - * service sleeps on the sv_cb_waitq waiting for new - * requests. Wake it up after adding enqueing the - * request. + * Add callback request to callback list. Wake a thread + * on the first pool (usually the only pool) to handle it. */ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) { @@ -371,6 +369,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) xprt_get(xprt); spin_lock(&bc_serv->sv_cb_lock); list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); - wake_up(&bc_serv->sv_cb_waitq); spin_unlock(&bc_serv->sv_cb_lock); + svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a3d031deb1ec..b98a159eb17f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -440,7 +440,6 @@ __svc_init_bc(struct svc_serv *serv) { INIT_LIST_HEAD(&serv->sv_cb_list); spin_lock_init(&serv->sv_cb_lock); - init_waitqueue_head(&serv->sv_cb_waitq); } #else static void @@ -718,6 +717,7 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) set_bit(SP_CONGESTED, &pool->sp_flags); } +EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); static struct svc_pool * svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 835160da3ad4..b057f1cbe7a1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -719,6 +720,13 @@ rqst_should_sleep(struct svc_rqst *rqstp) if (freezing(current)) return false; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (svc_is_backchannel(rqstp)) { + if (!list_empty(&rqstp->rq_server->sv_cb_list)) + return false; + } +#endif + return true; } @@ -868,6 +876,25 @@ void svc_recv(struct svc_rqst *rqstp) trace_svc_xprt_dequeue(rqstp); svc_handle_xprt(rqstp, xprt); } + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (svc_is_backchannel(rqstp)) { + struct svc_serv *serv = rqstp->rq_server; + struct rpc_rqst *req; + + spin_lock_bh(&serv->sv_cb_lock); + req = list_first_entry_or_null(&serv->sv_cb_list, + struct rpc_rqst, rq_bc_list); + if (req) { + list_del(&req->rq_bc_list); + spin_unlock_bh(&serv->sv_cb_lock); + + svc_process_bc(req, rqstp); + return; + } + spin_unlock_bh(&serv->sv_cb_lock); + } +#endif } EXPORT_SYMBOL_GPL(svc_recv); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index e4d84a13c566..bfc434ec52a7 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -267,7 +267,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); 
spin_unlock(&bc_serv->sv_cb_lock); - wake_up(&bc_serv->sv_cb_waitq); + svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); r_xprt->rx_stats.bcall_count++; return; -- cgit v1.2.3 From fa341560ca7458f4396d5a0771cb5f2358d8535d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:04 -0400 Subject: SUNRPC: change how svc threads are asked to exit. svc threads are currently stopped using kthread_stop(). This requires identifying a specific thread. However we don't care which thread stops, just as long as one does. So instead, set a flag in the svc_pool to say that a thread needs to die, and have each thread check this flag instead of calling kthread_should_stop(). The first thread to find and clear this flag then moves towards exiting. This removes an explicit dependency on sp_all_threads which will make a future patch simpler. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/lockd/svc.c | 5 ++--- fs/lockd/svclock.c | 5 ++--- fs/nfs/callback.c | 2 +- fs/nfsd/nfs4proc.c | 8 +++++--- fs/nfsd/nfssvc.c | 2 +- include/linux/lockd/lockd.h | 2 +- include/linux/sunrpc/svc.h | 26 +++++++++++++++++++++++++- net/sunrpc/svc.c | 43 +++++++++++++++++++++---------------------- net/sunrpc/svc_xprt.c | 7 +++---- 9 files changed, 61 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 365cc7adff66..81be07c1d3d1 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -135,11 +134,11 @@ lockd(void *vrqstp) * The main request loop. We don't terminate until the last * NFS mount or NFS daemon has gone away. */ - while (!kthread_should_stop()) { + while (!svc_thread_should_stop(rqstp)) { /* update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nlm_max_connections; - nlmsvc_retry_blocked(); + nlmsvc_retry_blocked(rqstp); svc_recv(rqstp); } if (nlmsvc_ops) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 993999297e31..2dc10900ad1c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #define NLMDBG_FACILITY NLMDBG_SVCLOCK @@ -1032,13 +1031,13 @@ retry_deferred_block(struct nlm_block *block) * be retransmitted. */ void -nlmsvc_retry_blocked(void) +nlmsvc_retry_blocked(struct svc_rqst *rqstp) { unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; spin_lock(&nlm_blocked_lock); - while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { + while (!list_empty(&nlm_blocked) && !svc_thread_should_stop(rqstp)) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); if (block->b_when == NLM_NEVER) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 42a0c2f1e785..4ffa1f469e90 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -78,7 +78,7 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); - while (!kthread_should_stop()) + while (!svc_thread_should_stop(rqstp)) svc_recv(rqstp); svc_exit_thread(rqstp); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 580c5f592408..d7e88c7beba3 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1329,7 +1329,8 @@ extern void nfs_sb_deactive(struct super_block *sb); * setup a work entry in the ssc delayed unmount list. 
*/ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, - struct nfsd4_ssc_umount_item **nsui) + struct nfsd4_ssc_umount_item **nsui, + struct svc_rqst *rqstp) { struct nfsd4_ssc_umount_item *ni = NULL; struct nfsd4_ssc_umount_item *work = NULL; @@ -1351,7 +1352,7 @@ try_again: spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (kthread_should_stop() || + if (svc_thread_should_stop(rqstp) || (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); @@ -1467,7 +1468,7 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); - status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui); + status = nfsd4_ssc_setup_dul(nn, ipaddr, nsui, rqstp); if (status) goto out_free_devname; if ((*nsui)->nsui_vfsmount) @@ -1642,6 +1643,7 @@ static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy, if (bytes_total == 0) bytes_total = ULLONG_MAX; do { + /* Only async copies can be stopped here */ if (kthread_should_stop()) break; bytes_copied = nfsd_copy_file_range(src, src_pos, dst, dst_pos, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c7af1095f6b5..0b03a2e50dee 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -957,7 +957,7 @@ nfsd(void *vrqstp) /* * The main request loop */ - while (!kthread_should_stop()) { + while (!svc_thread_should_stop(rqstp)) { /* Update sv_maxconn if it has changed */ rqstp->rq_server->sv_maxconn = nn->max_connections; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0f016d69c996..9f565416d186 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -282,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, struct nlm_lock *, struct nlm_cookie *); __be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -void nlmsvc_retry_blocked(void); +void nlmsvc_retry_blocked(struct svc_rqst *rqstp); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); void nlmsvc_grant_reply(struct nlm_cookie *, __be32); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index acbe1314febd..0ec691070e27 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,8 @@ struct svc_pool { enum { SP_TASK_PENDING, /* still work to do even if no xprt is queued */ SP_CONGESTED, /* all threads are busy, none idle */ + SP_NEED_VICTIM, /* One thread needs to agree to exit */ + SP_VICTIM_REMAINS, /* One thread needs to actually exit */ }; @@ -259,7 +261,7 @@ enum { RQ_DROPME, /* drop current reply */ RQ_SPLICE_OK, /* turned off in gss privacy to prevent * encrypting page cache pages */ - RQ_VICTIM, /* about to be shut down */ + RQ_VICTIM, /* Have agreed to shut down */ RQ_BUSY, /* request is busy */ RQ_DATA, /* request has data */ }; @@ -299,6 +301,28 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } +/** + * svc_thread_should_stop - check if this thread should stop + * @rqstp: the thread that might need to stop + * + * To stop an svc thread, the pool flags SP_NEED_VICTIM and SP_VICTIM_REMAINS + * are set. The first thread which sees SP_NEED_VICTIM clears it, becoming + * the victim using this function. 
It should then promptly call + * svc_exit_thread() to complete the process, clearing SP_VICTIM_REMAINS + * so the task waiting for a thread to exit can wake and continue. + * + * Return values: + * %true: caller should invoke svc_exit_thread() + * %false: caller should do nothing + */ +static inline bool svc_thread_should_stop(struct svc_rqst *rqstp) +{ + if (test_and_clear_bit(SP_NEED_VICTIM, &rqstp->rq_pool->sp_flags)) + set_bit(RQ_VICTIM, &rqstp->rq_flags); + + return test_bit(RQ_VICTIM, &rqstp->rq_flags); +} + struct svc_deferred_req { u32 prot; /* protocol (UDP or TCP) */ struct svc_xprt *xprt; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b98a159eb17f..db579bbc0a0a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -725,19 +725,22 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; } -static struct task_struct * +static struct svc_pool * svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) { unsigned int i; - struct task_struct *task = NULL; if (pool != NULL) { spin_lock_bh(&pool->sp_lock); + if (pool->sp_nrthreads) + goto found_pool; + spin_unlock_bh(&pool->sp_lock); + return NULL; } else { for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_all_threads)) + if (pool->sp_nrthreads) goto found_pool; spin_unlock_bh(&pool->sp_lock); } @@ -745,16 +748,10 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat } found_pool: - if (!list_empty(&pool->sp_all_threads)) { - struct svc_rqst *rqstp; - - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - set_bit(RQ_VICTIM, &rqstp->rq_flags); - list_del_rcu(&rqstp->rq_all); - task = rqstp->rq_task; - } + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); + set_bit(SP_NEED_VICTIM, &pool->sp_flags); spin_unlock_bh(&pool->sp_lock); - return task; + return pool; } static int @@ -795,18 +792,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct svc_rqst *rqstp; - struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; + struct svc_pool *victim; do { - task = svc_pool_victim(serv, pool, &state); - if (task == NULL) + victim = svc_pool_victim(serv, pool, &state); + if (!victim) break; - rqstp = kthread_data(task); - /* Did we lose a race to svo_function threadfn? */ - if (kthread_stop(task) == -EINTR) - svc_exit_thread(rqstp); + svc_pool_wake_idle_thread(victim); + wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS, + TASK_IDLE); nrservs++; } while (nrservs < 0); return 0; @@ -926,8 +921,7 @@ svc_exit_thread(struct svc_rqst *rqstp) spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads--; - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) - list_del_rcu(&rqstp->rq_all); + list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); spin_lock_bh(&serv->sv_lock); @@ -938,6 +932,11 @@ svc_exit_thread(struct svc_rqst *rqstp) svc_rqst_free(rqstp); svc_put(serv); + /* That svc_put() cannot be the last, because the thread + * waiting for SP_VICTIM_REMAINS to clear must hold + * a reference. So it is still safe to access pool. 
+ */ + clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags); } EXPORT_SYMBOL_GPL(svc_exit_thread); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b057f1cbe7a1..b8539545fefd 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -675,7 +674,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) continue; set_current_state(TASK_IDLE); - if (kthread_should_stop()) { + if (svc_thread_should_stop(rqstp)) { set_current_state(TASK_RUNNING); return false; } @@ -713,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (kthread_should_stop()) + if (svc_thread_should_stop(rqstp)) return false; /* are we freezing? */ @@ -858,7 +857,7 @@ void svc_recv(struct svc_rqst *rqstp) clear_bit(SP_TASK_PENDING, &pool->sp_flags); - if (kthread_should_stop()) + if (svc_thread_should_stop(rqstp)) return; rqstp->rq_xprt = svc_xprt_dequeue(pool); -- cgit v1.2.3 From 5ff817b23534dd3942f881ab01dd5050505517aa Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:11 -0400 Subject: SUNRPC: add list of idle threads Rather than searching a list of threads to find an idle one, having a list of idle threads allows an idle thread to be found immediately. This adds some spin_lock calls which is not ideal, but as the hold-time is tiny it is still faster than searching a list. A future patch will remove them using llist.h. This involves some subtlety and so is left to a separate patch. This removes the need for the RQ_BUSY flag. The rqst is "busy" precisely when it is not on the "idle" list. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 25 ++++++++++++++++++++++++- include/trace/events/sunrpc.h | 1 - net/sunrpc/svc.c | 14 +++++++++----- net/sunrpc/svc_xprt.c | 15 +++++++++++---- 4 files changed, 44 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0ec691070e27..e9c34e99bc88 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -37,6 +37,7 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ + struct list_head sp_idle_threads; /* idle server threads */ /* statistics on pool operation */ struct percpu_counter sp_messages_arrived; @@ -186,6 +187,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); */ struct svc_rqst { struct list_head rq_all; /* all threads list */ + struct list_head rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -262,10 +264,31 @@ enum { RQ_DROPME, /* drop current reply */ RQ_SPLICE_OK, /* turned off in gss privacy to prevent * encrypting page cache pages */ RQ_VICTIM, /* Have agreed to shut down */ - RQ_BUSY, /* request is busy */ RQ_DATA, /* request has data */ }; +/** + * svc_thread_set_busy - mark a thread as busy + * @rqstp: the thread which is now busy + * + * If rq_idle is "empty", the thread must be busy. + */ +static inline void svc_thread_set_busy(struct svc_rqst *rqstp) +{ + INIT_LIST_HEAD(&rqstp->rq_idle); +} + +/** + * svc_thread_busy - check if a thread is busy + * @rqstp: the thread which might be busy + * + * If rq_idle is "empty", the thread must be busy. 
+ */ +static inline bool svc_thread_busy(struct svc_rqst *rqstp) +{ + return list_empty(&rqstp->rq_idle); +} + #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6beb38c1dcb5..337c90787fb1 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1677,7 +1677,6 @@ DEFINE_SVCXDRBUF_EVENT(sendto); svc_rqst_flag(DROPME) \ svc_rqst_flag(SPLICE_OK) \ svc_rqst_flag(VICTIM) \ - svc_rqst_flag(BUSY) \ svc_rqst_flag_end(DATA) #undef svc_rqst_flag diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index db579bbc0a0a..9d080fe2dcdf 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -510,6 +510,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); INIT_LIST_HEAD(&pool->sp_all_threads); + INIT_LIST_HEAD(&pool->sp_idle_threads); spin_lock_init(&pool->sp_lock); percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); @@ -641,7 +642,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - __set_bit(RQ_BUSY, &rqstp->rq_flags); + svc_thread_set_busy(rqstp); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -702,10 +703,13 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) struct svc_rqst *rqstp; rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - + spin_lock_bh(&pool->sp_lock); + rqstp = list_first_entry_or_null(&pool->sp_idle_threads, + struct svc_rqst, rq_idle); + if (rqstp) + list_del_init(&rqstp->rq_idle); + spin_unlock_bh(&pool->sp_lock); + if (rqstp) { WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); rcu_read_unlock(); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8539545fefd..ebfeeb504a79 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -737,8 +737,9 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); - clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); + spin_lock_bh(&pool->sp_lock); + list_add(&rqstp->rq_idle, &pool->sp_idle_threads); + spin_unlock_bh(&pool->sp_lock); /* Need to check should_sleep() again after * setting task state in case a wakeup happened @@ -751,8 +752,14 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) cond_resched(); } - set_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); + /* We *must* be removed from the list before we can continue. + * If we were woken, this is already done + */ + if (!svc_thread_busy(rqstp)) { + spin_lock_bh(&pool->sp_lock); + list_del_init(&rqstp->rq_idle); + spin_unlock_bh(&pool->sp_lock); + } } else { cond_resched(); } -- cgit v1.2.3 From 2b65a226840c0e86db0e7926856a0a017b3390f2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:17 -0400 Subject: SUNRPC: discard SP_CONGESTED We can tell if a pool is congested by checking if the idle list is empty. We don't need a separate flag. 
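To make the equivalence concrete, here is the only remaining consumer of the congestion state, condensed from the svc_recv() hunk below; an empty idle list now plays the role the flag used to:

	/* was: if (!test_bit(SP_CONGESTED, &pool->sp_flags)) */
	if (!list_empty(&pool->sp_idle_threads))
		rqstp->rq_chandle.thread_wait = 5 * HZ;
	else
		rqstp->rq_chandle.thread_wait = 1 * HZ;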
Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 - net/sunrpc/svc.c | 1 - net/sunrpc/svc_xprt.c | 4 +--- 3 files changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e9c34e99bc88..22b3018ebf62 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,7 +50,6 @@ struct svc_pool { /* bits for sp_flags */ enum { SP_TASK_PENDING, /* still work to do even if no xprt is queued */ - SP_CONGESTED, /* all threads are busy, none idle */ SP_NEED_VICTIM, /* One thread needs to agree to exit */ SP_VICTIM_REMAINS, /* One thread needs to actually exit */ }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9d080fe2dcdf..db4674211f36 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -719,7 +719,6 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) } rcu_read_unlock(); - set_bit(SP_CONGESTED, &pool->sp_flags); } EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ebfeeb504a79..fa0d854a5596 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -735,8 +735,6 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) if (rqst_should_sleep(rqstp)) { set_current_state(TASK_IDLE); - smp_mb__before_atomic(); - clear_bit(SP_CONGESTED, &pool->sp_flags); spin_lock_bh(&pool->sp_lock); list_add(&rqstp->rq_idle, &pool->sp_idle_threads); spin_unlock_bh(&pool->sp_lock); @@ -874,7 +872,7 @@ void svc_recv(struct svc_rqst *rqstp) /* Normally we will wait up to 5 seconds for any required * cache information to be provided. */ - if (!test_bit(SP_CONGESTED, &pool->sp_flags)) + if (!list_empty(&pool->sp_idle_threads)) rqstp->rq_chandle.thread_wait = 5 * HZ; else rqstp->rq_chandle.thread_wait = 1 * HZ; -- cgit v1.2.3 From 9bd4161c591710f152a8cd3ed85ea928c61e26ca Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:30 -0400 Subject: SUNRPC: change service idle list to be an llist With an llist we don't need to take a lock to add a thread to the list, though we still need a lock to remove it. That will go in the next patch. Unlike doubly-linked lists, a thread cannot reliably remove itself from the list. Only the first thread can be removed, and that can change asynchronously. So some care is needed. We already check if there is pending work to do, so we are unlikely to add ourselves to the idle list and then want to remove ourselves again. If we DO find something needs to be done after adding ourselves to the list, we simply wake up the first thread on the list. If that was us, we successfully removed ourselves and can continue. If it was some other thread, they will do the work that needs to be done. We can safely sleep until woken. We also remove the test on freezing() from rqst_should_sleep(). Instead we set TASK_FREEZABLE before scheduling. This makes it safe to schedule() when a freeze is pending. As we now loop waiting to be removed from the idle queue, this is a cleaner way to handle freezing. 
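The resulting wait sequence, abridged from the svc_xprt.c hunk below, shows the shape of the scheme: enqueue first, re-check for work, and treat removal from the list by another thread as the signal to proceed:

	set_current_state(TASK_IDLE | TASK_FREEZABLE);
	llist_add(&rqstp->rq_idle, &pool->sp_idle_threads);

	if (unlikely(!rqst_should_sleep(rqstp)))
		/* Work appeared after we queued ourselves: wake the
		 * first waiter, this thread or another, to handle it.
		 */
		svc_pool_wake_idle_thread(pool);

	/* A thread cannot remove itself from an llist, so sleep
	 * until someone else removes @rqstp from the idle list.
	 */
	while (!svc_thread_busy(rqstp)) {
		schedule();
		set_current_state(TASK_IDLE | TASK_FREEZABLE);
	}
	__set_current_state(TASK_RUNNING);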
Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 21 +++++----------------- net/sunrpc/svc.c | 14 +++++++------- net/sunrpc/svc_xprt.c | 45 +++++++++++++++++++-------------------------- 3 files changed, 31 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 22b3018ebf62..ad4572630335 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -37,7 +37,7 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - struct list_head sp_idle_threads; /* idle server threads */ + struct llist_head sp_idle_threads; /* idle server threads */ /* statistics on pool operation */ struct percpu_counter sp_messages_arrived; @@ -186,7 +186,7 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); */ struct svc_rqst { struct list_head rq_all; /* all threads list */ - struct list_head rq_idle; /* On the idle list */ + struct llist_node rq_idle; /* On the idle list */ struct rcu_head rq_rcu_head; /* for RCU deferred kfree */ struct svc_xprt * rq_xprt; /* transport ptr */ @@ -266,26 +266,15 @@ enum { RQ_DATA, /* request has data */ }; -/** - * svc_thread_set_busy - mark a thread as busy - * @rqstp: the thread which is now busy - * - * If rq_idle is "empty", the thread must be busy. - */ -static inline void svc_thread_set_busy(struct svc_rqst *rqstp) -{ - INIT_LIST_HEAD(&rqstp->rq_idle); -} - /** * svc_thread_busy - check if a thread is busy * @rqstp: the thread which might be busy * - * If rq_idle is "empty", the thread must be busy. + * A thread is only busy when it is not on the idle list. */ -static inline bool svc_thread_busy(struct svc_rqst *rqstp) +static inline bool svc_thread_busy(const struct svc_rqst *rqstp) { - return list_empty(&rqstp->rq_idle); + return !llist_on_list(&rqstp->rq_idle); } #define SVC_NET(rqst) (rqst->rq_xprt ? 
rqst->rq_xprt->xpt_net : rqst->rq_bc_net) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index db4674211f36..54ae6a569f6a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -510,7 +510,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, pool->sp_id = i; INIT_LIST_HEAD(&pool->sp_sockets); INIT_LIST_HEAD(&pool->sp_all_threads); - INIT_LIST_HEAD(&pool->sp_idle_threads); + init_llist_head(&pool->sp_idle_threads); spin_lock_init(&pool->sp_lock); percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); @@ -642,7 +642,7 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - svc_thread_set_busy(rqstp); + init_llist_node(&rqstp->rq_idle); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -701,15 +701,15 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) void svc_pool_wake_idle_thread(struct svc_pool *pool) { struct svc_rqst *rqstp; + struct llist_node *ln; rcu_read_lock(); spin_lock_bh(&pool->sp_lock); - rqstp = list_first_entry_or_null(&pool->sp_idle_threads, - struct svc_rqst, rq_idle); - if (rqstp) - list_del_init(&rqstp->rq_idle); + ln = llist_del_first_init(&pool->sp_idle_threads); spin_unlock_bh(&pool->sp_lock); - if (rqstp) { + if (ln) { + rqstp = llist_entry(ln, struct svc_rqst, rq_idle); + WRITE_ONCE(rqstp->rq_qtime, ktime_get()); wake_up_process(rqstp->rq_task); rcu_read_unlock(); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index fa0d854a5596..17c43bde35c9 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -715,10 +715,6 @@ rqst_should_sleep(struct svc_rqst *rqstp) if (svc_thread_should_stop(rqstp)) return false; - /* are we freezing? */ - if (freezing(current)) - return false; - #if defined(CONFIG_SUNRPC_BACKCHANNEL) if (svc_is_backchannel(rqstp)) { if (!list_empty(&rqstp->rq_server->sv_cb_list)) @@ -734,30 +730,26 @@ static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) struct svc_pool *pool = rqstp->rq_pool; if (rqst_should_sleep(rqstp)) { - set_current_state(TASK_IDLE); - spin_lock_bh(&pool->sp_lock); - list_add(&rqstp->rq_idle, &pool->sp_idle_threads); - spin_unlock_bh(&pool->sp_lock); + set_current_state(TASK_IDLE | TASK_FREEZABLE); + llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); + + if (unlikely(!rqst_should_sleep(rqstp))) + /* Work just became available. This thread cannot simply + * choose not to sleep as it *must* wait until removed. + * So wake the first waiter - whether it is this + * thread or some other, it will get the work done. + */ + svc_pool_wake_idle_thread(pool); - /* Need to check should_sleep() again after - * setting task state in case a wakeup happened - * between testing and setting. + /* Since a thread cannot remove itself from an llist, + * schedule until someone else removes @rqstp from + * the idle list. */ - if (rqst_should_sleep(rqstp)) { + while (!svc_thread_busy(rqstp)) { schedule(); - } else { - __set_current_state(TASK_RUNNING); - cond_resched(); - } - - /* We *must* be removed from the list before we can continue. 
- * If we were woken, this is already done - */ - if (!svc_thread_busy(rqstp)) { - spin_lock_bh(&pool->sp_lock); - list_del_init(&rqstp->rq_idle); - spin_unlock_bh(&pool->sp_lock); + set_current_state(TASK_IDLE | TASK_FREEZABLE); } + __set_current_state(TASK_RUNNING); } else { cond_resched(); } -- cgit v1.2.3 From d7926ee8b78e554de7b7c6ce7b94e7ff7485ecd5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:50 -0400 Subject: SUNRPC: rename some functions from rqst_ to svc_thread_ Functions which directly manipulate a 'struct svc_rqst', such as svc_rqst_alloc() or svc_rqst_release_pages(), can reasonably have "rqst" in their name. However functions that act on the running thread, such as XX_should_sleep() or XX_wait_for_work(), seem more natural with a "svc_thread_" prefix. So make those changes. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- net/sunrpc/svc_xprt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 17c43bde35c9..1b300a7889eb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -699,7 +699,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } static bool -rqst_should_sleep(struct svc_rqst *rqstp) +svc_thread_should_sleep(struct svc_rqst *rqstp) { struct svc_pool *pool = rqstp->rq_pool; @@ -725,15 +725,15 @@ rqst_should_sleep(struct svc_rqst *rqstp) return true; } -static void svc_rqst_wait_for_work(struct svc_rqst *rqstp) +static void svc_thread_wait_for_work(struct svc_rqst *rqstp) { struct svc_pool *pool = rqstp->rq_pool; - if (rqst_should_sleep(rqstp)) { + if (svc_thread_should_sleep(rqstp)) { set_current_state(TASK_IDLE | TASK_FREEZABLE); llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); - if (unlikely(!rqst_should_sleep(rqstp))) + if (unlikely(!svc_thread_should_sleep(rqstp))) /* Work just became available. This thread cannot simply * choose not to sleep as it *must* wait until removed. * So wake the first waiter - whether it is this * thread or some other, it will get the work done. */ @@ -850,7 +850,7 @@ void svc_recv(struct svc_rqst *rqstp) if (!svc_alloc_arg(rqstp)) return; - svc_rqst_wait_for_work(rqstp); + svc_thread_wait_for_work(rqstp); clear_bit(SP_TASK_PENDING, &pool->sp_flags); -- cgit v1.2.3 From 5b80147e0c70181654e8e54eae99f69b2bf891b1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:39:56 -0400 Subject: SUNRPC: only have one thread waking up at a time Currently if several items of work become available in quick succession, that number of threads (if available) will be woken. By the time some of them wake up, another thread that was already cache-warm might have come along and completed the work. Anecdotal evidence suggests as many as 15% of wakes find nothing to do once they get to the point of looking. This patch changes svc_pool_wake_idle_thread() to wake the first thread on the queue but NOT remove it. Subsequent calls will wake the same thread. Once that thread starts it will dequeue itself and, after dequeueing some work to do, will wake the next thread if there is more work ready. 
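A condensed sketch of the waker's side, abridged from the net/sunrpc/svc.c hunk below: the head of the idle list is peeked at but not removed, so repeated calls keep nudging the same thread until it dequeues itself:

	ln = READ_ONCE(pool->sp_idle_threads.first);
	if (ln) {
		rqstp = llist_entry(ln, struct svc_rqst, rq_idle);
		/* A repeat call finds the task already running
		 * and does not issue a redundant wake-up.
		 */
		if (!task_is_running(rqstp->rq_task))
			wake_up_process(rqstp->rq_task);
	}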
This results in a more orderly increase in the number of busy threads. As a bonus, this allows us to reduce locking around the idle queue. svc_pool_wake_idle_thread() no longer needs to take a lock (beyond rcu_read_lock()) as it doesn't manipulate the queue, it just looks at the first item. The thread itself can avoid locking by using the new llist_del_first_this() interface. This will safely remove the thread itself if it is the head. If it isn't the head, it will do nothing. If multiple threads call this concurrently, only one will succeed. The others will do nothing, so no corruption can result. If a thread wakes up and finds that it cannot dequeue itself, that means either: - that it wasn't woken because it was the head of the queue. Maybe the freezer woke it. In that case it can go back to sleep (after trying to freeze, of course). - some other thread found there was nothing to do very recently, and placed itself on the head of the queue in front of this thread. It must check again after placing itself there, so it can be deemed to be responsible for any pending work, and this thread can go back to sleep until woken. No code ever tests for busy threads any more. Only each thread itself cares if it is busy. So svc_thread_busy() is no longer needed. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 11 ----------- net/sunrpc/svc.c | 14 ++++++-------- net/sunrpc/svc_xprt.c | 38 +++++++++++++++++++++++++------------- 3 files changed, 31 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ad4572630335..dafa362b4fdd 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -266,17 +266,6 @@ enum { RQ_DATA, /* request has data */ }; -/** - * svc_thread_busy - check if a thread is busy - * @rqstp: the thread which might be busy - * - * A thread is only busy when it is not on the idle list. - */ -static inline bool svc_thread_busy(const struct svc_rqst *rqstp) -{ - return !llist_on_list(&rqstp->rq_idle); -} - #define SVC_NET(rqst) (rqst->rq_xprt ? 
rqst->rq_xprt->xpt_net : rqst->rq_bc_net) /* diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 54ae6a569f6a..326592162af1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -642,7 +642,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - init_llist_node(&rqstp->rq_idle); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -704,17 +703,16 @@ void svc_pool_wake_idle_thread(struct svc_pool *pool) struct llist_node *ln; rcu_read_lock(); - spin_lock_bh(&pool->sp_lock); - ln = llist_del_first_init(&pool->sp_idle_threads); - spin_unlock_bh(&pool->sp_lock); + ln = READ_ONCE(pool->sp_idle_threads.first); if (ln) { rqstp = llist_entry(ln, struct svc_rqst, rq_idle); WRITE_ONCE(rqstp->rq_qtime, ktime_get()); - wake_up_process(rqstp->rq_task); + if (!task_is_running(rqstp->rq_task)) { + wake_up_process(rqstp->rq_task); + trace_svc_wake_up(rqstp->rq_task->pid); + percpu_counter_inc(&pool->sp_threads_woken); + } rcu_read_unlock(); - percpu_counter_inc(&pool->sp_threads_woken); - trace_svc_wake_up(rqstp->rq_task->pid); return; } rcu_read_unlock(); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 1b300a7889eb..75f66714e3a7 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -732,20 +732,19 @@ static void svc_thread_wait_for_work(struct svc_rqst *rqstp) if (svc_thread_should_sleep(rqstp)) { set_current_state(TASK_IDLE | TASK_FREEZABLE); llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); + if (likely(svc_thread_should_sleep(rqstp))) + schedule(); - if (unlikely(!svc_thread_should_sleep(rqstp))) - /* Work just became available. This thread cannot simply - * choose not to sleep as it *must* wait until removed. - * So wake the first waiter - whether it is this - * thread or some other, it will get the work done. + while (!llist_del_first_this(&pool->sp_idle_threads, + &rqstp->rq_idle)) { + /* Work just became available. This thread can only + * handle it after removing rqstp from the idle + * list. If that attempt failed, some other thread + * must have queued itself after finding no + * work to do, so that thread has taken responsibility + * for this new work. This thread can safely sleep + * until woken again. */ - svc_pool_wake_idle_thread(pool); - - /* Since a thread cannot remove itself from an llist, - * schedule until someone else removes @rqstp from - * the idle list. - */ - while (!svc_thread_busy(rqstp)) { schedule(); set_current_state(TASK_IDLE | TASK_FREEZABLE); } @@ -835,6 +834,15 @@ out: svc_xprt_release(rqstp); } +static void svc_thread_wake_next(struct svc_rqst *rqstp) +{ + if (!svc_thread_should_sleep(rqstp)) + /* More work pending after I dequeued some, + * wake another worker + */ + svc_pool_wake_idle_thread(rqstp->rq_pool); +} + /** * svc_recv - Receive and process the next request on any transport * @rqstp: an idle RPC service thread @@ -854,13 +862,16 @@ void svc_recv(struct svc_rqst *rqstp) clear_bit(SP_TASK_PENDING, &pool->sp_flags); - if (svc_thread_should_stop(rqstp)) + if (svc_thread_should_stop(rqstp)) { + svc_thread_wake_next(rqstp); return; + } rqstp->rq_xprt = svc_xprt_dequeue(pool); if (rqstp->rq_xprt) { struct svc_xprt *xprt = rqstp->rq_xprt; + svc_thread_wake_next(rqstp); /* Normally we will wait up to 5 seconds for any required * cache information to be provided. When there are no * idle threads, we reduce the wait time. 
@@ -885,6 +896,7 @@ void svc_recv(struct svc_rqst *rqstp) if (req) { list_del(&req->rq_bc_list); spin_unlock_bh(&serv->sv_cb_lock); + svc_thread_wake_next(rqstp); svc_process_bc(req, rqstp); return; -- cgit v1.2.3 From 9a0e6accc0a8c3adf72f1b43be8019961b68663a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:40:02 -0400 Subject: SUNRPC: use lwq for sp_sockets - renamed to sp_xprts lwq avoids using back pointers in lists, and uses less locking. This introduces a new spinlock, but the other one will be removed in a future patch. For svc_clean_up_xprts(), we now dequeue the entire queue, walk it to remove and process the xprts that need cleaning up, then re-enqueue the remaining queue. Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 ++- include/linux/sunrpc/svc_xprt.h | 2 +- net/sunrpc/svc.c | 2 +- net/sunrpc/svc_xprt.c | 57 ++++++++++++----------------------------- 4 files changed, 21 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dafa362b4fdd..7ff9fe785e49 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,7 @@ struct svc_pool { unsigned int sp_id; /* pool id; also node id on NUMA */ spinlock_t sp_lock; /* protects all fields */ - struct list_head sp_sockets; /* pending sockets */ + struct lwq sp_xprts; /* pending transports */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct llist_head sp_idle_threads; /* idle server threads */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index fa55d12dc765..8e20cd60e2e7 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -54,7 +54,7 @@ struct svc_xprt { const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; - struct list_head xpt_ready; + struct lwq_node xpt_ready; unsigned long xpt_flags; struct svc_serv *xpt_server; /* service for transport */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 326592162af1..244b5b9eba4d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -508,7 +508,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, i, serv->sv_name); pool->sp_id = i; - INIT_LIST_HEAD(&pool->sp_sockets); + lwq_init(&pool->sp_xprts); INIT_LIST_HEAD(&pool->sp_all_threads); init_llist_head(&pool->sp_idle_threads); spin_lock_init(&pool->sp_lock); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 75f66714e3a7..28ca7db55da1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -201,7 +201,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, kref_init(&xprt->xpt_ref); xprt->xpt_server = serv; INIT_LIST_HEAD(&xprt->xpt_list); - INIT_LIST_HEAD(&xprt->xpt_ready); INIT_LIST_HEAD(&xprt->xpt_deferred); INIT_LIST_HEAD(&xprt->xpt_users); mutex_init(&xprt->xpt_mutex); @@ -472,9 +471,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) pool = svc_pool_for_cpu(xprt->xpt_server); percpu_counter_inc(&pool->sp_sockets_queued); - spin_lock_bh(&pool->sp_lock); - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - spin_unlock_bh(&pool->sp_lock); + lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts); svc_pool_wake_idle_thread(pool); } @@ -487,18 +484,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) { struct svc_xprt *xprt = NULL; - if (list_empty(&pool->sp_sockets)) - goto out; 
- - spin_lock_bh(&pool->sp_lock); - if (likely(!list_empty(&pool->sp_sockets))) { - xprt = list_first_entry(&pool->sp_sockets, - struct svc_xprt, xpt_ready); - list_del_init(&xprt->xpt_ready); + xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready); + if (xprt) svc_xprt_get(xprt); - } - spin_unlock_bh(&pool->sp_lock); -out: return xprt; } @@ -708,7 +696,7 @@ svc_thread_should_sleep(struct svc_rqst *rqstp) return false; /* was a socket queued? */ - if (!list_empty(&pool->sp_sockets)) + if (!lwq_empty(&pool->sp_xprts)) return false; /* are we shutting down? */ @@ -1050,7 +1038,6 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); list_del_init(&xprt->xpt_list); - WARN_ON_ONCE(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -1101,36 +1088,26 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st return ret; } -static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { - struct svc_pool *pool; struct svc_xprt *xprt; - struct svc_xprt *tmp; int i; for (i = 0; i < serv->sv_nrpools; i++) { - pool = &serv->sv_pools[i]; - - spin_lock_bh(&pool->sp_lock); - list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { - if (xprt->xpt_net != net) - continue; - list_del_init(&xprt->xpt_ready); - spin_unlock_bh(&pool->sp_lock); - return xprt; + struct svc_pool *pool = &serv->sv_pools[i]; + struct llist_node *q, **t1, *t2; + + q = lwq_dequeue_all(&pool->sp_xprts); + lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) { + if (xprt->xpt_net == net) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_delete_xprt(xprt); + xprt = NULL; + } } - spin_unlock_bh(&pool->sp_lock); - } - return NULL; -} -static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) -{ - struct svc_xprt *xprt; - - while ((xprt = svc_dequeue_net(serv, net))) { - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_delete_xprt(xprt); + if (q) + lwq_enqueue_batch(q, &pool->sp_xprts); } } -- cgit v1.2.3 From 2e8fc923fe476db8cab9b6458027eccb22f3b6e6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 11 Sep 2023 10:40:09 -0400 Subject: SUNRPC: change sp_nrthreads to atomic_t Using an atomic_t avoids the need to take a spinlock (which can soon be removed). Choosing a thread to kill needs to be careful as we cannot set the "die now" bit atomically with the test on the count. Instead we temporarily increase the count. 
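That care takes the form of a temporary hold on the count, condensed here from the svc_pool_victim() hunk below: bump sp_nrthreads to pin a non-empty pool, set the victim bits, then drop the hold and detect whether the pool emptied in the meantime:

	if (!atomic_inc_not_zero(&pool->sp_nrthreads))
		return NULL;	/* pool is already empty */
	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
	if (!atomic_dec_and_test(&pool->sp_nrthreads))
		return pool;	/* a thread remains to see the bits */
	/* Nothing is left in this pool; undo and look elsewhere */
	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);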
Signed-off-by: NeilBrown
Signed-off-by: Chuck Lever
---
 fs/nfsd/nfssvc.c           | 11 +++++------
 include/linux/sunrpc/svc.h |  2 +-
 net/sunrpc/svc.c           | 37 ++++++++++++++++++++-----------------
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 0b03a2e50dee..433154b9eee0 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -713,14 +713,13 @@ int nfsd_nrpools(struct net *net)
 
 int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
 {
-	int i = 0;
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct svc_serv *serv = nn->nfsd_serv;
+	int i;
 
-	if (nn->nfsd_serv != NULL) {
-		for (i = 0; i < nn->nfsd_serv->sv_nrpools && i < n; i++)
-			nthreads[i] = nn->nfsd_serv->sv_pools[i].sp_nrthreads;
-	}
-
+	if (serv)
+		for (i = 0; i < serv->sv_nrpools && i < n; i++)
+			nthreads[i] = atomic_read(&serv->sv_pools[i].sp_nrthreads);
 	return 0;
 }
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 7ff9fe785e49..9d0fcd6148ae 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -36,7 +36,7 @@ struct svc_pool {
 	unsigned int		sp_id;		/* pool id; also node id on NUMA */
 	spinlock_t		sp_lock;	/* protects all fields */
 	struct lwq		sp_xprts;	/* pending transports */
-	unsigned int		sp_nrthreads;	/* # of threads in pool */
+	atomic_t		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
 	struct llist_head	sp_idle_threads; /* idle server threads */
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 244b5b9eba4d..0928d3f918b0 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -681,8 +681,8 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	serv->sv_nrthreads += 1;
 	spin_unlock_bh(&serv->sv_lock);
 
+	atomic_inc(&pool->sp_nrthreads);
 	spin_lock_bh(&pool->sp_lock);
-	pool->sp_nrthreads++;
 	list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
 	spin_unlock_bh(&pool->sp_lock);
 	return rqstp;
@@ -727,23 +727,24 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
 }
 
 static struct svc_pool *
-svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool,
+		unsigned int *state)
 {
+	struct svc_pool *pool;
 	unsigned int i;
 
+retry:
+	pool = target_pool;
+
 	if (pool != NULL) {
-		spin_lock_bh(&pool->sp_lock);
-		if (pool->sp_nrthreads)
+		if (atomic_inc_not_zero(&pool->sp_nrthreads))
 			goto found_pool;
-		spin_unlock_bh(&pool->sp_lock);
 		return NULL;
 	} else {
 		for (i = 0; i < serv->sv_nrpools; i++) {
 			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
-			spin_lock_bh(&pool->sp_lock);
-			if (pool->sp_nrthreads)
+			if (atomic_inc_not_zero(&pool->sp_nrthreads))
 				goto found_pool;
-			spin_unlock_bh(&pool->sp_lock);
 		}
 		return NULL;
 	}
@@ -751,8 +752,12 @@ svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *stat
 found_pool:
 	set_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
 	set_bit(SP_NEED_VICTIM, &pool->sp_flags);
-	spin_unlock_bh(&pool->sp_lock);
-	return pool;
+	if (!atomic_dec_and_test(&pool->sp_nrthreads))
+		return pool;
+	/* Nothing left in this pool any more */
+	clear_bit(SP_NEED_VICTIM, &pool->sp_flags);
+	clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
+	goto retry;
 }
 
 static int
@@ -828,13 +833,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 int
 svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
 {
-	if (pool == NULL) {
+	if (!pool)
 		nrservs -= serv->sv_nrthreads;
-	} else {
-		spin_lock_bh(&pool->sp_lock);
-		nrservs -= pool->sp_nrthreads;
-		spin_unlock_bh(&pool->sp_lock);
-	}
+	else
+		nrservs -= atomic_read(&pool->sp_nrthreads);
 
 	if (nrservs > 0)
 		return svc_start_kthreads(serv, pool, nrservs);
@@ -921,10 +923,11 @@ svc_exit_thread(struct svc_rqst *rqstp)
 	struct svc_pool *pool = rqstp->rq_pool;
 
 	spin_lock_bh(&pool->sp_lock);
-	pool->sp_nrthreads--;
 	list_del_rcu(&rqstp->rq_all);
 	spin_unlock_bh(&pool->sp_lock);
 
+	atomic_dec(&pool->sp_nrthreads);
+
 	spin_lock_bh(&serv->sv_lock);
 	serv->sv_nrthreads -= 1;
 	spin_unlock_bh(&serv->sv_lock);
-- 
cgit v1.2.3


From 580a25756a9f639180b29a508f3bdd24c50a936a Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 11 Sep 2023 10:40:15 -0400
Subject: SUNRPC: discard sp_lock

sp_lock is now only used to protect sp_all_threads. This isn't needed
as sp_all_threads is only manipulated through svc_set_num_threads(),
which is already serialized. Read access only requires rcu_read_lock().
So no more locking is needed.

Signed-off-by: NeilBrown
Signed-off-by: Chuck Lever
---
 include/linux/sunrpc/svc.h |  1 -
 net/sunrpc/svc.c           | 10 +++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 9d0fcd6148ae..8ce1392c1a35 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -34,7 +34,6 @@
  */
 struct svc_pool {
 	unsigned int		sp_id;		/* pool id; also node id on NUMA */
-	spinlock_t		sp_lock;	/* protects all fields */
 	struct lwq		sp_xprts;	/* pending transports */
 	atomic_t		sp_nrthreads;	/* # of threads in pool */
 	struct list_head	sp_all_threads;	/* all server threads */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 0928d3f918b0..efe7a58ccbdc 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -511,7 +511,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		lwq_init(&pool->sp_xprts);
 		INIT_LIST_HEAD(&pool->sp_all_threads);
 		init_llist_head(&pool->sp_idle_threads);
-		spin_lock_init(&pool->sp_lock);
 
 		percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
 		percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
@@ -682,9 +681,12 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
 	spin_unlock_bh(&serv->sv_lock);
 
 	atomic_inc(&pool->sp_nrthreads);
-	spin_lock_bh(&pool->sp_lock);
+
+	/* Protected by whatever lock the service uses when calling
+	 * svc_set_num_threads()
+	 */
 	list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads);
-	spin_unlock_bh(&pool->sp_lock);
+
 	return rqstp;
 }
 
@@ -922,9 +924,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
 	struct svc_serv *serv = rqstp->rq_server;
 	struct svc_pool *pool = rqstp->rq_pool;
 
-	spin_lock_bh(&pool->sp_lock);
 	list_del_rcu(&rqstp->rq_all);
-	spin_unlock_bh(&pool->sp_lock);
 
 	atomic_dec(&pool->sp_nrthreads);
 
-- 
cgit v1.2.3


From 15d39883ee7dfc023d8a24f5d4b58100e1d04ad9 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Mon, 11 Sep 2023 10:40:22 -0400
Subject: SUNRPC: change the back-channel queue to lwq

This removes the need to store and update back-links in the list.
It also removes the need for the _bh version of spin_lock().
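
The producer/consumer shape that replaces the locked list can be sketched
as follows. This is not code from the patch: only the lwq calls used in
these diffs (lwq_init(), lwq_enqueue(), lwq_dequeue(), lwq_empty()) are
assumed, while struct work_item and handle-side names are invented for
illustration:

	#include <linux/lwq.h>

	struct work_item {
		struct lwq_node	wi_node;	/* embedded queue linkage */
		int		wi_payload;
	};

	static struct lwq work_queue;		/* lwq_init(&work_queue) at setup */

	/* Producer: no spin_lock_bh(), no back-pointer maintenance. */
	static void queue_item(struct work_item *item)
	{
		lwq_enqueue(&item->wi_node, &work_queue);
	}

	/* Consumer: the dequeue resolves the containing struct, much like
	 * list_first_entry_or_null() did in the old code. */
	static struct work_item *next_item(void)
	{
		if (lwq_empty(&work_queue))
			return NULL;	/* optional cheap peek, as in
					 * svc_thread_should_sleep() */
		return lwq_dequeue(&work_queue, struct work_item, wi_node);
	}

Because the linkage is a single embedded lwq_node, enqueue can run from
any context without the _bh lock dance the old list required.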
Signed-off-by: NeilBrown
Cc: Trond Myklebust
Cc: Anna Schumaker
Signed-off-by: Chuck Lever
---
 include/linux/sunrpc/svc.h        |  3 +--
 include/linux/sunrpc/xprt.h       |  3 ++-
 net/sunrpc/backchannel_rqst.c     |  5 +----
 net/sunrpc/svc.c                  |  3 +--
 net/sunrpc/svc_xprt.c             | 12 +++---------
 net/sunrpc/xprtrdma/backchannel.c |  4 +---
 6 files changed, 9 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 8ce1392c1a35..c1feaf0d1542 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -90,10 +90,9 @@ struct svc_serv {
 	int			(*sv_threadfn)(void *data);
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-	struct list_head	sv_cb_list;	/* queue for callback requests
+	struct lwq		sv_cb_list;	/* queue for callback requests
 						 * that arrive over the same
 						 * connection */
-	spinlock_t		sv_cb_lock;	/* protects the svc_cb_list */
 	bool			sv_bc_enabled;	/* service uses backchannel */
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 4ecc89301eb7..f85d3a0daca2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -57,6 +57,7 @@ struct xprt_class;
 struct seq_file;
 struct svc_serv;
 struct net;
+#include <linux/lwq.h>
 
 /*
  * This describes a complete RPC request
@@ -121,7 +122,7 @@ struct rpc_rqst {
 	int			rq_ntrans;
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-	struct list_head	rq_bc_list;	/* Callback service list */
+	struct lwq_node		rq_bc_list;	/* Callback service list */
 	unsigned long		rq_bc_pa_state;	/* Backchannel prealloc state */
 	struct list_head	rq_bc_pa_list;	/* Backchannel prealloc list */
 #endif /* CONFIG_SUNRPC_BACKCHANEL */
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 44b7c89a635f..caa94cf57123 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -83,7 +83,6 @@ static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
 		return NULL;
 
 	req->rq_xprt = xprt;
-	INIT_LIST_HEAD(&req->rq_bc_list);
 
 	/* Preallocate one XDR receive buffer */
 	if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) {
@@ -367,8 +366,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
 
 	dprintk("RPC:       add callback request to list\n");
 	xprt_get(xprt);
-	spin_lock(&bc_serv->sv_cb_lock);
-	list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
-	spin_unlock(&bc_serv->sv_cb_lock);
+	lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list);
 	svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
 }
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index efe7a58ccbdc..7d4c10e609dc 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -438,8 +438,7 @@ EXPORT_SYMBOL_GPL(svc_bind);
 static void
 __svc_init_bc(struct svc_serv *serv)
 {
-	INIT_LIST_HEAD(&serv->sv_cb_list);
-	spin_lock_init(&serv->sv_cb_lock);
+	lwq_init(&serv->sv_cb_list);
 }
 #else
 static void
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 28ca7db55da1..fee83d1024bc 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -705,7 +705,7 @@ svc_thread_should_sleep(struct svc_rqst *rqstp)
 
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	if (svc_is_backchannel(rqstp)) {
-		if (!list_empty(&rqstp->rq_server->sv_cb_list))
+		if (!lwq_empty(&rqstp->rq_server->sv_cb_list))
 			return false;
 	}
 #endif
@@ -878,18 +878,12 @@ void svc_recv(struct svc_rqst *rqstp)
 		struct svc_serv *serv = rqstp->rq_server;
 		struct rpc_rqst *req;
 
-		spin_lock_bh(&serv->sv_cb_lock);
-		req = list_first_entry_or_null(&serv->sv_cb_list,
-					       struct rpc_rqst, rq_bc_list);
+		req = lwq_dequeue(&serv->sv_cb_list,
+				  struct rpc_rqst, rq_bc_list);
 		if (req) {
-			list_del(&req->rq_bc_list);
-			spin_unlock_bh(&serv->sv_cb_lock);
 			svc_thread_wake_next(rqstp);
-
 			svc_process_bc(req, rqstp);
-			return;
 		}
-		spin_unlock_bh(&serv->sv_cb_lock);
 	}
 #endif
 }
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index bfc434ec52a7..8c817e755262 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -263,9 +263,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
 	/* Queue rqst for ULP's callback service */
 	bc_serv = xprt->bc_serv;
 	xprt_get(xprt);
-	spin_lock(&bc_serv->sv_cb_lock);
-	list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
-	spin_unlock(&bc_serv->sv_cb_lock);
+	lwq_enqueue(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
 
 	svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]);
-- 
cgit v1.2.3


From 789ce196a31dd13276076762204bee87df893e53 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Tue, 19 Sep 2023 11:35:15 -0400
Subject: SUNRPC: Remove BUG_ON call sites

There is no need to take down the whole system for these assertions.
I'd rather not attempt a heroic save here, as some bug has occurred
that has left the transport data structures in an unknown state.
Just warn and then leak the left-over resources.

Acked-by: Christian Brauner
Reviewed-by: NeilBrown
Reviewed-by: Jeff Layton
Signed-off-by: Chuck Lever
---
 net/sunrpc/svc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7d4c10e609dc..3f2ea7a0496f 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -573,11 +573,12 @@ svc_destroy(struct kref *ref)
 	timer_shutdown_sync(&serv->sv_temptimer);
 
 	/*
-	 * The last user is gone and thus all sockets have to be destroyed to
-	 * the point. Check this.
+	 * Remaining transports at this point are not expected.
 	 */
-	BUG_ON(!list_empty(&serv->sv_permsocks));
-	BUG_ON(!list_empty(&serv->sv_tempsocks));
+	WARN_ONCE(!list_empty(&serv->sv_permsocks),
+		  "SVC: permsocks remain for %s\n", serv->sv_program->pg_name);
+	WARN_ONCE(!list_empty(&serv->sv_tempsocks),
+		  "SVC: tempsocks remain for %s\n", serv->sv_program->pg_name);
 
 	cache_clean_deferred(serv);
 
-- 
cgit v1.2.3


From 197115ebf358cb440c73e868b2a0a5ef728decc6 Mon Sep 17 00:00:00 2001
From: Chuck Lever
Date: Tue, 10 Oct 2023 13:23:41 -0400
Subject: svcrdma: Drop connection after an RDMA Read error

When an RPC Call message cannot be pulled from the client, that is a
message loss, by definition. Close the connection to trigger the
client to resend.

Cc:
Reviewed-by: Tom Talpey
Signed-off-by: Chuck Lever
---
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 85c8bcaebb80..3b05f90a3e50 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -852,7 +852,8 @@ out_readfail:
 	if (ret == -EINVAL)
 		svc_rdma_send_error(rdma_xprt, ctxt, ret);
 	svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
-	return ret;
+	svc_xprt_deferred_close(xprt);
+	return -ENOTCONN;
 
 out_backchannel:
 	svc_rdma_handle_bc_reply(rqstp, ctxt);
-- 
cgit v1.2.3
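
One idiom from earlier in this series deserves a standalone note: the
svc_clean_up_xprts() rewrite drains the whole lwq, filters it privately,
and re-queues the survivors. The distilled sketch below is illustrative
only -- the lwq calls (lwq_dequeue_all(), lwq_for_each_safe(),
lwq_enqueue_batch()) and the NULL-cursor removal convention are taken from
that diff, while remove_matching(), matches() and dispose() are invented
names:

	/* Drain 'queue', dispose of matching items, re-queue the rest. */
	static void remove_matching(struct lwq *queue)
	{
		struct work_item *item;
		struct llist_node *q, **t1, *t2;

		/* Atomically take ownership of everything queued so far. */
		q = lwq_dequeue_all(queue);

		/* Walk the now-private list; as in the diff above, setting
		 * the cursor to NULL unlinks the node. */
		lwq_for_each_safe(item, t1, t2, &q, wi_node) {
			if (matches(item)) {
				dispose(item);
				item = NULL;
			}
		}

		/* Put any survivors back on the live queue. */
		if (q)
			lwq_enqueue_batch(q, queue);
	}

The drain-and-re-enqueue step trades a brief window where the queue looks
empty for the ability to filter it without holding any lock.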