From b55e0ba19cd2d83317a7f1dbb626080951442ab3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 28 Apr 2008 18:22:50 -0400 Subject: nfsd: remove unnecessary atomic ops These bit operations don't need to be atomic. They're all done under a single big mutex anyway. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8799b8708188..5c97d47676ae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1579,8 +1579,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta } /* remember the open */ filp->f_mode |= open->op_share_access; - set_bit(open->op_share_access, &stp->st_access_bmap); - set_bit(open->op_share_deny, &stp->st_deny_bmap); + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); return nfs_ok; } -- cgit v1.2.3 From b42ab1bf9b2f4e70ed246f075c4bc4f52687e48e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 5 May 2008 17:17:44 -0400 Subject: nfsd: add dprintk of compound return We already print each operation of the compound when debugging is turned on; printing the result could also help with remote debugging. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c309c881bd4e..313484380a9b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -952,6 +952,7 @@ encode_op: out: nfsd4_release_compoundargs(args); cstate_free(cstate); + dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } -- cgit v1.2.3 From 7de6dd7a9e600928c231e6001bb6350b19123398 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 29 May 2008 12:56:08 +0300 Subject: nfsd: make nfs4xdr WRITEMEM safe against zero count WRITEMEM zeroes the last word in the destination buffer for padding purposes, but this must not be done if no bytes are to be copied, as it would result in zeroing of the word right before the array. The current implementation works since it's always called with non zero nbytes or it follows an encoding of the string (or opaque) length which, if equal to zero, can be overwritten with zero. Nevertheless, it seems safer to check for this case. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c513bbdf2d36..9547ab636274 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1201,7 +1201,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) *p++ = htonl((u32)((n) >> 32)); \ *p++ = htonl((u32)(n)); \ } while (0) -#define WRITEMEM(ptr,nbytes) do { \ +#define WRITEMEM(ptr,nbytes) do if (nbytes > 0) { \ *(p + XDR_QUADLEN(nbytes) -1) = 0; \ memcpy(p, ptr, nbytes); \ p += XDR_QUADLEN(nbytes); \ -- cgit v1.2.3 From a94c972a98a29dc2d10d007d628b60303982f8d9 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Tue, 10 Jun 2008 08:40:35 -0400 Subject: knfsd: Replace lock_kernel with a mutex for nfsd thread startup/shutdown locking. This removes the BKL from the RPC service creation codepath. The BKL really isn't adequate for this job since some of this info needs protection across sleeps. Also, add some comments to try and clarify how the locking should work and to make it clear that the BKL isn't necessary as long as there is adequate locking between tasks when touching the svc_serv fields. Signed-off-by: Neil Brown Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 37 +++++++++++++++++++++++-------------- fs/nfsd/nfssvc.c | 45 ++++++++++++++++++++++++++++++++------------- include/linux/nfsd/nfsd.h | 1 + net/sunrpc/svc.c | 15 +++++++++------ 4 files changed, 65 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 5ac00c4fee91..049d2a9c7715 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -450,22 +450,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int i; int rv; int len; - int npools = nfsd_nrpools(); + int npools; int *nthreads; + mutex_lock(&nfsd_mutex); + npools = nfsd_nrpools(); if (npools == 0) { /* * NFS is shut down. The admin can start it by * writing to the threads file but NOT the pool_threads * file, sorry. Report zero threads. */ + mutex_unlock(&nfsd_mutex); strcpy(buf, "0\n"); return strlen(buf); } nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); + rv = -ENOMEM; if (nthreads == NULL) - return -ENOMEM; + goto out_free; if (size > 0) { for (i = 0; i < npools; i++) { @@ -496,10 +500,12 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) mesg += len; } + mutex_unlock(&nfsd_mutex); return (mesg-buf); out_free: kfree(nthreads); + mutex_unlock(&nfsd_mutex); return rv; } @@ -566,14 +572,13 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) return len; } -static ssize_t write_ports(struct file *file, char *buf, size_t size) +static ssize_t __write_ports(struct file *file, char *buf, size_t size) { if (size == 0) { int len = 0; - lock_kernel(); + if (nfsd_serv) len = svc_xprt_names(nfsd_serv, buf, 0); - unlock_kernel(); return len; } /* Either a single 'fd' number is written, in which @@ -603,9 +608,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) /* Decrease the count, but don't shutdown the * the service */ - lock_kernel(); nfsd_serv->sv_nrthreads--; - unlock_kernel(); } return err < 0 ? err : 0; } @@ -614,10 +617,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) int len = 0; if (!toclose) return -ENOMEM; - lock_kernel(); if (nfsd_serv) len = svc_sock_names(buf, nfsd_serv, toclose); - unlock_kernel(); if (len >= 0) lockd_down(); kfree(toclose); @@ -655,7 +656,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { if (port == 0) return -EINVAL; - lock_kernel(); if (nfsd_serv) { xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); @@ -666,13 +666,22 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) } else err = -ENOTCONN; } - unlock_kernel(); return err < 0 ? err : 0; } } return -EINVAL; } +static ssize_t write_ports(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + mutex_lock(&nfsd_mutex); + rv = __write_ports(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + + int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) @@ -691,13 +700,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) if (bsize > NFSSVC_MAXBLKSIZE) bsize = NFSSVC_MAXBLKSIZE; bsize &= ~(1024-1); - lock_kernel(); + mutex_lock(&nfsd_mutex); if (nfsd_serv && nfsd_serv->sv_nrthreads) { - unlock_kernel(); + mutex_unlock(&nfsd_mutex); return -EBUSY; } nfsd_max_blksize = bsize; - unlock_kernel(); + mutex_unlock(&nfsd_mutex); } return sprintf(buf, "%d\n", nfsd_max_blksize); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 941041f4b136..512bd04c6dda 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -53,11 +53,27 @@ extern struct svc_program nfsd_program; static void nfsd(struct svc_rqst *rqstp); struct timeval nfssvc_boot; - struct svc_serv *nfsd_serv; static atomic_t nfsd_busy; static unsigned long nfsd_last_call; static DEFINE_SPINLOCK(nfsd_call_lock); +/* + * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members + * of the svc_serv struct. In particular, ->sv_nrthreads but also to some + * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt + * + * If (out side the lock) nfsd_serv is non-NULL, then it must point to a + * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number + * of nfsd threads must exist and each must listed in ->sp_all_threads in each + * entry of ->sv_pools[]. + * + * Transitions of the thread count between zero and non-zero are of particular + * interest since the svc_serv needs to be created and initialized at that + * point, or freed. + */ +DEFINE_MUTEX(nfsd_mutex); +struct svc_serv *nfsd_serv; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -190,13 +206,14 @@ void nfsd_reset_versions(void) } } + int nfsd_create_serv(void) { int err = 0; - lock_kernel(); + + WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { svc_get(nfsd_serv); - unlock_kernel(); return 0; } if (nfsd_max_blksize == 0) { @@ -223,7 +240,7 @@ int nfsd_create_serv(void) nfsd, SIG_NOCLEAN, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; - unlock_kernel(); + do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; } @@ -282,6 +299,8 @@ int nfsd_set_nrthreads(int n, int *nthreads) int tot = 0; int err = 0; + WARN_ON(!mutex_is_locked(&nfsd_mutex)); + if (nfsd_serv == NULL || n <= 0) return 0; @@ -316,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) nthreads[0] = 1; /* apply the new numbers */ - lock_kernel(); svc_get(nfsd_serv); for (i = 0; i < n; i++) { err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], @@ -325,7 +343,6 @@ int nfsd_set_nrthreads(int n, int *nthreads) break; } svc_destroy(nfsd_serv); - unlock_kernel(); return err; } @@ -334,8 +351,8 @@ int nfsd_svc(unsigned short port, int nrservs) { int error; - - lock_kernel(); + + mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); error = -EINVAL; if (nrservs <= 0) @@ -363,7 +380,7 @@ nfsd_svc(unsigned short port, int nrservs) failure: svc_destroy(nfsd_serv); /* Release server */ out: - unlock_kernel(); + mutex_unlock(&nfsd_mutex); return error; } @@ -399,7 +416,7 @@ nfsd(struct svc_rqst *rqstp) sigset_t shutdown_mask, allowed_mask; /* Lock module and set up kernel thread */ - lock_kernel(); + mutex_lock(&nfsd_mutex); daemonize("nfsd"); /* After daemonize() this kernel thread shares current->fs @@ -417,11 +434,13 @@ nfsd(struct svc_rqst *rqstp) siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); siginitsetinv(&allowed_mask, ALLOWED_SIGS); + nfsdstats.th_cnt++; rqstp->rq_task = current; - unlock_kernel(); + mutex_unlock(&nfsd_mutex); + /* * We want less throttling in balance_dirty_pages() so that nfs to @@ -477,7 +496,7 @@ nfsd(struct svc_rqst *rqstp) /* Clear signals before calling svc_exit_thread() */ flush_signals(current); - lock_kernel(); + mutex_lock(&nfsd_mutex); nfsdstats.th_cnt --; @@ -486,7 +505,7 @@ out: svc_exit_thread(rqstp); /* Release module */ - unlock_kernel(); + mutex_unlock(&nfsd_mutex); module_put_and_exit(0); } diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 41d30c9c9de6..88d85b964429 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -54,6 +54,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; +extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern struct seq_operations nfs_exports_op; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 01c7e311b904..7bffaff2a3ab 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -461,7 +461,8 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, EXPORT_SYMBOL(svc_create_pooled); /* - * Destroy an RPC service. Should be called with the BKL held + * Destroy an RPC service. Should be called with appropriate locking to + * protect the sv_nrthreads, sv_permsocks and sv_tempsocks. */ void svc_destroy(struct svc_serv *serv) @@ -578,9 +579,10 @@ out_enomem: EXPORT_SYMBOL(svc_prepare_thread); /* - * Create a thread in the given pool. Caller must hold BKL. - * On a NUMA or SMP machine, with a multi-pool serv, the thread - * will be restricted to run on the cpus belonging to the pool. + * Create a thread in the given pool. Caller must hold BKL or another lock to + * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a + * multi-pool serv, the thread will be restricted to run on the cpus belonging + * to the pool. */ static int __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, @@ -674,7 +676,7 @@ found_pool: * of threads the given number. If `pool' is non-NULL, applies * only to threads in that pool, otherwise round-robins between * all pools. Must be called with a svc_get() reference and - * the BKL held. + * the BKL or another lock to protect access to svc_serv fields. * * Destroying threads relies on the service threads filling in * rqstp->rq_task, which only the nfs ones do. Assumes the serv @@ -722,7 +724,8 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) EXPORT_SYMBOL(svc_set_num_threads); /* - * Called from a server thread as it's exiting. Caller must hold BKL. + * Called from a server thread as it's exiting. Caller must hold the BKL or + * the "service mutex", whichever is appropriate for the service. */ void svc_exit_thread(struct svc_rqst *rqstp) -- cgit v1.2.3 From 25d3ab8a08d7d004f670af39fd3a821630374493 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2008 08:40:36 -0400 Subject: knfsd: clean up nfsd filesystem interfaces Several of the nfsd filesystem interfaces allow changes to parameters that don't have any effect on a running nfsd service. They are only ever checked when nfsd is started. This patch fixes it so that changes to those procfiles return -EBUSY if nfsd is already running to make it clear that changes on the fly don't work. The patch should also close some relatively harmless races between changing the info in those interfaces and starting nfsd, since these variables are being moved under the protection of the nfsd_mutex. Finally, the nfsv4recoverydir file always returns -EINVAL if read. This patch fixes it to return the recoverydir path as expected. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 17 ++++++++++---- fs/nfsd/nfsctl.c | 66 ++++++++++++++++++++++++++++++++++++++++++----------- fs/nfsd/nfssvc.c | 8 +++++++ 3 files changed, 74 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5c97d47676ae..bf11d6879ab4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3249,12 +3249,14 @@ nfs4_state_shutdown(void) nfs4_unlock_state(); } +/* + * user_recovery_dirname is protected by the nfsd_mutex since it's only + * accessed when nfsd is starting. + */ static void nfs4_set_recdir(char *recdir) { - nfs4_lock_state(); strcpy(user_recovery_dirname, recdir); - nfs4_unlock_state(); } /* @@ -3278,6 +3280,12 @@ nfs4_reset_recoverydir(char *recdir) return status; } +char * +nfs4_recoverydir(void) +{ + return user_recovery_dirname; +} + /* * Called when leasetime is changed. * @@ -3286,11 +3294,12 @@ nfs4_reset_recoverydir(char *recdir) * we start to register any changes in lease time. If the administrator * really wants to change the lease time *now*, they can go ahead and bring * nfsd down and then back up again after changing the lease time. + * + * user_lease_time is protected by nfsd_mutex since it's only really accessed + * when nfsd is starting */ void nfs4_reset_lease(time_t leasetime) { - lock_kernel(); user_lease_time = leasetime; - unlock_kernel(); } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 049d2a9c7715..2c2eb8796c10 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -509,7 +509,7 @@ out_free: return rv; } -static ssize_t write_versions(struct file *file, char *buf, size_t size) +static ssize_t __write_versions(struct file *file, char *buf, size_t size) { /* * Format: @@ -572,6 +572,16 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) return len; } +static ssize_t write_versions(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_versions(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + static ssize_t __write_ports(struct file *file, char *buf, size_t size) { if (size == 0) { @@ -675,6 +685,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; + mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size); mutex_unlock(&nfsd_mutex); @@ -714,16 +725,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) #ifdef CONFIG_NFSD_V4 extern time_t nfs4_leasetime(void); -static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) { /* if size > 10 seconds, call * nfs4_reset_lease() then write out the new lease (seconds) as reply */ char *mesg = buf; - int rv; + int rv, lease; if (size > 0) { - int lease; + if (nfsd_serv) + return -EBUSY; rv = get_int(&mesg, &lease); if (rv) return rv; @@ -735,24 +747,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) return strlen(buf); } -static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_leasetime(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + +extern char *nfs4_recoverydir(void); + +static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size) { char *mesg = buf; char *recdir; int len, status; - if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') - return -EINVAL; - buf[size-1] = 0; + if (size > 0) { + if (nfsd_serv) + return -EBUSY; + if (size > PATH_MAX || buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; - recdir = mesg; - len = qword_get(&mesg, recdir, size); - if (len <= 0) - return -EINVAL; + recdir = mesg; + len = qword_get(&mesg, recdir, size); + if (len <= 0) + return -EINVAL; - status = nfs4_reset_recoverydir(recdir); + status = nfs4_reset_recoverydir(recdir); + } + sprintf(buf, "%s\n", nfs4_recoverydir()); return strlen(buf); } + +static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) +{ + ssize_t rv; + + mutex_lock(&nfsd_mutex); + rv = __write_recoverydir(file, buf, size); + mutex_unlock(&nfsd_mutex); + return rv; +} + #endif /*----------------------------------------------------------------------------*/ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 512bd04c6dda..929af233510e 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -70,6 +70,14 @@ static DEFINE_SPINLOCK(nfsd_call_lock); * Transitions of the thread count between zero and non-zero are of particular * interest since the svc_serv needs to be created and initialized at that * point, or freed. + * + * Finally, the nfsd_mutex also protects some of the global variables that are + * accessed when nfsd starts and that are settable via the write_* routines in + * nfsctl.c. In particular: + * + * user_recovery_dirname + * user_lease_time + * nfsd_versions */ DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; -- cgit v1.2.3 From 6a41183b0a6f28971c4a0bd8e54eba1a74091ce3 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2008 08:40:37 -0400 Subject: knfsd: remove special handling for SIGHUP The special handling for SIGHUP in knfsd is a holdover from much earlier versions of Linux where reloading the export table was more expensive. That facility is not really needed anymore and to my knowledge, is seldom-used. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 929af233510e..6339cb70a08d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -44,11 +44,6 @@ * when not handling a request. i.e. when waiting */ #define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT)) -/* if the last thread dies with SIGHUP, then the exports table is - * left unchanged ( like 2.4-{0-9} ). Any other signal will clear - * the exports table (like 2.2). - */ -#define SIG_NOCLEAN SIGHUP extern struct svc_program nfsd_program; static void nfsd(struct svc_rqst *rqstp); @@ -175,7 +170,6 @@ int nfsd_nrthreads(void) return nfsd_serv->sv_nrthreads; } -static int killsig; /* signal that was used to kill last nfsd */ static void nfsd_last_thread(struct svc_serv *serv) { /* When last nfsd thread exits we need to do some clean-up */ @@ -186,11 +180,9 @@ static void nfsd_last_thread(struct svc_serv *serv) nfsd_racache_shutdown(); nfs4_state_shutdown(); - printk(KERN_WARNING "nfsd: last server has exited\n"); - if (killsig != SIG_NOCLEAN) { - printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); - nfsd_export_flush(); - } + printk(KERN_WARNING "nfsd: last server has exited, flushing export " + "cache\n"); + nfsd_export_flush(); } void nfsd_reset_versions(void) @@ -242,10 +234,9 @@ int nfsd_create_serv(void) } atomic_set(&nfsd_busy, 0); - nfsd_serv = svc_create_pooled(&nfsd_program, - nfsd_max_blksize, - nfsd_last_thread, - nfsd, SIG_NOCLEAN, THIS_MODULE); + nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, + nfsd_last_thread, nfsd, SIGINT, + THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; @@ -490,17 +481,9 @@ nfsd(struct svc_rqst *rqstp) atomic_dec(&nfsd_busy); } - if (err != -EINTR) { + if (err != -EINTR) printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); - } else { - unsigned int signo; - - for (signo = 1; signo <= _NSIG; signo++) - if (sigismember(¤t->pending.signal, signo) && - !sigismember(¤t->blocked, signo)) - break; - killsig = signo; - } + /* Clear signals before calling svc_exit_thread() */ flush_signals(current); -- cgit v1.2.3 From 89a8922f36389cd4a284e5d7727cd792ef4544e0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2008 08:40:38 -0400 Subject: knfsd: convert knfsd to kthread API This patch is rather large, but I couldn't figure out a way to break it up that would remain bisectable. It does several things: - change svc_thread_fn typedef to better match what kthread_create expects - change svc_pool_map_set_cpumask to be more kthread friendly. Make it take a task arg and and get rid of the "oldmask" - have svc_set_num_threads call kthread_create directly - eliminate __svc_create_thread Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 45 ++++++++++++-------- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 100 +++++++++++++++------------------------------ 3 files changed, 64 insertions(+), 83 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 6339cb70a08d..9e2156813710 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,7 @@ #define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT)) extern struct svc_program nfsd_program; -static void nfsd(struct svc_rqst *rqstp); +static int nfsd(void *vrqstp); struct timeval nfssvc_boot; static atomic_t nfsd_busy; static unsigned long nfsd_last_call; @@ -407,18 +408,19 @@ update_thread_usage(int busy_threads) /* * This is the NFS server kernel thread */ -static void -nfsd(struct svc_rqst *rqstp) +static int +nfsd(void *vrqstp) { + struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct fs_struct *fsp; - int err; sigset_t shutdown_mask, allowed_mask; + int err, preverr = 0; + unsigned int signo; /* Lock module and set up kernel thread */ mutex_lock(&nfsd_mutex); - daemonize("nfsd"); - /* After daemonize() this kernel thread shares current->fs + /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ fsp = copy_fs_struct(current->fs); @@ -433,14 +435,18 @@ nfsd(struct svc_rqst *rqstp) siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); siginitsetinv(&allowed_mask, ALLOWED_SIGS); + /* + * thread is spawned with all signals set to SIG_IGN, re-enable + * the ones that matter + */ + for (signo = 1; signo <= _NSIG; signo++) { + if (!sigismember(&shutdown_mask, signo)) + allow_signal(signo); + } nfsdstats.th_cnt++; - - rqstp->rq_task = current; - mutex_unlock(&nfsd_mutex); - /* * We want less throttling in balance_dirty_pages() so that nfs to * localhost doesn't cause nfsd to lock up due to all the client's @@ -462,15 +468,25 @@ nfsd(struct svc_rqst *rqstp) */ while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) ; - if (err < 0) + if (err == -EINTR) break; + else if (err < 0) { + if (err != preverr) { + printk(KERN_WARNING "%s: unexpected error " + "from svc_recv (%d)\n", __func__, -err); + preverr = err; + } + schedule_timeout_uninterruptible(HZ); + continue; + } + update_thread_usage(atomic_read(&nfsd_busy)); atomic_inc(&nfsd_busy); /* Lock the export hash tables for reading. */ exp_readlock(); - /* Process request with signals blocked. */ + /* Process request with signals blocked. */ sigprocmask(SIG_SETMASK, &allowed_mask, NULL); svc_process(rqstp); @@ -481,14 +497,10 @@ nfsd(struct svc_rqst *rqstp) atomic_dec(&nfsd_busy); } - if (err != -EINTR) - printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); - /* Clear signals before calling svc_exit_thread() */ flush_signals(current); mutex_lock(&nfsd_mutex); - nfsdstats.th_cnt --; out: @@ -498,6 +510,7 @@ out: /* Release module */ mutex_unlock(&nfsd_mutex); module_put_and_exit(0); + return 0; } static __be32 map_new_errors(u32 vers, __be32 nfserr) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4b54c5fdcfd9..011d6d8100d8 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -22,7 +22,7 @@ /* * This is the RPC server thread function prototype */ -typedef void (*svc_thread_fn)(struct svc_rqst *); +typedef int (*svc_thread_fn)(void *); /* * diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 7bffaff2a3ab..03a9f1a9e75c 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -291,15 +292,14 @@ svc_pool_map_put(void) /* - * Set the current thread's cpus_allowed mask so that it + * Set the given thread's cpus_allowed mask so that it * will only run on cpus in the given pool. - * - * Returns 1 and fills in oldmask iff a cpumask was applied. */ -static inline int -svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) +static inline void +svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) { struct svc_pool_map *m = &svc_pool_map; + unsigned int node = m->pool_to[pidx]; /* * The caller checks for sv_nrpools > 1, which @@ -307,26 +307,17 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask) */ BUG_ON(m->count == 0); - switch (m->mode) - { - default: - return 0; + switch (m->mode) { case SVC_POOL_PERCPU: { - unsigned int cpu = m->pool_to[pidx]; - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); - return 1; + set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); + break; } case SVC_POOL_PERNODE: { - unsigned int node = m->pool_to[pidx]; node_to_cpumask_ptr(nodecpumask, node); - - *oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, nodecpumask); - return 1; + set_cpus_allowed_ptr(task, nodecpumask); + break; } } } @@ -578,47 +569,6 @@ out_enomem: } EXPORT_SYMBOL(svc_prepare_thread); -/* - * Create a thread in the given pool. Caller must hold BKL or another lock to - * serialize access to the svc_serv struct. On a NUMA or SMP machine, with a - * multi-pool serv, the thread will be restricted to run on the cpus belonging - * to the pool. - */ -static int -__svc_create_thread(svc_thread_fn func, struct svc_serv *serv, - struct svc_pool *pool) -{ - struct svc_rqst *rqstp; - int error = -ENOMEM; - int have_oldmask = 0; - cpumask_t uninitialized_var(oldmask); - - rqstp = svc_prepare_thread(serv, pool); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); - goto out; - } - - if (serv->sv_nrpools > 1) - have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); - - error = kernel_thread((int (*)(void *)) func, rqstp, 0); - - if (have_oldmask) - set_cpus_allowed(current, oldmask); - - if (error < 0) - goto out_thread; - svc_sock_update_bufs(serv); - error = 0; -out: - return error; - -out_thread: - svc_exit_thread(rqstp); - goto out; -} - /* * Choose a pool in which to create a new thread, for svc_set_num_threads */ @@ -688,7 +638,9 @@ found_pool: int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct task_struct *victim; + struct svc_rqst *rqstp; + struct task_struct *task; + struct svc_pool *chosen_pool; int error = 0; unsigned int state = serv->sv_nrthreads-1; @@ -704,18 +656,34 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) /* create new threads */ while (nrservs > 0) { nrservs--; + chosen_pool = choose_pool(serv, pool, &state); + + rqstp = svc_prepare_thread(serv, chosen_pool); + if (IS_ERR(rqstp)) { + error = PTR_ERR(rqstp); + break; + } + __module_get(serv->sv_module); - error = __svc_create_thread(serv->sv_function, serv, - choose_pool(serv, pool, &state)); - if (error < 0) { + task = kthread_create(serv->sv_function, rqstp, serv->sv_name); + if (IS_ERR(task)) { + error = PTR_ERR(task); module_put(serv->sv_module); + svc_exit_thread(rqstp); break; } + + rqstp->rq_task = task; + if (serv->sv_nrpools > 1) + svc_pool_map_set_cpumask(task, chosen_pool->sp_id); + + svc_sock_update_bufs(serv); + wake_up_process(task); } /* destroy old threads */ while (nrservs < 0 && - (victim = choose_victim(serv, pool, &state)) != NULL) { - send_sig(serv->sv_kill_signal, victim, 1); + (task = choose_victim(serv, pool, &state)) != NULL) { + send_sig(serv->sv_kill_signal, task, 1); nrservs++; } -- cgit v1.2.3 From c9daaadafa659a09589db563fc16f44032df55bc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2008 08:40:39 -0400 Subject: sunrpc: remove sv_kill_signal field from svc_serv struct Since we no longer make any distinction between shutdown signals with nfsd, then it becomes easier to just standardize on a particular signal to use to bring it down (SIGINT, in this case). Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 3 +-- include/linux/sunrpc/svc.h | 5 ++--- net/sunrpc/svc.c | 5 ++--- 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9e2156813710..26c81149d49a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -236,8 +236,7 @@ int nfsd_create_serv(void) atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - nfsd_last_thread, nfsd, SIGINT, - THIS_MODULE); + nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 011d6d8100d8..dc69068d94c7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -80,7 +80,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - int sv_kill_signal; /* signal to kill threads */ }; /* @@ -388,8 +387,8 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv*), - svc_thread_fn, int sig, struct module *); + void (*shutdown)(struct svc_serv*), svc_thread_fn, + struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 03a9f1a9e75c..5a32cb7c4bb4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -434,7 +434,7 @@ EXPORT_SYMBOL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, void (*shutdown)(struct svc_serv *serv), - svc_thread_fn func, int sig, struct module *mod) + svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -443,7 +443,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, if (serv != NULL) { serv->sv_function = func; - serv->sv_kill_signal = sig; serv->sv_module = mod; } @@ -683,7 +682,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) /* destroy old threads */ while (nrservs < 0 && (task = choose_victim(serv, pool, &state)) != NULL) { - send_sig(serv->sv_kill_signal, task, 1); + send_sig(SIGINT, task, 1); nrservs++; } -- cgit v1.2.3 From cd322f5f0df6a1f1313025468d60b3807928fce8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 11 Jun 2008 10:03:12 -0400 Subject: lockd: close potential race with rapid lockd_up/lockd_down cycle If lockd_down is called very rapidly after lockd_up returns, then there is a slim chance that lockd() will never be called. kthread() will return before calling the function, so we'll end up never actually calling the cleanup functions for the thread. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 2169af4d5455..5bd9bf0fa9df 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops); static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct task_struct *nlmsvc_task; -static struct svc_serv *nlmsvc_serv; +static struct svc_rqst *nlmsvc_rqst; int nlmsvc_grace_period; unsigned long nlmsvc_timeout; @@ -194,20 +194,11 @@ lockd(void *vrqstp) svc_process(rqstp); } - flush_signals(current); if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); - unlock_kernel(); - - nlmsvc_task = NULL; - nlmsvc_serv = NULL; - - /* Exit the RPC thread */ - svc_exit_thread(rqstp); - return 0; } @@ -254,16 +245,15 @@ int lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ { struct svc_serv *serv; - struct svc_rqst *rqstp; int error = 0; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ - if (nlmsvc_serv) { + if (nlmsvc_rqst) { if (proto) - error = make_socks(nlmsvc_serv, proto); + error = make_socks(nlmsvc_rqst->rq_server, proto); goto out; } @@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ /* * Create the kernel thread and wait for it to start. */ - rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(rqstp)) { - error = PTR_ERR(rqstp); + nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(nlmsvc_rqst)) { + error = PTR_ERR(nlmsvc_rqst); + nlmsvc_rqst = NULL; printk(KERN_WARNING "lockd_up: svc_rqst allocation failed, error=%d\n", error); @@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ } svc_sock_update_bufs(serv); - nlmsvc_serv = rqstp->rq_server; - nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name); + nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name); if (IS_ERR(nlmsvc_task)) { error = PTR_ERR(nlmsvc_task); + svc_exit_thread(nlmsvc_rqst); nlmsvc_task = NULL; - nlmsvc_serv = NULL; + nlmsvc_rqst = NULL; printk(KERN_WARNING "lockd_up: kthread_run failed, error=%d\n", error); - svc_exit_thread(rqstp); goto destroy_and_out; } @@ -346,6 +336,9 @@ lockd_down(void) BUG(); } kthread_stop(nlmsvc_task); + svc_exit_thread(nlmsvc_rqst); + nlmsvc_task = NULL; + nlmsvc_rqst = NULL; out: mutex_unlock(&nlmsvc_mutex); } -- cgit v1.2.3 From cb085a14de5f49ad9fc4342f0e017647b5c4b1f6 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 12 Jun 2008 13:38:42 +1000 Subject: nfsd: fix race in nfsd_nrthreads() We need the nfsd_mutex before accessing nfsd_serv->sv_nrthreads or we can't even guarantee nfsd_serv will still be there. Signed-off-by: Neil Brown Acked-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 26c81149d49a..96fdbcab8d95 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -165,10 +165,12 @@ int nfsd_vers(int vers, enum vers_op change) int nfsd_nrthreads(void) { - if (nfsd_serv == NULL) - return 0; - else - return nfsd_serv->sv_nrthreads; + int rv = 0; + mutex_lock(&nfsd_mutex); + if (nfsd_serv) + rv = nfsd_serv->sv_nrthreads; + mutex_unlock(&nfsd_mutex); + return rv; } static void nfsd_last_thread(struct svc_serv *serv) -- cgit v1.2.3 From f51f1a3b481677e2b749485b58df11bfd6ce9058 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jun 2008 21:42:15 +1000 Subject: knfsd: nfsd: Handle ERESTARTSYS from syscalls. OCFS2 can return -ERESTARTSYS from write requests (and possibly elsewhere) if there is a signal pending. If nfsd is shutdown (by sending a signal to each thread) while there is still an IO load from the client, each thread could handle one last request with a signal pending. This can result in -ERESTARTSYS which is not understood by nfserrno() and so is reflected back to the client as nfserr_io aka -EIO. This is wrong. Instead, interpret ERESTARTSYS to mean "don't send a reply". The client will resend and - if the server is restarted - the write will (hopefully) be successful and everyone will be happy. The symptom that I narrowed down to this was: copy a large file via NFS to an OCFS2 filesystem, and restart the nfs server during the copy. The 'cp' might get an -EIO, and the file will be corrupted - presumably holes in the middle were writes appeared to fail. Signed-off-by: Neil Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6cfc96a12483..d305bb475c7e 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -614,6 +614,7 @@ nfserrno (int errno) #endif { nfserr_stale, -ESTALE }, { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_dropit, -ERESTARTSYS }, { nfserr_dropit, -EAGAIN }, { nfserr_dropit, -ENOMEM }, { nfserr_badname, -ESRCH }, -- cgit v1.2.3