From 48e03bc515cff75718de5460458680a230ae997e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Jun 2009 12:35:15 -0400 Subject: nfsd: Use write gathering only with NFSv2 NFSv3 and above can use unstable writes whenever they are sending more than one write, rather than relying on the flaky write gathering heuristics. More often than not, write gathering is currently getting it wrong when the NFSv3 clients are sending a single write with FILE_SYNC for efficiency reasons. This patch turns off write gathering for NFSv3/v4, and ensures that it only applies to the one case that can actually benefit: namely NFSv2. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b660435978d2..f30cc4eadb0a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -975,6 +975,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, __be32 err = 0; int host_err; int stable = *stablep; + int use_wgather; #ifdef MSNFS err = nfserr_perm; @@ -993,9 +994,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * - the sync export option has been set, or * - the client requested O_SYNC behavior (NFSv3 feature). * - The file system doesn't support fsync(). - * When gathered writes have been configured for this volume, + * When NFSv2 gathered writes have been configured for this volume, * flushing the data to disk is handled separately below. */ + use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; @@ -1004,7 +1006,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) { + if (stable && !use_wgather) { spin_lock(&file->f_lock); file->f_flags |= O_SYNC; spin_unlock(&file->f_lock); @@ -1040,7 +1042,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * nice and simple solution (IMHO), and it seems to * work:-) */ - if (EX_WGATHER(exp)) { + if (use_wgather) { if (atomic_read(&inode->i_writecount) > 1 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { dprintk("nfsd: write defer %d\n", task_pid_nr(current)); -- cgit v1.2.3 From 9d2a3f31d6d7832cd441eeda08bc2266cdd5d972 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 15 Jun 2009 18:47:56 -0700 Subject: nfsd: track last inode only in use_wgather case Updating last_ino and last_dev probably isn't useful in the !use_wgather case. Also remove some pointless ifdef'd-out code. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f30cc4eadb0a..ebf56c6040ca 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1026,7 +1026,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) kill_suid(dentry); - if (host_err >= 0 && stable) { + if (host_err >= 0 && stable && use_wgather) { static ino_t last_ino; static dev_t last_dev; @@ -1042,21 +1042,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * nice and simple solution (IMHO), and it seems to * work:-) */ - if (use_wgather) { - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); + } - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } -#if 0 - wake_up(&inode->i_wait); -#endif + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + host_err=nfsd_sync(file); } last_ino = inode->i_ino; last_dev = inode->i_sb->s_dev; -- cgit v1.2.3 From d911df7b8d44de41661363a4e29ee710180ba025 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 15 Jun 2009 16:03:53 -0700 Subject: nfsd: Pull write-gathering code out of nfsd_vfs_write This is a relatively self-contained piece of code that handles a special case--move it to its own function. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 69 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 30 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ebf56c6040ca..6ad76a4cfc01 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -963,6 +963,43 @@ static void kill_suid(struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); } +/* + * Gathered writes: If another process is currently writing to the file, + * there's a high chance this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the file with each write + * request, we sleep for 10 msec. + * + * I don't know if this roughly approximates C. Juszak's idea of + * gathered writes, but it's a nice and simple solution (IMHO), and it + * seems to work:-) + * + * Note: we do this only in the NFSv2 case, since v3 and higher have a + * better tool (separate unstable writes and commits) for solving this + * problem. + */ +static int wait_for_concurrent_writes(struct file *file) +{ + struct inode *inode = file->f_path.dentry->d_inode; + static ino_t last_ino; + static dev_t last_dev; + int err = 0; + + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + err = nfsd_sync(file); + } + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; + return err; +} + static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@ -1026,36 +1063,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) kill_suid(dentry); - if (host_err >= 0 && stable && use_wgather) { - static ino_t last_ino; - static dev_t last_dev; - - /* - * Gathered writes: If another process is currently - * writing to the file, there's a high chance - * this is another nfsd (triggered by a bulk write - * from a client's biod). Rather than syncing the - * file with each write request, we sleep for 10 msec. - * - * I don't know if this roughly approximates - * C. Juszak's idea of gathered writes, but it's a - * nice and simple solution (IMHO), and it seems to - * work:-) - */ - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } - - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } - last_ino = inode->i_ino; - last_dev = inode->i_sb->s_dev; - } + if (host_err >= 0 && stable && use_wgather) + host_err = wait_for_concurrent_writes(file); dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) -- cgit v1.2.3 From e4636d535e32768c8c500641ddb144f56e3dc5c0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 15 Jun 2009 19:07:13 -0700 Subject: nfsd: minor nfsd_vfs_write cleanup There's no need to check host_err >= 0 every time here when we could check host_err < 0 once, following the usual kernel style. Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6ad76a4cfc01..1cf70616a11e 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1053,19 +1053,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, oldfs = get_fs(); set_fs(KERNEL_DS); host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); - if (host_err >= 0) { - *cnt = host_err; - nfsdstats.io_write += host_err; - fsnotify_modify(file->f_path.dentry); - } + if (host_err < 0) + goto out_nfserr; + *cnt = host_err; + nfsdstats.io_write += host_err; + fsnotify_modify(file->f_path.dentry); /* clear setuid/setgid flag after write */ - if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (host_err >= 0 && stable && use_wgather) + if (stable && use_wgather) host_err = wait_for_concurrent_writes(file); +out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) err = 0; -- cgit v1.2.3 From 033a666ccb842ab4134fcd0c861d5ba9f5d6bf3a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Jul 2009 14:35:32 +0100 Subject: NFSD: Don't hold unrefcounted creds over call to nfsd_setuser() nfsd_open() gets an unrefcounted pointer to the current process's effective credentials at the top of the function, then calls nfsd_setuser() via fh_verify() - which may replace and destroy the current process's effective credentials - and then passes the unrefcounted pointer to dentry_open() - but the credentials may have been destroyed by this point. Instead, the value from current_cred() should be passed directly to dentry_open() as one of its arguments, rather than being cached in a variable. Possibly fh_verify() should return the creds to use. This is a regression introduced by 745ca2475a6ac596e3d8d37c2759c0fbe2586227 "CRED: Pass credentials through dentry_open()". Signed-off-by: David Howells Tested-and-Verified-By: Steve Dickson Cc: stable@kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfsd/vfs.c') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4145083dcf88..23341c1063bc 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -678,7 +678,6 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access, struct file **filp) { - const struct cred *cred = current_cred(); struct dentry *dentry; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; @@ -733,7 +732,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, vfs_dq_init(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, cred); + flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); else -- cgit v1.2.3